fix: lazy init MLX for quantization and improve library discovery

- Add lazy MLX initialization in quantizeTensor to ensure the library is loaded when quantization is requested
- Add exe-relative build path search for dev mode on macOS, so the ollama binary can find libmlxc.dylib in build/lib/ollama/ when running from the repo root
2026-01-23 06:53:03 -05:00 · 2026-01-17 22:46:20 -08:00
122 changed files with 8626 additions and 15230 deletions
--- a/api/types.go
+++ b/api/types.go
@@ -749,7 +749,7 @@ type ShowResponse struct {
 	Messages      []Message          `json:"messages,omitempty"`
 	RemoteModel   string             `json:"remote_model,omitempty"`
 	RemoteHost    string             `json:"remote_host,omitempty"`
-	ModelInfo     map[string]any     `json:"model_info"`
+	ModelInfo     map[string]any     `json:"model_info,omitempty"`
 	ProjectorInfo map[string]any     `json:"projector_info,omitempty"`
 	Tensors       []Tensor           `json:"tensors,omitempty"`
 	Capabilities  []model.Capability `json:"capabilities,omitempty"`
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -35,7 +35,6 @@ import (
 	"golang.org/x/term"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/parser"
@@ -900,11 +899,11 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
 	for _, arg := range args {
 		// Unload the model if it's running before deletion
 		if err := loadOrUnloadModel(cmd, &runOptions{
-			Model:     arg,
+			Model:     args[0],
 			KeepAlive: &api.Duration{Duration: 0},
 		}); err != nil {
 			if !strings.Contains(strings.ToLower(err.Error()), "not found") {
-				fmt.Fprintf(os.Stderr, "Warning: unable to stop model '%s'\n", arg)
+				fmt.Fprintf(os.Stderr, "Warning: unable to stop model '%s'\n", args[0])
 			}
 		}

@@ -2027,7 +2026,6 @@ func NewCLI() *cobra.Command {
 		copyCmd,
 		deleteCmd,
 		runnerCmd,
-		config.ConfigCmd(checkServerHeartbeat),
 	)

 	return rootCmd
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -1,36 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-)
-
-// Claude implements Runner for Claude Code integration
-type Claude struct{}
-
-func (c *Claude) String() string { return "Claude Code" }
-
-func (c *Claude) args(model string) []string {
-	if model != "" {
-		return []string{"--model", model}
-	}
-	return nil
-}
-
-func (c *Claude) Run(model string) error {
-	if _, err := exec.LookPath("claude"); err != nil {
-		return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
-	}
-
-	cmd := exec.Command("claude", c.args(model)...)
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	cmd.Env = append(os.Environ(),
-		"ANTHROPIC_BASE_URL=http://localhost:11434",
-		"ANTHROPIC_API_KEY=",
-		"ANTHROPIC_AUTH_TOKEN=ollama",
-	)
-	return cmd.Run()
-}
--- a/cmd/config/claude_test.go
+++ b/cmd/config/claude_test.go
@@ -1,42 +0,0 @@
-package config
-
-import (
-	"slices"
-	"testing"
-)
-
-func TestClaudeIntegration(t *testing.T) {
-	c := &Claude{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := c.String(); got != "Claude Code" {
-			t.Errorf("String() = %q, want %q", got, "Claude Code")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = c
-	})
-}
-
-func TestClaudeArgs(t *testing.T) {
-	c := &Claude{}
-
-	tests := []struct {
-		name  string
-		model string
-		want  []string
-	}{
-		{"with model", "llama3.2", []string{"--model", "llama3.2"}},
-		{"empty model", "", nil},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := c.args(tt.model)
-			if !slices.Equal(got, tt.want) {
-				t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
-			}
-		})
-	}
-}
--- a/cmd/config/codex.go
+++ b/cmd/config/codex.go
@@ -1,61 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-	"strings"
-
-	"golang.org/x/mod/semver"
-)
-
-// Codex implements Runner for Codex integration
-type Codex struct{}
-
-func (c *Codex) String() string { return "Codex" }
-
-func (c *Codex) args(model string) []string {
-	args := []string{"--oss"}
-	if model != "" {
-		args = append(args, "-m", model)
-	}
-	return args
-}
-
-func (c *Codex) Run(model string) error {
-	if err := checkCodexVersion(); err != nil {
-		return err
-	}
-
-	cmd := exec.Command("codex", c.args(model)...)
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	return cmd.Run()
-}
-
-func checkCodexVersion() error {
-	if _, err := exec.LookPath("codex"); err != nil {
-		return fmt.Errorf("codex is not installed, install with: npm install -g @openai/codex")
-	}
-
-	out, err := exec.Command("codex", "--version").Output()
-	if err != nil {
-		return fmt.Errorf("failed to get codex version: %w", err)
-	}
-
-	// Parse output like "codex-cli 0.87.0"
-	fields := strings.Fields(strings.TrimSpace(string(out)))
-	if len(fields) < 2 {
-		return fmt.Errorf("unexpected codex version output: %s", string(out))
-	}
-
-	version := "v" + fields[len(fields)-1]
-	minVersion := "v0.81.0"
-
-	if semver.Compare(version, minVersion) < 0 {
-		return fmt.Errorf("codex version %s is too old, minimum required is %s, update with: npm update -g @openai/codex", fields[len(fields)-1], "0.81.0")
-	}
-
-	return nil
-}
--- a/cmd/config/codex_test.go
+++ b/cmd/config/codex_test.go
@@ -1,28 +0,0 @@
-package config
-
-import (
-	"slices"
-	"testing"
-)
-
-func TestCodexArgs(t *testing.T) {
-	c := &Codex{}
-
-	tests := []struct {
-		name  string
-		model string
-		want  []string
-	}{
-		{"with model", "llama3.2", []string{"--oss", "-m", "llama3.2"}},
-		{"empty model", "", []string{"--oss"}},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := c.args(tt.model)
-			if !slices.Equal(got, tt.want) {
-				t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
-			}
-		})
-	}
-}
--- a/cmd/config/config.go
+++ b/cmd/config/config.go
@@ -1,115 +0,0 @@
-// Package config provides integration configuration for external coding tools
-// (Claude Code, Codex, Droid, OpenCode) to use Ollama models.
-package config
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-)
-
-type integration struct {
-	Models []string `json:"models"`
-}
-
-type config struct {
-	Integrations map[string]*integration `json:"integrations"`
-}
-
-func configPath() (string, error) {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(home, ".ollama", "config", "config.json"), nil
-}
-
-func load() (*config, error) {
-	path, err := configPath()
-	if err != nil {
-		return nil, err
-	}
-
-	data, err := os.ReadFile(path)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return &config{Integrations: make(map[string]*integration)}, nil
-		}
-		return nil, err
-	}
-
-	var cfg config
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		return nil, fmt.Errorf("failed to parse config: %w, at: %s", err, path)
-	}
-	if cfg.Integrations == nil {
-		cfg.Integrations = make(map[string]*integration)
-	}
-	return &cfg, nil
-}
-
-func save(cfg *config) error {
-	path, err := configPath()
-	if err != nil {
-		return err
-	}
-
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
-		return err
-	}
-
-	data, err := json.MarshalIndent(cfg, "", "  ")
-	if err != nil {
-		return err
-	}
-
-	return writeWithBackup(path, data)
-}
-
-func saveIntegration(appName string, models []string) error {
-	if appName == "" {
-		return errors.New("app name cannot be empty")
-	}
-
-	cfg, err := load()
-	if err != nil {
-		return err
-	}
-
-	cfg.Integrations[strings.ToLower(appName)] = &integration{
-		Models: models,
-	}
-
-	return save(cfg)
-}
-
-func loadIntegration(appName string) (*integration, error) {
-	cfg, err := load()
-	if err != nil {
-		return nil, err
-	}
-
-	ic, ok := cfg.Integrations[strings.ToLower(appName)]
-	if !ok {
-		return nil, os.ErrNotExist
-	}
-
-	return ic, nil
-}
-
-func listIntegrations() ([]integration, error) {
-	cfg, err := load()
-	if err != nil {
-		return nil, err
-	}
-
-	result := make([]integration, 0, len(cfg.Integrations))
-	for _, ic := range cfg.Integrations {
-		result = append(result, *ic)
-	}
-
-	return result, nil
-}
--- a/cmd/config/config_test.go
+++ b/cmd/config/config_test.go
@@ -1,373 +0,0 @@
-package config
-
-import (
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-)
-
-// setTestHome sets both HOME (Unix) and USERPROFILE (Windows) for cross-platform tests
-func setTestHome(t *testing.T, dir string) {
-	t.Setenv("HOME", dir)
-	t.Setenv("USERPROFILE", dir)
-}
-
-// editorPaths is a test helper that safely calls Paths if the runner implements Editor
-func editorPaths(r Runner) []string {
-	if editor, ok := r.(Editor); ok {
-		return editor.Paths()
-	}
-	return nil
-}
-
-func TestIntegrationConfig(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("save and load round-trip", func(t *testing.T) {
-		models := []string{"llama3.2", "mistral", "qwen2.5"}
-		if err := saveIntegration("claude", models); err != nil {
-			t.Fatal(err)
-		}
-
-		config, err := loadIntegration("claude")
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if len(config.Models) != len(models) {
-			t.Errorf("expected %d models, got %d", len(models), len(config.Models))
-		}
-		for i, m := range models {
-			if config.Models[i] != m {
-				t.Errorf("model %d: expected %s, got %s", i, m, config.Models[i])
-			}
-		}
-	})
-
-	t.Run("defaultModel returns first model", func(t *testing.T) {
-		saveIntegration("codex", []string{"model-a", "model-b"})
-
-		config, _ := loadIntegration("codex")
-		defaultModel := ""
-		if len(config.Models) > 0 {
-			defaultModel = config.Models[0]
-		}
-		if defaultModel != "model-a" {
-			t.Errorf("expected model-a, got %s", defaultModel)
-		}
-	})
-
-	t.Run("defaultModel returns empty for no models", func(t *testing.T) {
-		config := &integration{Models: []string{}}
-		defaultModel := ""
-		if len(config.Models) > 0 {
-			defaultModel = config.Models[0]
-		}
-		if defaultModel != "" {
-			t.Errorf("expected empty string, got %s", defaultModel)
-		}
-	})
-
-	t.Run("app name is case-insensitive", func(t *testing.T) {
-		saveIntegration("Claude", []string{"model-x"})
-
-		config, err := loadIntegration("claude")
-		if err != nil {
-			t.Fatal(err)
-		}
-		defaultModel := ""
-		if len(config.Models) > 0 {
-			defaultModel = config.Models[0]
-		}
-		if defaultModel != "model-x" {
-			t.Errorf("expected model-x, got %s", defaultModel)
-		}
-	})
-
-	t.Run("multiple integrations in single file", func(t *testing.T) {
-		saveIntegration("app1", []string{"model-1"})
-		saveIntegration("app2", []string{"model-2"})
-
-		config1, _ := loadIntegration("app1")
-		config2, _ := loadIntegration("app2")
-
-		defaultModel1 := ""
-		if len(config1.Models) > 0 {
-			defaultModel1 = config1.Models[0]
-		}
-		defaultModel2 := ""
-		if len(config2.Models) > 0 {
-			defaultModel2 = config2.Models[0]
-		}
-		if defaultModel1 != "model-1" {
-			t.Errorf("expected model-1, got %s", defaultModel1)
-		}
-		if defaultModel2 != "model-2" {
-			t.Errorf("expected model-2, got %s", defaultModel2)
-		}
-	})
-}
-
-func TestListIntegrations(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("returns empty when no integrations", func(t *testing.T) {
-		configs, err := listIntegrations()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if len(configs) != 0 {
-			t.Errorf("expected 0 integrations, got %d", len(configs))
-		}
-	})
-
-	t.Run("returns all saved integrations", func(t *testing.T) {
-		saveIntegration("claude", []string{"model-1"})
-		saveIntegration("droid", []string{"model-2"})
-
-		configs, err := listIntegrations()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if len(configs) != 2 {
-			t.Errorf("expected 2 integrations, got %d", len(configs))
-		}
-	})
-}
-
-func TestEditorPaths(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("returns empty for claude (no Editor)", func(t *testing.T) {
-		r := integrations["claude"]
-		paths := editorPaths(r)
-		if len(paths) != 0 {
-			t.Errorf("expected no paths for claude, got %v", paths)
-		}
-	})
-
-	t.Run("returns empty for codex (no Editor)", func(t *testing.T) {
-		r := integrations["codex"]
-		paths := editorPaths(r)
-		if len(paths) != 0 {
-			t.Errorf("expected no paths for codex, got %v", paths)
-		}
-	})
-
-	t.Run("returns empty for droid when no config exists", func(t *testing.T) {
-		r := integrations["droid"]
-		paths := editorPaths(r)
-		if len(paths) != 0 {
-			t.Errorf("expected no paths, got %v", paths)
-		}
-	})
-
-	t.Run("returns path for droid when config exists", func(t *testing.T) {
-		settingsDir, _ := os.UserHomeDir()
-		settingsDir = filepath.Join(settingsDir, ".factory")
-		os.MkdirAll(settingsDir, 0o755)
-		os.WriteFile(filepath.Join(settingsDir, "settings.json"), []byte(`{}`), 0o644)
-
-		r := integrations["droid"]
-		paths := editorPaths(r)
-		if len(paths) != 1 {
-			t.Errorf("expected 1 path, got %d", len(paths))
-		}
-	})
-
-	t.Run("returns paths for opencode when configs exist", func(t *testing.T) {
-		home, _ := os.UserHomeDir()
-		configDir := filepath.Join(home, ".config", "opencode")
-		stateDir := filepath.Join(home, ".local", "state", "opencode")
-		os.MkdirAll(configDir, 0o755)
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "opencode.json"), []byte(`{}`), 0o644)
-		os.WriteFile(filepath.Join(stateDir, "model.json"), []byte(`{}`), 0o644)
-
-		r := integrations["opencode"]
-		paths := editorPaths(r)
-		if len(paths) != 2 {
-			t.Errorf("expected 2 paths, got %d: %v", len(paths), paths)
-		}
-	})
-}
-
-func TestLoadIntegration_CorruptedJSON(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	// Create corrupted config.json file
-	dir := filepath.Join(tmpDir, ".ollama", "config")
-	os.MkdirAll(dir, 0o755)
-	os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{corrupted json`), 0o644)
-
-	// Corrupted file is treated as empty, so loadIntegration returns not found
-	_, err := loadIntegration("test")
-	if err == nil {
-		t.Error("expected error for nonexistent integration in corrupted file")
-	}
-}
-
-func TestSaveIntegration_NilModels(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	if err := saveIntegration("test", nil); err != nil {
-		t.Fatalf("saveIntegration with nil models failed: %v", err)
-	}
-
-	config, err := loadIntegration("test")
-	if err != nil {
-		t.Fatalf("loadIntegration failed: %v", err)
-	}
-
-	if config.Models == nil {
-		// nil is acceptable
-	} else if len(config.Models) != 0 {
-		t.Errorf("expected empty or nil models, got %v", config.Models)
-	}
-}
-
-func TestSaveIntegration_EmptyAppName(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	err := saveIntegration("", []string{"model"})
-	if err == nil {
-		t.Error("expected error for empty app name, got nil")
-	}
-	if err != nil && !strings.Contains(err.Error(), "app name cannot be empty") {
-		t.Errorf("expected 'app name cannot be empty' error, got: %v", err)
-	}
-}
-
-func TestLoadIntegration_NonexistentIntegration(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	_, err := loadIntegration("nonexistent")
-	if err == nil {
-		t.Error("expected error for nonexistent integration, got nil")
-	}
-	if !os.IsNotExist(err) {
-		t.Logf("error type is os.ErrNotExist as expected: %v", err)
-	}
-}
-
-func TestConfigPath(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	path, err := configPath()
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	expected := filepath.Join(tmpDir, ".ollama", "config", "config.json")
-	if path != expected {
-		t.Errorf("expected %s, got %s", expected, path)
-	}
-}
-
-func TestLoad(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("returns empty config when file does not exist", func(t *testing.T) {
-		cfg, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if cfg == nil {
-			t.Fatal("expected non-nil config")
-		}
-		if cfg.Integrations == nil {
-			t.Error("expected non-nil Integrations map")
-		}
-		if len(cfg.Integrations) != 0 {
-			t.Errorf("expected empty Integrations, got %d", len(cfg.Integrations))
-		}
-	})
-
-	t.Run("loads existing config", func(t *testing.T) {
-		path, _ := configPath()
-		os.MkdirAll(filepath.Dir(path), 0o755)
-		os.WriteFile(path, []byte(`{"integrations":{"test":{"models":["model-a"]}}}`), 0o644)
-
-		cfg, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if cfg.Integrations["test"] == nil {
-			t.Fatal("expected test integration")
-		}
-		if len(cfg.Integrations["test"].Models) != 1 {
-			t.Errorf("expected 1 model, got %d", len(cfg.Integrations["test"].Models))
-		}
-	})
-
-	t.Run("returns error for corrupted JSON", func(t *testing.T) {
-		path, _ := configPath()
-		os.MkdirAll(filepath.Dir(path), 0o755)
-		os.WriteFile(path, []byte(`{corrupted`), 0o644)
-
-		_, err := load()
-		if err == nil {
-			t.Error("expected error for corrupted JSON")
-		}
-	})
-}
-
-func TestSave(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("creates config file", func(t *testing.T) {
-		cfg := &config{
-			Integrations: map[string]*integration{
-				"test": {Models: []string{"model-a", "model-b"}},
-			},
-		}
-
-		if err := save(cfg); err != nil {
-			t.Fatal(err)
-		}
-
-		path, _ := configPath()
-		if _, err := os.Stat(path); os.IsNotExist(err) {
-			t.Error("config file was not created")
-		}
-	})
-
-	t.Run("round-trip preserves data", func(t *testing.T) {
-		cfg := &config{
-			Integrations: map[string]*integration{
-				"claude": {Models: []string{"llama3.2", "mistral"}},
-				"codex":  {Models: []string{"qwen2.5"}},
-			},
-		}
-
-		if err := save(cfg); err != nil {
-			t.Fatal(err)
-		}
-
-		loaded, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if len(loaded.Integrations) != 2 {
-			t.Errorf("expected 2 integrations, got %d", len(loaded.Integrations))
-		}
-		if loaded.Integrations["claude"] == nil {
-			t.Error("missing claude integration")
-		}
-		if len(loaded.Integrations["claude"].Models) != 2 {
-			t.Errorf("expected 2 models for claude, got %d", len(loaded.Integrations["claude"].Models))
-		}
-	})
-}
--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -1,164 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"slices"
-)
-
-// Droid implements Runner and Editor for Droid integration
-type Droid struct{}
-
-// droidModelEntry represents a custom model entry in Droid's settings.json
-type droidModelEntry struct {
-	Model           string `json:"model"`
-	DisplayName     string `json:"displayName"`
-	BaseURL         string `json:"baseUrl"`
-	APIKey          string `json:"apiKey"`
-	Provider        string `json:"provider"`
-	MaxOutputTokens int    `json:"maxOutputTokens"`
-	SupportsImages  bool   `json:"supportsImages"`
-	ID              string `json:"id"`
-	Index           int    `json:"index"`
-}
-
-func (d *Droid) String() string { return "Droid" }
-
-func (d *Droid) Run(model string) error {
-	if _, err := exec.LookPath("droid"); err != nil {
-		return fmt.Errorf("droid is not installed, install from https://docs.factory.ai/cli/getting-started/quickstart")
-	}
-
-	// Call Edit() to ensure config is up-to-date before launch
-	models := []string{model}
-	if config, err := loadIntegration("droid"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := d.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	cmd := exec.Command("droid")
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	return cmd.Run()
-}
-
-func (d *Droid) Paths() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-	p := filepath.Join(home, ".factory", "settings.json")
-	if _, err := os.Stat(p); err == nil {
-		return []string{p}
-	}
-	return nil
-}
-
-func (d *Droid) Edit(models []string) error {
-	if len(models) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	settingsPath := filepath.Join(home, ".factory", "settings.json")
-	if err := os.MkdirAll(filepath.Dir(settingsPath), 0o755); err != nil {
-		return err
-	}
-
-	settings := make(map[string]any)
-	if data, err := os.ReadFile(settingsPath); err == nil {
-		if err := json.Unmarshal(data, &settings); err != nil {
-			return fmt.Errorf("failed to parse settings file: %w, at: %s", err, settingsPath)
-		}
-	}
-
-	customModels, _ := settings["customModels"].([]any)
-
-	// Keep only non-Ollama models (we'll rebuild Ollama models fresh)
-	nonOllamaModels := slices.DeleteFunc(slices.Clone(customModels), func(m any) bool {
-		entry, ok := m.(droidModelEntry)
-		if !ok {
-			return false
-		}
-		return entry.APIKey != "ollama"
-	})
-
-	// Build new Ollama model entries with sequential indices (0, 1, 2, ...)
-	var ollamaModels []any
-	var defaultModelID string
-	for i, model := range models {
-		modelID := fmt.Sprintf("custom:%s-%d", model, i)
-		ollamaModels = append(ollamaModels, droidModelEntry{
-			Model:           model,
-			DisplayName:     model,
-			BaseURL:         "http://localhost:11434/v1",
-			APIKey:          "ollama",
-			Provider:        "generic-chat-completion-api",
-			MaxOutputTokens: 64000,
-			SupportsImages:  false,
-			ID:              modelID,
-			Index:           i,
-		})
-		if i == 0 {
-			defaultModelID = modelID
-		}
-	}
-
-	settings["customModels"] = append(ollamaModels, nonOllamaModels...)
-
-	sessionSettings, ok := settings["sessionDefaultSettings"].(map[string]any)
-	if !ok {
-		sessionSettings = make(map[string]any)
-	}
-	sessionSettings["model"] = defaultModelID
-
-	if effort, ok := sessionSettings["reasoningEffort"].(string); !ok || !isValidReasoningEffort(effort) {
-		sessionSettings["reasoningEffort"] = "none"
-	}
-
-	settings["sessionDefaultSettings"] = sessionSettings
-
-	data, err := json.MarshalIndent(settings, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(settingsPath, data)
-}
-
-func (d *Droid) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-	settings, err := readJSONFile(filepath.Join(home, ".factory", "settings.json"))
-	if err != nil {
-		return nil
-	}
-
-	customModels, _ := settings["customModels"].([]droidModelEntry)
-
-	var result []string
-	for _, m := range customModels {
-		if m.APIKey != "ollama" {
-			continue
-		}
-		result = append(result, m.Model)
-	}
-	return result
-}
-
-var validReasoningEfforts = []string{"high", "medium", "low", "none"}
-
-func isValidReasoningEffort(effort string) bool {
-	return slices.Contains(validReasoningEfforts, effort)
-}
--- a/cmd/config/droid_test.go
+++ b/cmd/config/droid_test.go
@@ -1,454 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-func TestDroidIntegration(t *testing.T) {
-	d := &Droid{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := d.String(); got != "Droid" {
-			t.Errorf("String() = %q, want %q", got, "Droid")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = d
-	})
-
-	t.Run("implements Editor", func(t *testing.T) {
-		var _ Editor = d
-	})
-}
-
-func TestDroidEdit(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	cleanup := func() {
-		os.RemoveAll(settingsDir)
-	}
-
-	readSettings := func() map[string]any {
-		data, _ := os.ReadFile(settingsPath)
-		var settings map[string]any
-		json.Unmarshal(data, &settings)
-		return settings
-	}
-
-	getCustomModels := func(settings map[string]any) []map[string]any {
-		models, ok := settings["customModels"].([]any)
-		if !ok {
-			return nil
-		}
-		var result []map[string]any
-		for _, m := range models {
-			if entry, ok := m.(map[string]any); ok {
-				result = append(result, entry)
-			}
-		}
-		return result
-	}
-
-	t.Run("fresh install creates models with sequential indices", func(t *testing.T) {
-		cleanup()
-		if err := d.Edit([]string{"model-a", "model-b"}); err != nil {
-			t.Fatal(err)
-		}
-
-		settings := readSettings()
-		models := getCustomModels(settings)
-
-		if len(models) != 2 {
-			t.Fatalf("expected 2 models, got %d", len(models))
-		}
-
-		// Check first model
-		if models[0]["model"] != "model-a" {
-			t.Errorf("expected model-a, got %s", models[0]["model"])
-		}
-		if models[0]["id"] != "custom:model-a-[Ollama]-0" {
-			t.Errorf("expected custom:model-a-[Ollama]-0, got %s", models[0]["id"])
-		}
-		if models[0]["index"] != float64(0) {
-			t.Errorf("expected index 0, got %v", models[0]["index"])
-		}
-
-		// Check second model
-		if models[1]["model"] != "model-b" {
-			t.Errorf("expected model-b, got %s", models[1]["model"])
-		}
-		if models[1]["id"] != "custom:model-b-[Ollama]-1" {
-			t.Errorf("expected custom:model-b-[Ollama]-1, got %s", models[1]["id"])
-		}
-		if models[1]["index"] != float64(1) {
-			t.Errorf("expected index 1, got %v", models[1]["index"])
-		}
-	})
-
-	t.Run("sets sessionDefaultSettings.model to first model ID", func(t *testing.T) {
-		cleanup()
-		if err := d.Edit([]string{"model-a", "model-b"}); err != nil {
-			t.Fatal(err)
-		}
-
-		settings := readSettings()
-		session, ok := settings["sessionDefaultSettings"].(map[string]any)
-		if !ok {
-			t.Fatal("sessionDefaultSettings not found")
-		}
-		if session["model"] != "custom:model-a-[Ollama]-0" {
-			t.Errorf("expected custom:model-a-[Ollama]-0, got %s", session["model"])
-		}
-	})
-
-	t.Run("re-indexes when models removed", func(t *testing.T) {
-		cleanup()
-		// Add three models
-		d.Edit([]string{"model-a", "model-b", "model-c"})
-
-		// Remove middle model
-		d.Edit([]string{"model-a", "model-c"})
-
-		settings := readSettings()
-		models := getCustomModels(settings)
-
-		if len(models) != 2 {
-			t.Fatalf("expected 2 models, got %d", len(models))
-		}
-
-		// Check indices are sequential 0, 1
-		if models[0]["index"] != float64(0) {
-			t.Errorf("expected index 0, got %v", models[0]["index"])
-		}
-		if models[1]["index"] != float64(1) {
-			t.Errorf("expected index 1, got %v", models[1]["index"])
-		}
-
-		// Check IDs match new indices
-		if models[0]["id"] != "custom:model-a-[Ollama]-0" {
-			t.Errorf("expected custom:model-a-[Ollama]-0, got %s", models[0]["id"])
-		}
-		if models[1]["id"] != "custom:model-c-[Ollama]-1" {
-			t.Errorf("expected custom:model-c-[Ollama]-1, got %s", models[1]["id"])
-		}
-	})
-
-	t.Run("preserves non-Ollama custom models", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(settingsDir, 0o755)
-		// Pre-existing non-Ollama model
-		os.WriteFile(settingsPath, []byte(`{
-			"customModels": [
-				{"model": "gpt-4", "displayName": "GPT-4", "provider": "openai"}
-			]
-		}`), 0o644)
-
-		d.Edit([]string{"model-a"})
-
-		settings := readSettings()
-		models := getCustomModels(settings)
-
-		if len(models) != 2 {
-			t.Fatalf("expected 2 models (1 Ollama + 1 non-Ollama), got %d", len(models))
-		}
-
-		// Ollama model should be first
-		if models[0]["model"] != "model-a" {
-			t.Errorf("expected Ollama model first, got %s", models[0]["model"])
-		}
-
-		// Non-Ollama model should be preserved at end
-		if models[1]["model"] != "gpt-4" {
-			t.Errorf("expected gpt-4 preserved, got %s", models[1]["model"])
-		}
-	})
-
-	t.Run("preserves other settings", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(settingsDir, 0o755)
-		os.WriteFile(settingsPath, []byte(`{
-			"theme": "dark",
-			"enableHooks": true,
-			"sessionDefaultSettings": {"autonomyMode": "auto-high"}
-		}`), 0o644)
-
-		d.Edit([]string{"model-a"})
-
-		settings := readSettings()
-
-		if settings["theme"] != "dark" {
-			t.Error("theme was not preserved")
-		}
-		if settings["enableHooks"] != true {
-			t.Error("enableHooks was not preserved")
-		}
-
-		session := settings["sessionDefaultSettings"].(map[string]any)
-		if session["autonomyMode"] != "auto-high" {
-			t.Error("autonomyMode was not preserved")
-		}
-	})
-
-	t.Run("required fields present", func(t *testing.T) {
-		cleanup()
-		d.Edit([]string{"test-model"})
-
-		settings := readSettings()
-		models := getCustomModels(settings)
-
-		if len(models) != 1 {
-			t.Fatal("expected 1 model")
-		}
-
-		model := models[0]
-		requiredFields := []string{"model", "displayName", "baseUrl", "apiKey", "provider", "maxOutputTokens", "id", "index"}
-		for _, field := range requiredFields {
-			if model[field] == nil {
-				t.Errorf("missing required field: %s", field)
-			}
-		}
-
-		if model["baseUrl"] != "http://localhost:11434/v1" {
-			t.Errorf("unexpected baseUrl: %s", model["baseUrl"])
-		}
-		if model["apiKey"] != "ollama" {
-			t.Errorf("unexpected apiKey: %s", model["apiKey"])
-		}
-		if model["provider"] != "generic-chat-completion-api" {
-			t.Errorf("unexpected provider: %s", model["provider"])
-		}
-	})
-
-	t.Run("fixes invalid reasoningEffort", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(settingsDir, 0o755)
-		// Pre-existing settings with invalid reasoningEffort
-		os.WriteFile(settingsPath, []byte(`{
-			"sessionDefaultSettings": {"reasoningEffort": "off"}
-		}`), 0o644)
-
-		d.Edit([]string{"model-a"})
-
-		settings := readSettings()
-		session := settings["sessionDefaultSettings"].(map[string]any)
-
-		if session["reasoningEffort"] != "none" {
-			t.Errorf("expected reasoningEffort to be fixed to 'none', got %s", session["reasoningEffort"])
-		}
-	})
-
-	t.Run("preserves valid reasoningEffort", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(settingsDir, 0o755)
-		os.WriteFile(settingsPath, []byte(`{
-			"sessionDefaultSettings": {"reasoningEffort": "high"}
-		}`), 0o644)
-
-		d.Edit([]string{"model-a"})
-
-		settings := readSettings()
-		session := settings["sessionDefaultSettings"].(map[string]any)
-
-		if session["reasoningEffort"] != "high" {
-			t.Errorf("expected reasoningEffort to remain 'high', got %s", session["reasoningEffort"])
-		}
-	})
-}
-
-// Edge case tests for droid.go
-
-func TestDroidEdit_CorruptedJSON(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	os.MkdirAll(settingsDir, 0o755)
-	os.WriteFile(settingsPath, []byte(`{corrupted json content`), 0o644)
-
-	// Corrupted JSON should return an error so user knows something is wrong
-	err := d.Edit([]string{"model-a"})
-	if err == nil {
-		t.Fatal("expected error for corrupted JSON, got nil")
-	}
-
-	// Original corrupted file should be preserved (not overwritten)
-	data, _ := os.ReadFile(settingsPath)
-	if string(data) != `{corrupted json content` {
-		t.Errorf("corrupted file was modified: got %s", string(data))
-	}
-}
-
-func TestDroidEdit_WrongTypeCustomModels(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	os.MkdirAll(settingsDir, 0o755)
-	// customModels is a string instead of array
-	os.WriteFile(settingsPath, []byte(`{"customModels": "not an array"}`), 0o644)
-
-	// Should not panic - wrong type should be handled gracefully
-	err := d.Edit([]string{"model-a"})
-	if err != nil {
-		t.Fatalf("Edit failed with wrong type customModels: %v", err)
-	}
-
-	// Verify models were added correctly
-	data, _ := os.ReadFile(settingsPath)
-	var settings map[string]any
-	json.Unmarshal(data, &settings)
-
-	customModels, ok := settings["customModels"].([]any)
-	if !ok {
-		t.Fatalf("customModels should be array after setup, got %T", settings["customModels"])
-	}
-	if len(customModels) != 1 {
-		t.Errorf("expected 1 model, got %d", len(customModels))
-	}
-}
-
-func TestDroidEdit_EmptyModels(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	os.MkdirAll(settingsDir, 0o755)
-	originalContent := `{"customModels": [{"model": "existing"}]}`
-	os.WriteFile(settingsPath, []byte(originalContent), 0o644)
-
-	// Empty models should be no-op
-	err := d.Edit([]string{})
-	if err != nil {
-		t.Fatalf("Edit with empty models failed: %v", err)
-	}
-
-	// Original content should be preserved (file not modified)
-	data, _ := os.ReadFile(settingsPath)
-	if string(data) != originalContent {
-		t.Errorf("empty models should not modify file, but content changed")
-	}
-}
-
-func TestDroidEdit_DuplicateModels(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	// Add same model twice
-	err := d.Edit([]string{"model-a", "model-a"})
-	if err != nil {
-		t.Fatalf("Edit with duplicates failed: %v", err)
-	}
-
-	settings, err := readJSONFile(settingsPath)
-	if err != nil {
-		t.Fatalf("readJSONFile failed: %v", err)
-	}
-
-	customModels, _ := settings["customModels"].([]any)
-	// Document current behavior: duplicates are kept as separate entries
-	if len(customModels) != 2 {
-		t.Logf("Note: duplicates result in %d entries (documenting behavior)", len(customModels))
-	}
-}
-
-func TestDroidEdit_MalformedModelEntry(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	os.MkdirAll(settingsDir, 0o755)
-	// Model entry is a string instead of a map
-	os.WriteFile(settingsPath, []byte(`{"customModels": ["not a map", 123]}`), 0o644)
-
-	err := d.Edit([]string{"model-a"})
-	if err != nil {
-		t.Fatalf("Edit with malformed entries failed: %v", err)
-	}
-
-	// Malformed entries should be preserved in nonOllamaModels
-	settings, _ := readJSONFile(settingsPath)
-	customModels, _ := settings["customModels"].([]any)
-
-	// Should have: 1 new Ollama model + 2 preserved malformed entries
-	if len(customModels) != 3 {
-		t.Errorf("expected 3 entries (1 new + 2 preserved malformed), got %d", len(customModels))
-	}
-}
-
-func TestDroidEdit_WrongTypeSessionSettings(t *testing.T) {
-	d := &Droid{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	settingsDir := filepath.Join(tmpDir, ".factory")
-	settingsPath := filepath.Join(settingsDir, "settings.json")
-
-	os.MkdirAll(settingsDir, 0o755)
-	// sessionDefaultSettings is a string instead of map
-	os.WriteFile(settingsPath, []byte(`{"sessionDefaultSettings": "not a map"}`), 0o644)
-
-	err := d.Edit([]string{"model-a"})
-	if err != nil {
-		t.Fatalf("Edit with wrong type sessionDefaultSettings failed: %v", err)
-	}
-
-	// Should create proper sessionDefaultSettings
-	settings, _ := readJSONFile(settingsPath)
-	session, ok := settings["sessionDefaultSettings"].(map[string]any)
-	if !ok {
-		t.Fatalf("sessionDefaultSettings should be map after setup, got %T", settings["sessionDefaultSettings"])
-	}
-	if session["model"] == nil {
-		t.Error("expected model to be set in sessionDefaultSettings")
-	}
-}
-
-func TestIsValidReasoningEffort(t *testing.T) {
-	tests := []struct {
-		effort string
-		valid  bool
-	}{
-		{"high", true},
-		{"medium", true},
-		{"low", true},
-		{"none", true},
-		{"off", false},
-		{"", false},
-		{"HIGH", false}, // case sensitive
-		{"max", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.effort, func(t *testing.T) {
-			got := isValidReasoningEffort(tt.effort)
-			if got != tt.valid {
-				t.Errorf("isValidReasoningEffort(%q) = %v, want %v", tt.effort, got, tt.valid)
-			}
-		})
-	}
-}
--- a/cmd/config/files.go
+++ b/cmd/config/files.go
@@ -1,99 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"time"
-)
-
-func readJSONFile(path string) (map[string]any, error) {
-	data, err := os.ReadFile(path)
-	if err != nil {
-		return nil, err
-	}
-	var result map[string]any
-	if err := json.Unmarshal(data, &result); err != nil {
-		return nil, err
-	}
-	return result, nil
-}
-
-func copyFile(src, dst string) error {
-	info, err := os.Stat(src)
-	if err != nil {
-		return err
-	}
-	data, err := os.ReadFile(src)
-	if err != nil {
-		return err
-	}
-	return os.WriteFile(dst, data, info.Mode().Perm())
-}
-
-func backupDir() string {
-	return filepath.Join(os.TempDir(), "ollama-backups")
-}
-
-func backupToTmp(srcPath string) (string, error) {
-	dir := backupDir()
-	if err := os.MkdirAll(dir, 0o755); err != nil {
-		return "", err
-	}
-
-	backupPath := filepath.Join(dir, fmt.Sprintf("%s.%d", filepath.Base(srcPath), time.Now().Unix()))
-	if err := copyFile(srcPath, backupPath); err != nil {
-		return "", err
-	}
-	return backupPath, nil
-}
-
-// writeWithBackup writes data to path via temp file + rename, backing up any existing file first
-func writeWithBackup(path string, data []byte) error {
-	var backupPath string
-	// backup must be created before any writes to the target file
-	if existingContent, err := os.ReadFile(path); err == nil {
-		if !bytes.Equal(existingContent, data) {
-			backupPath, err = backupToTmp(path)
-			if err != nil {
-				return fmt.Errorf("backup failed: %w", err)
-			}
-		}
-	} else if !os.IsNotExist(err) {
-		return fmt.Errorf("read existing file: %w", err)
-	}
-
-	dir := filepath.Dir(path)
-	tmp, err := os.CreateTemp(dir, ".tmp-*")
-	if err != nil {
-		return fmt.Errorf("create temp failed: %w", err)
-	}
-	tmpPath := tmp.Name()
-
-	if _, err := tmp.Write(data); err != nil {
-		_ = tmp.Close()
-		_ = os.Remove(tmpPath)
-		return fmt.Errorf("write failed: %w", err)
-	}
-	if err := tmp.Sync(); err != nil {
-		_ = tmp.Close()
-		_ = os.Remove(tmpPath)
-		return fmt.Errorf("sync failed: %w", err)
-	}
-	if err := tmp.Close(); err != nil {
-		_ = os.Remove(tmpPath)
-		return fmt.Errorf("close failed: %w", err)
-	}
-
-	if err := os.Rename(tmpPath, path); err != nil {
-		_ = os.Remove(tmpPath)
-		if backupPath != "" {
-			_ = copyFile(backupPath, path)
-		}
-		return fmt.Errorf("rename failed: %w", err)
-	}
-
-	return nil
-}
--- a/cmd/config/files_test.go
+++ b/cmd/config/files_test.go
@@ -1,502 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"runtime"
-	"testing"
-)
-
-func mustMarshal(t *testing.T, v any) []byte {
-	t.Helper()
-	data, err := json.MarshalIndent(v, "", "  ")
-	if err != nil {
-		t.Fatal(err)
-	}
-	return data
-}
-
-func TestWriteWithBackup(t *testing.T) {
-	tmpDir := t.TempDir()
-
-	t.Run("creates file", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "new.json")
-		data := mustMarshal(t, map[string]string{"key": "value"})
-
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		content, err := os.ReadFile(path)
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		var result map[string]string
-		if err := json.Unmarshal(content, &result); err != nil {
-			t.Fatal(err)
-		}
-		if result["key"] != "value" {
-			t.Errorf("expected value, got %s", result["key"])
-		}
-	})
-
-	t.Run("creates backup in /tmp/ollama-backups", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "backup.json")
-
-		os.WriteFile(path, []byte(`{"original": true}`), 0o644)
-
-		data := mustMarshal(t, map[string]bool{"updated": true})
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries, err := os.ReadDir(backupDir())
-		if err != nil {
-			t.Fatal("backup directory not created")
-		}
-
-		var foundBackup bool
-		for _, entry := range entries {
-			if filepath.Ext(entry.Name()) != ".json" {
-				name := entry.Name()
-				if len(name) > len("backup.json.") && name[:len("backup.json.")] == "backup.json." {
-					backupPath := filepath.Join(backupDir(), name)
-					backup, err := os.ReadFile(backupPath)
-					if err == nil {
-						var backupData map[string]bool
-						json.Unmarshal(backup, &backupData)
-						if backupData["original"] {
-							foundBackup = true
-							os.Remove(backupPath)
-							break
-						}
-					}
-				}
-			}
-		}
-
-		if !foundBackup {
-			t.Error("backup file not created in /tmp/ollama-backups")
-		}
-
-		current, _ := os.ReadFile(path)
-		var currentData map[string]bool
-		json.Unmarshal(current, &currentData)
-		if !currentData["updated"] {
-			t.Error("file doesn't contain updated data")
-		}
-	})
-
-	t.Run("no backup for new file", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "nobak.json")
-
-		data := mustMarshal(t, map[string]string{"new": "file"})
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries, _ := os.ReadDir(backupDir())
-		for _, entry := range entries {
-			if len(entry.Name()) > len("nobak.json.") && entry.Name()[:len("nobak.json.")] == "nobak.json." {
-				t.Error("backup should not exist for new file")
-			}
-		}
-	})
-
-	t.Run("no backup when content unchanged", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "unchanged.json")
-
-		data := mustMarshal(t, map[string]string{"key": "value"})
-
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries1, _ := os.ReadDir(backupDir())
-		countBefore := 0
-		for _, e := range entries1 {
-			if len(e.Name()) > len("unchanged.json.") && e.Name()[:len("unchanged.json.")] == "unchanged.json." {
-				countBefore++
-			}
-		}
-
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries2, _ := os.ReadDir(backupDir())
-		countAfter := 0
-		for _, e := range entries2 {
-			if len(e.Name()) > len("unchanged.json.") && e.Name()[:len("unchanged.json.")] == "unchanged.json." {
-				countAfter++
-			}
-		}
-
-		if countAfter != countBefore {
-			t.Errorf("backup was created when content unchanged (before=%d, after=%d)", countBefore, countAfter)
-		}
-	})
-
-	t.Run("backup filename contains unix timestamp", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "timestamped.json")
-
-		os.WriteFile(path, []byte(`{"v": 1}`), 0o644)
-		data := mustMarshal(t, map[string]int{"v": 2})
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries, _ := os.ReadDir(backupDir())
-		var found bool
-		for _, entry := range entries {
-			name := entry.Name()
-			if len(name) > len("timestamped.json.") && name[:len("timestamped.json.")] == "timestamped.json." {
-				timestamp := name[len("timestamped.json."):]
-				for _, c := range timestamp {
-					if c < '0' || c > '9' {
-						t.Errorf("backup filename timestamp contains non-numeric character: %s", name)
-					}
-				}
-				found = true
-				os.Remove(filepath.Join(backupDir(), name))
-				break
-			}
-		}
-		if !found {
-			t.Error("backup file with timestamp not found")
-		}
-	})
-}
-
-// Edge case tests for files.go
-
-// TestWriteWithBackup_FailsIfBackupFails documents critical behavior: if backup fails, we must not proceed.
-// User could lose their config with no way to recover.
-func TestWriteWithBackup_FailsIfBackupFails(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "config.json")
-
-	// Create original file
-	originalContent := []byte(`{"original": true}`)
-	os.WriteFile(path, originalContent, 0o644)
-
-	// Make backup directory read-only to force backup failure
-	backupDir := backupDir()
-	os.MkdirAll(backupDir, 0o755)
-	os.Chmod(backupDir, 0o444) // Read-only
-	defer os.Chmod(backupDir, 0o755)
-
-	newContent := []byte(`{"updated": true}`)
-	err := writeWithBackup(path, newContent)
-
-	// Should fail because backup couldn't be created
-	if err == nil {
-		t.Error("expected error when backup fails, got nil")
-	}
-
-	// Original file should be preserved
-	current, _ := os.ReadFile(path)
-	if string(current) != string(originalContent) {
-		t.Errorf("original file was modified despite backup failure: got %s", string(current))
-	}
-}
-
-// TestWriteWithBackup_PermissionDenied verifies clear error when target file has wrong permissions.
-// Common issue when config owned by root or wrong perms.
-func TestWriteWithBackup_PermissionDenied(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-
-	// Create a read-only directory
-	readOnlyDir := filepath.Join(tmpDir, "readonly")
-	os.MkdirAll(readOnlyDir, 0o755)
-	os.Chmod(readOnlyDir, 0o444)
-	defer os.Chmod(readOnlyDir, 0o755)
-
-	path := filepath.Join(readOnlyDir, "config.json")
-	err := writeWithBackup(path, []byte(`{"test": true}`))
-
-	if err == nil {
-		t.Error("expected permission error, got nil")
-	}
-}
-
-// TestWriteWithBackup_DirectoryDoesNotExist verifies behavior when target directory doesn't exist.
-// writeWithBackup doesn't create directories - caller is responsible.
-func TestWriteWithBackup_DirectoryDoesNotExist(t *testing.T) {
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "nonexistent", "subdir", "config.json")
-
-	err := writeWithBackup(path, []byte(`{"test": true}`))
-
-	// Should fail because directory doesn't exist
-	if err == nil {
-		t.Error("expected error for nonexistent directory, got nil")
-	}
-}
-
-// TestWriteWithBackup_SymlinkTarget documents behavior when target is a symlink.
-// Documents what happens if user symlinks their config file.
-func TestWriteWithBackup_SymlinkTarget(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("symlink tests may require admin on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	realFile := filepath.Join(tmpDir, "real.json")
-	symlink := filepath.Join(tmpDir, "link.json")
-
-	// Create real file and symlink
-	os.WriteFile(realFile, []byte(`{"v": 1}`), 0o644)
-	os.Symlink(realFile, symlink)
-
-	// Write through symlink
-	err := writeWithBackup(symlink, []byte(`{"v": 2}`))
-	if err != nil {
-		t.Fatalf("writeWithBackup through symlink failed: %v", err)
-	}
-
-	// The real file should be updated (symlink followed for temp file creation)
-	content, _ := os.ReadFile(symlink)
-	if string(content) != `{"v": 2}` {
-		t.Errorf("symlink target not updated correctly: got %s", string(content))
-	}
-}
-
-// TestBackupToTmp_SpecialCharsInFilename verifies backup works with special characters.
-// User may have config files with unusual names.
-func TestBackupToTmp_SpecialCharsInFilename(t *testing.T) {
-	tmpDir := t.TempDir()
-
-	// File with spaces and special chars
-	path := filepath.Join(tmpDir, "my config (backup).json")
-	os.WriteFile(path, []byte(`{"test": true}`), 0o644)
-
-	backupPath, err := backupToTmp(path)
-	if err != nil {
-		t.Fatalf("backupToTmp with special chars failed: %v", err)
-	}
-
-	// Verify backup exists and has correct content
-	content, err := os.ReadFile(backupPath)
-	if err != nil {
-		t.Fatalf("could not read backup: %v", err)
-	}
-	if string(content) != `{"test": true}` {
-		t.Errorf("backup content mismatch: got %s", string(content))
-	}
-
-	os.Remove(backupPath)
-}
-
-// TestCopyFile_PreservesPermissions verifies that copyFile preserves file permissions.
-func TestCopyFile_PreservesPermissions(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission preservation tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	src := filepath.Join(tmpDir, "src.json")
-	dst := filepath.Join(tmpDir, "dst.json")
-
-	// Create source with specific permissions
-	os.WriteFile(src, []byte(`{"test": true}`), 0o600)
-
-	err := copyFile(src, dst)
-	if err != nil {
-		t.Fatalf("copyFile failed: %v", err)
-	}
-
-	srcInfo, _ := os.Stat(src)
-	dstInfo, _ := os.Stat(dst)
-
-	if srcInfo.Mode().Perm() != dstInfo.Mode().Perm() {
-		t.Errorf("permissions not preserved: src=%v, dst=%v", srcInfo.Mode().Perm(), dstInfo.Mode().Perm())
-	}
-}
-
-// TestCopyFile_SourceNotFound verifies clear error when source doesn't exist.
-func TestCopyFile_SourceNotFound(t *testing.T) {
-	tmpDir := t.TempDir()
-	src := filepath.Join(tmpDir, "nonexistent.json")
-	dst := filepath.Join(tmpDir, "dst.json")
-
-	err := copyFile(src, dst)
-	if err == nil {
-		t.Error("expected error for nonexistent source, got nil")
-	}
-}
-
-// TestWriteWithBackup_TargetIsDirectory verifies error when path points to a directory.
-func TestWriteWithBackup_TargetIsDirectory(t *testing.T) {
-	tmpDir := t.TempDir()
-	dirPath := filepath.Join(tmpDir, "actualdir")
-	os.MkdirAll(dirPath, 0o755)
-
-	err := writeWithBackup(dirPath, []byte(`{"test": true}`))
-	if err == nil {
-		t.Error("expected error when target is a directory, got nil")
-	}
-}
-
-// TestWriteWithBackup_EmptyData verifies writing zero bytes works correctly.
-func TestWriteWithBackup_EmptyData(t *testing.T) {
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "empty.json")
-
-	err := writeWithBackup(path, []byte{})
-	if err != nil {
-		t.Fatalf("writeWithBackup with empty data failed: %v", err)
-	}
-
-	content, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatalf("could not read file: %v", err)
-	}
-	if len(content) != 0 {
-		t.Errorf("expected empty file, got %d bytes", len(content))
-	}
-}
-
-// TestWriteWithBackup_FileUnreadableButDirWritable verifies behavior when existing file
-// cannot be read (for backup comparison) but directory is writable.
-func TestWriteWithBackup_FileUnreadableButDirWritable(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "unreadable.json")
-
-	// Create file and make it unreadable
-	os.WriteFile(path, []byte(`{"original": true}`), 0o644)
-	os.Chmod(path, 0o000)
-	defer os.Chmod(path, 0o644)
-
-	// Should fail because we can't read the file to compare/backup
-	err := writeWithBackup(path, []byte(`{"updated": true}`))
-	if err == nil {
-		t.Error("expected error when file is unreadable, got nil")
-	}
-}
-
-// TestWriteWithBackup_RapidSuccessiveWrites verifies backup works with multiple writes
-// within the same second (timestamp collision scenario).
-func TestWriteWithBackup_RapidSuccessiveWrites(t *testing.T) {
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "rapid.json")
-
-	// Create initial file
-	os.WriteFile(path, []byte(`{"v": 0}`), 0o644)
-
-	// Rapid successive writes
-	for i := 1; i <= 3; i++ {
-		data := []byte(fmt.Sprintf(`{"v": %d}`, i))
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatalf("write %d failed: %v", i, err)
-		}
-	}
-
-	// Verify final content
-	content, _ := os.ReadFile(path)
-	if string(content) != `{"v": 3}` {
-		t.Errorf("expected final content {\"v\": 3}, got %s", string(content))
-	}
-
-	// Verify at least one backup exists
-	entries, _ := os.ReadDir(backupDir())
-	var backupCount int
-	for _, e := range entries {
-		if len(e.Name()) > len("rapid.json.") && e.Name()[:len("rapid.json.")] == "rapid.json." {
-			backupCount++
-		}
-	}
-	if backupCount == 0 {
-		t.Error("expected at least one backup file from rapid writes")
-	}
-}
-
-// TestWriteWithBackup_BackupDirIsFile verifies error when backup directory path is a file.
-func TestWriteWithBackup_BackupDirIsFile(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("test modifies system temp directory")
-	}
-
-	// Create a file at the backup directory path
-	backupPath := backupDir()
-	// Clean up any existing directory first
-	os.RemoveAll(backupPath)
-	// Create a file instead of directory
-	os.WriteFile(backupPath, []byte("not a directory"), 0o644)
-	defer func() {
-		os.Remove(backupPath)
-		os.MkdirAll(backupPath, 0o755)
-	}()
-
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "test.json")
-	os.WriteFile(path, []byte(`{"original": true}`), 0o644)
-
-	err := writeWithBackup(path, []byte(`{"updated": true}`))
-	if err == nil {
-		t.Error("expected error when backup dir is a file, got nil")
-	}
-}
-
-// TestWriteWithBackup_NoOrphanTempFiles verifies temp files are cleaned up on failure.
-func TestWriteWithBackup_NoOrphanTempFiles(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-
-	// Count existing temp files
-	countTempFiles := func() int {
-		entries, _ := os.ReadDir(tmpDir)
-		count := 0
-		for _, e := range entries {
-			if len(e.Name()) > 4 && e.Name()[:4] == ".tmp" {
-				count++
-			}
-		}
-		return count
-	}
-
-	before := countTempFiles()
-
-	// Create a file, then make directory read-only to cause rename failure
-	path := filepath.Join(tmpDir, "orphan.json")
-	os.WriteFile(path, []byte(`{"v": 1}`), 0o644)
-
-	// Make a subdirectory and try to write there after making parent read-only
-	subDir := filepath.Join(tmpDir, "subdir")
-	os.MkdirAll(subDir, 0o755)
-	subPath := filepath.Join(subDir, "config.json")
-	os.WriteFile(subPath, []byte(`{"v": 1}`), 0o644)
-
-	// Make subdir read-only after creating temp file would succeed but rename would fail
-	// This is tricky to test - the temp file is created in the same dir, so if we can't
-	// rename, we also couldn't create. Let's just verify normal failure cleanup works.
-
-	// Force a failure by making the target a directory
-	badPath := filepath.Join(tmpDir, "isdir")
-	os.MkdirAll(badPath, 0o755)
-
-	_ = writeWithBackup(badPath, []byte(`{"test": true}`))
-
-	after := countTempFiles()
-	if after > before {
-		t.Errorf("orphan temp files left behind: before=%d, after=%d", before, after)
-	}
-}
--- a/cmd/config/integrations.go
+++ b/cmd/config/integrations.go
@@ -1,361 +0,0 @@
-package config
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"maps"
-	"os"
-	"os/exec"
-	"runtime"
-	"slices"
-	"strings"
-	"time"
-
-	"github.com/ollama/ollama/api"
-	"github.com/spf13/cobra"
-)
-
-// Runners execute the launching of a model with the integration - claude, codex
-// Editors can edit config files (supports multi-model selection) - opencode, droid
-// They are composable interfaces where in some cases an editor is also a runner - opencode, droid
-// Runner can run an integration with a model.
-
-type Runner interface {
-	Run(model string) error
-	// String returns the human-readable name of the integration
-	String() string
-}
-
-// Editor can edit config files (supports multi-model selection)
-type Editor interface {
-	// Paths returns the paths to the config files for the integration
-	Paths() []string
-	// Edit updates the config files for the integration with the given models
-	Edit(models []string) error
-	// Models returns the models currently configured for the integration
-	Models() []string
-}
-
-// integrations is the registry of available integrations.
-var integrations = map[string]Runner{
-	"claude":   &Claude{},
-	"codex":    &Codex{},
-	"droid":    &Droid{},
-	"opencode": &OpenCode{},
-}
-
-func selectIntegration() (string, error) {
-	if len(integrations) == 0 {
-		return "", fmt.Errorf("no integrations available")
-	}
-
-	names := slices.Sorted(maps.Keys(integrations))
-	var items []selectItem
-	for _, name := range names {
-		r := integrations[name]
-		description := r.String()
-		if conn, err := loadIntegration(name); err == nil && len(conn.Models) > 0 {
-			description = fmt.Sprintf("%s (%s)", r.String(), conn.Models[0])
-		}
-		items = append(items, selectItem{Name: name, Description: description})
-	}
-
-	return selectPrompt("Select integration:", items)
-}
-
-// selectModels lets the user select models for an integration
-func selectModels(ctx context.Context, name, current string) ([]string, error) {
-	r, ok := integrations[name]
-	if !ok {
-		return nil, fmt.Errorf("unknown integration: %s", name)
-	}
-
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return nil, err
-	}
-
-	models, err := client.List(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	if len(models.Models) == 0 {
-		return nil, fmt.Errorf("no models available, run 'ollama pull <model>' first")
-	}
-
-	var items []selectItem
-	cloudModels := make(map[string]bool)
-	for _, m := range models.Models {
-		if m.RemoteModel != "" {
-			cloudModels[m.Name] = true
-		}
-		items = append(items, selectItem{Name: m.Name})
-	}
-
-	if len(items) == 0 {
-		return nil, fmt.Errorf("no local models available, run 'ollama pull <model>' first")
-	}
-
-	// Get previously configured models (saved config takes precedence)
-	var preChecked []string
-	if saved, err := loadIntegration(name); err == nil {
-		preChecked = saved.Models
-	} else if editor, ok := r.(Editor); ok {
-		preChecked = editor.Models()
-	}
-	checked := make(map[string]bool, len(preChecked))
-	for _, n := range preChecked {
-		checked[n] = true
-	}
-
-	// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
-	for _, item := range items {
-		if item.Name == current || strings.HasPrefix(item.Name, current+":") {
-			current = item.Name
-			break
-		}
-	}
-
-	// If current model is configured, move to front of preChecked
-	if checked[current] {
-		preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
-	}
-
-	// Sort: checked first, then alphabetical
-	slices.SortFunc(items, func(a, b selectItem) int {
-		ac, bc := checked[a.Name], checked[b.Name]
-		if ac != bc {
-			if ac {
-				return -1
-			}
-			return 1
-		}
-		return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
-	})
-
-	var selected []string
-	// only editors support multi-model selection
-	if _, ok := r.(Editor); ok {
-		selected, err = multiSelectPrompt(fmt.Sprintf("Select models for %s:", r), items, preChecked)
-		if err != nil {
-			return nil, err
-		}
-	} else {
-		model, err := selectPrompt(fmt.Sprintf("Select model for %s:", r), items)
-		if err != nil {
-			return nil, err
-		}
-		selected = []string{model}
-	}
-
-	// if any model in selected is a cloud model, ensure signed in
-	var selectedCloudModels []string
-	for _, m := range selected {
-		if cloudModels[m] {
-			selectedCloudModels = append(selectedCloudModels, m)
-		}
-	}
-	if len(selectedCloudModels) > 0 {
-		// ensure user is signed in
-		user, err := client.Whoami(ctx)
-		if err == nil && user != nil && user.Name != "" {
-			return selected, nil
-		}
-
-		var aErr api.AuthorizationError
-		if !errors.As(err, &aErr) || aErr.SigninURL == "" {
-			return nil, err
-		}
-
-		modelList := strings.Join(selectedCloudModels, ", ")
-		yes, err := confirmPrompt(fmt.Sprintf("sign in to use %s?", modelList))
-		if err != nil || !yes {
-			return nil, fmt.Errorf("%s requires sign in", modelList)
-		}
-
-		fmt.Fprintf(os.Stderr, "\nTo sign in, navigate to:\n    %s\n\n", aErr.SigninURL)
-
-		// TODO(parthsareen): extract into auth package for cmd
-		// Auto-open browser (best effort, fail silently)
-		switch runtime.GOOS {
-		case "darwin":
-			_ = exec.Command("open", aErr.SigninURL).Start()
-		case "linux":
-			_ = exec.Command("xdg-open", aErr.SigninURL).Start()
-		case "windows":
-			_ = exec.Command("rundll32", "url.dll,FileProtocolHandler", aErr.SigninURL).Start()
-		}
-
-		spinnerFrames := []string{"|", "/", "-", "\\"}
-		frame := 0
-
-		fmt.Fprintf(os.Stderr, "\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[0])
-
-		ticker := time.NewTicker(200 * time.Millisecond)
-		defer ticker.Stop()
-
-		for {
-			select {
-			case <-ctx.Done():
-				fmt.Fprintf(os.Stderr, "\r\033[K")
-				return nil, ctx.Err()
-			case <-ticker.C:
-				frame++
-				fmt.Fprintf(os.Stderr, "\r\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
-
-				// poll every 10th frame (~2 seconds)
-				if frame%10 == 0 {
-					u, err := client.Whoami(ctx)
-					if err == nil && u != nil && u.Name != "" {
-						fmt.Fprintf(os.Stderr, "\r\033[K\033[A\r\033[K\033[1msigned in:\033[0m %s\n", u.Name)
-						return selected, nil
-					}
-				}
-			}
-		}
-	}
-
-	return selected, nil
-}
-
-func runIntegration(name, modelName string) error {
-	r, ok := integrations[name]
-	if !ok {
-		return fmt.Errorf("unknown integration: %s", name)
-	}
-	fmt.Fprintf(os.Stderr, "\nLaunching %s with %s...\n", r, modelName)
-	return r.Run(modelName)
-}
-
-// ConfigCmd returns the cobra command for configuring integrations.
-func ConfigCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) error) *cobra.Command {
-	var modelFlag string
-	var launchFlag bool
-
-	cmd := &cobra.Command{
-		Use:   "config [INTEGRATION]",
-		Short: "Configure an external integration to use Ollama",
-		Long: `Configure an external application to use Ollama models.
-
-Supported integrations:
-  claude    Claude Code
-  codex     Codex
-  droid     Droid
-  opencode  OpenCode
-
-Examples:
-  ollama config
-  ollama config claude
-  ollama config droid --launch`,
-		Args:    cobra.MaximumNArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE: func(cmd *cobra.Command, args []string) error {
-			var name string
-			if len(args) > 0 {
-				name = args[0]
-			} else {
-				var err error
-				name, err = selectIntegration()
-				if errors.Is(err, errCancelled) {
-					return nil
-				}
-				if err != nil {
-					return err
-				}
-			}
-
-			r, ok := integrations[strings.ToLower(name)]
-			if !ok {
-				return fmt.Errorf("unknown integration: %s", name)
-			}
-
-			// If --launch without --model, use saved config if available
-			if launchFlag && modelFlag == "" {
-				if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
-					return runIntegration(name, config.Models[0])
-				}
-			}
-
-			var models []string
-			if modelFlag != "" {
-				// When --model is specified, merge with existing models (new model becomes default)
-				models = []string{modelFlag}
-				if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
-					for _, m := range existing.Models {
-						if m != modelFlag {
-							models = append(models, m)
-						}
-					}
-				}
-			} else {
-				var err error
-				models, err = selectModels(cmd.Context(), name, "")
-				if errors.Is(err, errCancelled) {
-					return nil
-				}
-				if err != nil {
-					return err
-				}
-			}
-
-			if editor, isEditor := r.(Editor); isEditor {
-				paths := editor.Paths()
-				if len(paths) > 0 {
-					fmt.Fprintf(os.Stderr, "This will modify your %s configuration:\n", r)
-					for _, p := range paths {
-						fmt.Fprintf(os.Stderr, "  %s\n", p)
-					}
-					fmt.Fprintf(os.Stderr, "Backups will be saved to %s/\n\n", backupDir())
-
-					if ok, _ := confirmPrompt("Proceed?"); !ok {
-						return nil
-					}
-				}
-			}
-
-			if err := saveIntegration(name, models); err != nil {
-				return fmt.Errorf("failed to save: %w", err)
-			}
-
-			if editor, isEditor := r.(Editor); isEditor {
-				if err := editor.Edit(models); err != nil {
-					return fmt.Errorf("setup failed: %w", err)
-				}
-			}
-
-			if _, isEditor := r.(Editor); isEditor {
-				if len(models) == 1 {
-					fmt.Fprintf(os.Stderr, "Added %s to %s\n", models[0], r)
-				} else {
-					fmt.Fprintf(os.Stderr, "Added %d models to %s (default: %s)\n", len(models), r, models[0])
-				}
-			}
-
-			if slices.ContainsFunc(models, func(m string) bool {
-				return !strings.HasSuffix(m, "cloud")
-			}) {
-				fmt.Fprintln(os.Stderr)
-				fmt.Fprintln(os.Stderr, "Coding agents work best with at least 64k context. Either:")
-				fmt.Fprintln(os.Stderr, "  - Set the context slider in Ollama app settings")
-				fmt.Fprintln(os.Stderr, "  - Run: OLLAMA_CONTEXT_LENGTH=64000 ollama serve")
-			}
-
-			if launchFlag {
-				return runIntegration(name, models[0])
-			}
-
-			if launch, _ := confirmPrompt(fmt.Sprintf("\nLaunch %s now?", r)); launch {
-				return runIntegration(name, models[0])
-			}
-
-			fmt.Fprintf(os.Stderr, "Run 'ollama config %s --launch' to start with %s\n", strings.ToLower(name), models[0])
-			return nil
-		},
-	}
-
-	cmd.Flags().StringVar(&modelFlag, "model", "", "Model to use")
-	cmd.Flags().BoolVar(&launchFlag, "launch", false, "Launch the integration after configuring")
-	return cmd
-}
--- a/cmd/config/integrations_test.go
+++ b/cmd/config/integrations_test.go
@@ -1,188 +0,0 @@
-package config
-
-import (
-	"slices"
-	"strings"
-	"testing"
-
-	"github.com/spf13/cobra"
-)
-
-func TestIntegrationLookup(t *testing.T) {
-	tests := []struct {
-		name      string
-		input     string
-		wantFound bool
-		wantName  string
-	}{
-		{"claude lowercase", "claude", true, "Claude Code"},
-		{"claude uppercase", "CLAUDE", true, "Claude Code"},
-		{"claude mixed case", "Claude", true, "Claude Code"},
-		{"codex", "codex", true, "Codex"},
-		{"droid", "droid", true, "Droid"},
-		{"opencode", "opencode", true, "OpenCode"},
-		{"unknown integration", "unknown", false, ""},
-		{"empty string", "", false, ""},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			r, found := integrations[strings.ToLower(tt.input)]
-			if found != tt.wantFound {
-				t.Errorf("integrations[%q] found = %v, want %v", tt.input, found, tt.wantFound)
-			}
-			if found && r.String() != tt.wantName {
-				t.Errorf("integrations[%q].String() = %q, want %q", tt.input, r.String(), tt.wantName)
-			}
-		})
-	}
-}
-
-func TestIntegrationRegistry(t *testing.T) {
-	expectedIntegrations := []string{"claude", "codex", "droid", "opencode"}
-
-	for _, name := range expectedIntegrations {
-		t.Run(name, func(t *testing.T) {
-			r, ok := integrations[name]
-			if !ok {
-				t.Fatalf("integration %q not found in registry", name)
-			}
-			if r.String() == "" {
-				t.Error("integration.String() should not be empty")
-			}
-		})
-	}
-}
-
-func TestHasLocalModel(t *testing.T) {
-	tests := []struct {
-		name   string
-		models []string
-		want   bool
-	}{
-		{"empty list", []string{}, false},
-		{"single local model", []string{"llama3.2"}, true},
-		{"single cloud model", []string{"cloud-model"}, false},
-		{"mixed models", []string{"cloud-model", "llama3.2"}, true},
-		{"multiple local models", []string{"llama3.2", "qwen2.5"}, true},
-		{"multiple cloud models", []string{"cloud-a", "cloud-b"}, false},
-		{"local model first", []string{"llama3.2", "cloud-model"}, true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := slices.ContainsFunc(tt.models, func(m string) bool {
-				return !strings.Contains(m, "cloud")
-			})
-			if got != tt.want {
-				t.Errorf("hasLocalModel(%v) = %v, want %v", tt.models, got, tt.want)
-			}
-		})
-	}
-}
-
-func TestConfigCmd(t *testing.T) {
-	// Mock checkServerHeartbeat that always succeeds
-	mockCheck := func(cmd *cobra.Command, args []string) error {
-		return nil
-	}
-
-	cmd := ConfigCmd(mockCheck)
-
-	t.Run("command structure", func(t *testing.T) {
-		if cmd.Use != "config [INTEGRATION]" {
-			t.Errorf("Use = %q, want %q", cmd.Use, "config [INTEGRATION]")
-		}
-		if cmd.Short == "" {
-			t.Error("Short description should not be empty")
-		}
-		if cmd.Long == "" {
-			t.Error("Long description should not be empty")
-		}
-	})
-
-	t.Run("flags exist", func(t *testing.T) {
-		modelFlag := cmd.Flags().Lookup("model")
-		if modelFlag == nil {
-			t.Error("--model flag should exist")
-		}
-
-		launchFlag := cmd.Flags().Lookup("launch")
-		if launchFlag == nil {
-			t.Error("--launch flag should exist")
-		}
-	})
-
-	t.Run("PreRunE is set", func(t *testing.T) {
-		if cmd.PreRunE == nil {
-			t.Error("PreRunE should be set to checkServerHeartbeat")
-		}
-	})
-}
-
-func TestRunIntegration_UnknownIntegration(t *testing.T) {
-	err := runIntegration("unknown-integration", "model")
-	if err == nil {
-		t.Error("expected error for unknown integration, got nil")
-	}
-	if !strings.Contains(err.Error(), "unknown integration") {
-		t.Errorf("error should mention 'unknown integration', got: %v", err)
-	}
-}
-
-func TestHasLocalModel_DocumentsHeuristic(t *testing.T) {
-	tests := []struct {
-		name   string
-		models []string
-		want   bool
-		reason string
-	}{
-		{"empty list", []string{}, false, "empty list has no local models"},
-		{"contains-cloud-substring", []string{"deepseek-r1:cloud"}, false, "model with 'cloud' substring is considered cloud"},
-		{"cloud-in-name", []string{"my-cloud-model"}, false, "'cloud' anywhere in name = cloud model"},
-		{"cloudless", []string{"cloudless-model"}, false, "'cloudless' still contains 'cloud'"},
-		{"local-model", []string{"llama3.2"}, true, "no 'cloud' = local"},
-		{"mixed", []string{"cloud-model", "llama3.2"}, true, "one local model = hasLocalModel true"},
-		{"all-cloud", []string{"cloud-a", "cloud-b"}, false, "all contain 'cloud'"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := slices.ContainsFunc(tt.models, func(m string) bool {
-				return !strings.Contains(m, "cloud")
-			})
-			if got != tt.want {
-				t.Errorf("hasLocalModel(%v) = %v, want %v (%s)", tt.models, got, tt.want, tt.reason)
-			}
-		})
-	}
-}
-
-func TestConfigCmd_NilHeartbeat(t *testing.T) {
-	// This should not panic - cmd creation should work even with nil
-	cmd := ConfigCmd(nil)
-	if cmd == nil {
-		t.Fatal("ConfigCmd returned nil")
-	}
-
-	// PreRunE should be nil when passed nil
-	if cmd.PreRunE != nil {
-		t.Log("Note: PreRunE is set even when nil is passed (acceptable)")
-	}
-}
-
-func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
-	for name, r := range integrations {
-		t.Run(name, func(t *testing.T) {
-			// Test String() doesn't panic and returns non-empty
-			displayName := r.String()
-			if displayName == "" {
-				t.Error("String() should not return empty")
-			}
-
-			// Test Run() exists (we can't call it without actually running the command)
-			// Just verify the method is available
-			var _ func(string) error = r.Run
-		})
-	}
-}
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -1,203 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"maps"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"slices"
-	"strings"
-)
-
-// OpenCode implements Runner and Editor for OpenCode integration
-type OpenCode struct{}
-
-func (o *OpenCode) String() string { return "OpenCode" }
-
-func (o *OpenCode) Run(model string) error {
-	if _, err := exec.LookPath("opencode"); err != nil {
-		return fmt.Errorf("opencode is not installed, install from https://opencode.ai")
-	}
-
-	// Call Edit() to ensure config is up-to-date before launch
-	models := []string{model}
-	if config, err := loadIntegration("opencode"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := o.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	cmd := exec.Command("opencode")
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	return cmd.Run()
-}
-
-func (o *OpenCode) Paths() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-
-	var paths []string
-	p := filepath.Join(home, ".config", "opencode", "opencode.json")
-	if _, err := os.Stat(p); err == nil {
-		paths = append(paths, p)
-	}
-	sp := filepath.Join(home, ".local", "state", "opencode", "model.json")
-	if _, err := os.Stat(sp); err == nil {
-		paths = append(paths, sp)
-	}
-	return paths
-}
-
-func (o *OpenCode) Edit(modelList []string) error {
-	if len(modelList) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	configPath := filepath.Join(home, ".config", "opencode", "opencode.json")
-	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
-		return err
-	}
-
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config) // Ignore parse errors; treat missing/corrupt files as empty
-	}
-
-	config["$schema"] = "https://opencode.ai/config.json"
-
-	provider, ok := config["provider"].(map[string]any)
-	if !ok {
-		provider = make(map[string]any)
-	}
-
-	ollama, ok := provider["ollama"].(map[string]any)
-	if !ok {
-		ollama = map[string]any{
-			"npm":  "@ai-sdk/openai-compatible",
-			"name": "Ollama (local)",
-			"options": map[string]any{
-				"baseURL": "http://localhost:11434/v1",
-			},
-		}
-	}
-
-	models, ok := ollama["models"].(map[string]any)
-	if !ok {
-		models = make(map[string]any)
-	}
-
-	selectedSet := make(map[string]bool)
-	for _, m := range modelList {
-		selectedSet[m] = true
-	}
-
-	for name, cfg := range models {
-		if cfgMap, ok := cfg.(map[string]any); ok {
-			if displayName, ok := cfgMap["name"].(string); ok {
-				if strings.HasSuffix(displayName, "[Ollama]") && !selectedSet[name] {
-					delete(models, name)
-				}
-			}
-		}
-	}
-
-	for _, model := range modelList {
-		models[model] = map[string]any{
-			"name": fmt.Sprintf("%s [Ollama]", model),
-		}
-	}
-
-	ollama["models"] = models
-	provider["ollama"] = ollama
-	config["provider"] = provider
-
-	configData, err := json.MarshalIndent(config, "", "  ")
-	if err != nil {
-		return err
-	}
-	if err := writeWithBackup(configPath, configData); err != nil {
-		return err
-	}
-
-	statePath := filepath.Join(home, ".local", "state", "opencode", "model.json")
-	if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil {
-		return err
-	}
-
-	state := map[string]any{
-		"recent":   []any{},
-		"favorite": []any{},
-		"variant":  map[string]any{},
-	}
-	if data, err := os.ReadFile(statePath); err == nil {
-		_ = json.Unmarshal(data, &state) // Ignore parse errors; use defaults
-	}
-
-	recent, _ := state["recent"].([]any)
-
-	modelSet := make(map[string]bool)
-	for _, m := range modelList {
-		modelSet[m] = true
-	}
-
-	// Filter out existing Ollama models we're about to re-add
-	newRecent := slices.DeleteFunc(slices.Clone(recent), func(entry any) bool {
-		e, ok := entry.(map[string]any)
-		if !ok || e["providerID"] != "ollama" {
-			return false
-		}
-		modelID, _ := e["modelID"].(string)
-		return modelSet[modelID]
-	})
-
-	// Prepend models in reverse order so first model ends up first
-	for _, model := range slices.Backward(modelList) {
-		newRecent = slices.Insert(newRecent, 0, any(map[string]any{
-			"providerID": "ollama",
-			"modelID":    model,
-		}))
-	}
-
-	const maxRecentModels = 10
-	newRecent = newRecent[:min(len(newRecent), maxRecentModels)]
-
-	state["recent"] = newRecent
-
-	stateData, err := json.MarshalIndent(state, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(statePath, stateData)
-}
-
-func (o *OpenCode) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-	config, err := readJSONFile(filepath.Join(home, ".config", "opencode", "opencode.json"))
-	if err != nil {
-		return nil
-	}
-	provider, _ := config["provider"].(map[string]any)
-	ollama, _ := provider["ollama"].(map[string]any)
-	models, _ := ollama["models"].(map[string]any)
-	if len(models) == 0 {
-		return nil
-	}
-	keys := slices.Collect(maps.Keys(models))
-	slices.Sort(keys)
-	return keys
-}
--- a/cmd/config/opencode_test.go
+++ b/cmd/config/opencode_test.go
@@ -1,437 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-func TestOpenCodeIntegration(t *testing.T) {
-	o := &OpenCode{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := o.String(); got != "OpenCode" {
-			t.Errorf("String() = %q, want %q", got, "OpenCode")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = o
-	})
-
-	t.Run("implements Editor", func(t *testing.T) {
-		var _ Editor = o
-	})
-}
-
-func TestOpenCodeEdit(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
-	statePath := filepath.Join(stateDir, "model.json")
-
-	cleanup := func() {
-		os.RemoveAll(configDir)
-		os.RemoveAll(stateDir)
-	}
-
-	t.Run("fresh install", func(t *testing.T) {
-		cleanup()
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
-	})
-
-	t.Run("preserve other providers", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"provider":{"anthropic":{"apiKey":"xxx"}}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		provider := cfg["provider"].(map[string]any)
-		if provider["anthropic"] == nil {
-			t.Error("anthropic provider was removed")
-		}
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-	})
-
-	t.Run("preserve other models", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"mistral":{"name":"Mistral"}}}}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenCodeModelExists(t, configPath, "mistral")
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-	})
-
-	t.Run("update existing model", func(t *testing.T) {
-		cleanup()
-		o.Edit([]string{"llama3.2"})
-		o.Edit([]string{"llama3.2"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-	})
-
-	t.Run("preserve top-level keys", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"theme":"dark","keybindings":{}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("theme was removed")
-		}
-		if cfg["keybindings"] == nil {
-			t.Error("keybindings was removed")
-		}
-	})
-
-	t.Run("model state - insert at index 0", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(statePath, []byte(`{"recent":[{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
-		assertOpenCodeRecentModel(t, statePath, 1, "anthropic", "claude")
-	})
-
-	t.Run("model state - preserve favorites and variants", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(statePath, []byte(`{"recent":[],"favorite":[{"providerID":"x","modelID":"y"}],"variant":{"a":"b"}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(statePath)
-		var state map[string]any
-		json.Unmarshal(data, &state)
-		if len(state["favorite"].([]any)) != 1 {
-			t.Error("favorite was modified")
-		}
-		if state["variant"].(map[string]any)["a"] != "b" {
-			t.Error("variant was modified")
-		}
-	})
-
-	t.Run("model state - deduplicate on re-add", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(statePath, []byte(`{"recent":[{"providerID":"ollama","modelID":"llama3.2"},{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(statePath)
-		var state map[string]any
-		json.Unmarshal(data, &state)
-		recent := state["recent"].([]any)
-		if len(recent) != 2 {
-			t.Errorf("expected 2 recent entries, got %d", len(recent))
-		}
-		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
-	})
-
-	t.Run("remove model", func(t *testing.T) {
-		cleanup()
-		// First add two models
-		o.Edit([]string{"llama3.2", "mistral"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeModelExists(t, configPath, "mistral")
-
-		// Then remove one by only selecting the other
-		o.Edit([]string{"llama3.2"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeModelNotExists(t, configPath, "mistral")
-	})
-
-	t.Run("remove model preserves non-ollama models", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		// Add a non-Ollama model manually
-		os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"external":{"name":"External Model"}}}}}`), 0o644)
-
-		o.Edit([]string{"llama3.2"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeModelExists(t, configPath, "external") // Should be preserved
-	})
-}
-
-func assertOpenCodeModelExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatal(err)
-	}
-	provider, ok := cfg["provider"].(map[string]any)
-	if !ok {
-		t.Fatal("provider not found")
-	}
-	ollama, ok := provider["ollama"].(map[string]any)
-	if !ok {
-		t.Fatal("ollama provider not found")
-	}
-	models, ok := ollama["models"].(map[string]any)
-	if !ok {
-		t.Fatal("models not found")
-	}
-	if models[model] == nil {
-		t.Errorf("model %s not found", model)
-	}
-}
-
-func assertOpenCodeModelNotExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatal(err)
-	}
-	provider, ok := cfg["provider"].(map[string]any)
-	if !ok {
-		return // No provider means no model
-	}
-	ollama, ok := provider["ollama"].(map[string]any)
-	if !ok {
-		return // No ollama means no model
-	}
-	models, ok := ollama["models"].(map[string]any)
-	if !ok {
-		return // No models means no model
-	}
-	if models[model] != nil {
-		t.Errorf("model %s should not exist but was found", model)
-	}
-}
-
-func assertOpenCodeRecentModel(t *testing.T, path string, index int, providerID, modelID string) {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	var state map[string]any
-	if err := json.Unmarshal(data, &state); err != nil {
-		t.Fatal(err)
-	}
-	recent, ok := state["recent"].([]any)
-	if !ok {
-		t.Fatal("recent not found")
-	}
-	if index >= len(recent) {
-		t.Fatalf("index %d out of range (len=%d)", index, len(recent))
-	}
-	entry, ok := recent[index].(map[string]any)
-	if !ok {
-		t.Fatal("entry is not a map")
-	}
-	if entry["providerID"] != providerID {
-		t.Errorf("expected providerID %s, got %s", providerID, entry["providerID"])
-	}
-	if entry["modelID"] != modelID {
-		t.Errorf("expected modelID %s, got %s", modelID, entry["modelID"])
-	}
-}
-
-// Edge case tests for opencode.go
-
-func TestOpenCodeEdit_CorruptedConfigJSON(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(`{corrupted json content`), 0o644)
-
-	// Should not panic - corrupted JSON should be treated as empty
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit failed with corrupted config: %v", err)
-	}
-
-	// Verify valid JSON was created
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Errorf("resulting config is not valid JSON: %v", err)
-	}
-}
-
-func TestOpenCodeEdit_CorruptedStateJSON(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
-	statePath := filepath.Join(stateDir, "model.json")
-
-	os.MkdirAll(stateDir, 0o755)
-	os.WriteFile(statePath, []byte(`{corrupted state`), 0o644)
-
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit failed with corrupted state: %v", err)
-	}
-
-	// Verify valid state was created
-	data, _ := os.ReadFile(statePath)
-	var state map[string]any
-	if err := json.Unmarshal(data, &state); err != nil {
-		t.Errorf("resulting state is not valid JSON: %v", err)
-	}
-}
-
-func TestOpenCodeEdit_WrongTypeProvider(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(`{"provider": "not a map"}`), 0o644)
-
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit with wrong type provider failed: %v", err)
-	}
-
-	// Verify provider is now correct type
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-
-	provider, ok := cfg["provider"].(map[string]any)
-	if !ok {
-		t.Fatalf("provider should be map after setup, got %T", cfg["provider"])
-	}
-	if provider["ollama"] == nil {
-		t.Error("ollama provider should be created")
-	}
-}
-
-func TestOpenCodeEdit_WrongTypeRecent(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
-	statePath := filepath.Join(stateDir, "model.json")
-
-	os.MkdirAll(stateDir, 0o755)
-	os.WriteFile(statePath, []byte(`{"recent": "not an array", "favorite": [], "variant": {}}`), 0o644)
-
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit with wrong type recent failed: %v", err)
-	}
-
-	// The function should handle this gracefully
-	data, _ := os.ReadFile(statePath)
-	var state map[string]any
-	json.Unmarshal(data, &state)
-
-	// recent should be properly set after setup
-	recent, ok := state["recent"].([]any)
-	if !ok {
-		t.Logf("Note: recent type after setup is %T (documenting behavior)", state["recent"])
-	} else if len(recent) == 0 {
-		t.Logf("Note: recent is empty (documenting behavior)")
-	}
-}
-
-func TestOpenCodeEdit_EmptyModels(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-
-	os.MkdirAll(configDir, 0o755)
-	originalContent := `{"provider":{"ollama":{"models":{"existing":{}}}}}`
-	os.WriteFile(configPath, []byte(originalContent), 0o644)
-
-	// Empty models should be no-op
-	err := o.Edit([]string{})
-	if err != nil {
-		t.Fatalf("Edit with empty models failed: %v", err)
-	}
-
-	// Original content should be preserved (file not modified)
-	data, _ := os.ReadFile(configPath)
-	if string(data) != originalContent {
-		t.Errorf("empty models should not modify file, but content changed")
-	}
-}
-
-func TestOpenCodeEdit_SpecialCharsInModelName(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	// Model name with special characters (though unusual)
-	specialModel := `model-with-"quotes"`
-
-	err := o.Edit([]string{specialModel})
-	if err != nil {
-		t.Fatalf("Edit with special chars failed: %v", err)
-	}
-
-	// Verify it was stored correctly
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-	data, _ := os.ReadFile(configPath)
-
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatalf("resulting config is invalid JSON: %v", err)
-	}
-
-	// Model should be accessible
-	provider, _ := cfg["provider"].(map[string]any)
-	ollama, _ := provider["ollama"].(map[string]any)
-	models, _ := ollama["models"].(map[string]any)
-
-	if models[specialModel] == nil {
-		t.Errorf("model with special chars not found in config")
-	}
-}
-
-func TestOpenCodeModels_NoConfig(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	models := o.Models()
-	if len(models) > 0 {
-		t.Errorf("expected nil/empty for missing config, got %v", models)
-	}
-}
--- a/cmd/config/selector.go
+++ b/cmd/config/selector.go
@@ -1,499 +0,0 @@
-package config
-
-import (
-	"errors"
-	"fmt"
-	"io"
-	"os"
-	"strings"
-
-	"golang.org/x/term"
-)
-
-// ANSI escape sequences for terminal formatting.
-const (
-	ansiHideCursor = "\033[?25l"
-	ansiShowCursor = "\033[?25h"
-	ansiBold       = "\033[1m"
-	ansiReset      = "\033[0m"
-	ansiGray       = "\033[37m"
-	ansiClearDown  = "\033[J"
-)
-
-const maxDisplayedItems = 10
-
-var errCancelled = errors.New("cancelled")
-
-type selectItem struct {
-	Name        string
-	Description string
-}
-
-type inputEvent int
-
-const (
-	eventNone inputEvent = iota
-	eventEnter
-	eventEscape
-	eventUp
-	eventDown
-	eventTab
-	eventBackspace
-	eventChar
-)
-
-type selectState struct {
-	items        []selectItem
-	filter       string
-	selected     int
-	scrollOffset int
-}
-
-func newSelectState(items []selectItem) *selectState {
-	return &selectState{items: items}
-}
-
-func (s *selectState) filtered() []selectItem {
-	return filterItems(s.items, s.filter)
-}
-
-func (s *selectState) handleInput(event inputEvent, char byte) (done bool, result string, err error) {
-	filtered := s.filtered()
-
-	switch event {
-	case eventEnter:
-		if len(filtered) > 0 && s.selected < len(filtered) {
-			return true, filtered[s.selected].Name, nil
-		}
-	case eventEscape:
-		return true, "", errCancelled
-	case eventBackspace:
-		if len(s.filter) > 0 {
-			s.filter = s.filter[:len(s.filter)-1]
-			s.selected = 0
-			s.scrollOffset = 0
-		}
-	case eventUp:
-		if s.selected > 0 {
-			s.selected--
-			if s.selected < s.scrollOffset {
-				s.scrollOffset = s.selected
-			}
-		}
-	case eventDown:
-		if s.selected < len(filtered)-1 {
-			s.selected++
-			if s.selected >= s.scrollOffset+maxDisplayedItems {
-				s.scrollOffset = s.selected - maxDisplayedItems + 1
-			}
-		}
-	case eventChar:
-		s.filter += string(char)
-		s.selected = 0
-		s.scrollOffset = 0
-	}
-
-	return false, "", nil
-}
-
-type multiSelectState struct {
-	items         []selectItem
-	itemIndex     map[string]int
-	filter        string
-	highlighted   int
-	scrollOffset  int
-	checked       map[int]bool
-	checkOrder    []int
-	focusOnButton bool
-}
-
-func newMultiSelectState(items []selectItem, preChecked []string) *multiSelectState {
-	s := &multiSelectState{
-		items:     items,
-		itemIndex: make(map[string]int, len(items)),
-		checked:   make(map[int]bool),
-	}
-
-	for i, item := range items {
-		s.itemIndex[item.Name] = i
-	}
-
-	for _, name := range preChecked {
-		if idx, ok := s.itemIndex[name]; ok {
-			s.checked[idx] = true
-			s.checkOrder = append(s.checkOrder, idx)
-		}
-	}
-
-	return s
-}
-
-func (s *multiSelectState) filtered() []selectItem {
-	return filterItems(s.items, s.filter)
-}
-
-func (s *multiSelectState) toggleItem() {
-	filtered := s.filtered()
-	if len(filtered) == 0 || s.highlighted >= len(filtered) {
-		return
-	}
-
-	item := filtered[s.highlighted]
-	origIdx := s.itemIndex[item.Name]
-
-	if s.checked[origIdx] {
-		delete(s.checked, origIdx)
-		for i, idx := range s.checkOrder {
-			if idx == origIdx {
-				s.checkOrder = append(s.checkOrder[:i], s.checkOrder[i+1:]...)
-				break
-			}
-		}
-	} else {
-		s.checked[origIdx] = true
-		s.checkOrder = append(s.checkOrder, origIdx)
-	}
-}
-
-func (s *multiSelectState) handleInput(event inputEvent, char byte) (done bool, result []string, err error) {
-	filtered := s.filtered()
-
-	switch event {
-	case eventEnter:
-		if s.focusOnButton && len(s.checkOrder) > 0 {
-			var res []string
-			for _, idx := range s.checkOrder {
-				res = append(res, s.items[idx].Name)
-			}
-			return true, res, nil
-		} else if !s.focusOnButton {
-			s.toggleItem()
-		}
-	case eventTab:
-		if len(s.checkOrder) > 0 {
-			s.focusOnButton = !s.focusOnButton
-		}
-	case eventEscape:
-		return true, nil, errCancelled
-	case eventBackspace:
-		if len(s.filter) > 0 {
-			s.filter = s.filter[:len(s.filter)-1]
-			s.highlighted = 0
-			s.scrollOffset = 0
-			s.focusOnButton = false
-		}
-	case eventUp:
-		if s.focusOnButton {
-			s.focusOnButton = false
-		} else if s.highlighted > 0 {
-			s.highlighted--
-			if s.highlighted < s.scrollOffset {
-				s.scrollOffset = s.highlighted
-			}
-		}
-	case eventDown:
-		if s.focusOnButton {
-			s.focusOnButton = false
-		} else if s.highlighted < len(filtered)-1 {
-			s.highlighted++
-			if s.highlighted >= s.scrollOffset+maxDisplayedItems {
-				s.scrollOffset = s.highlighted - maxDisplayedItems + 1
-			}
-		}
-	case eventChar:
-		s.filter += string(char)
-		s.highlighted = 0
-		s.scrollOffset = 0
-		s.focusOnButton = false
-	}
-
-	return false, nil, nil
-}
-
-func (s *multiSelectState) selectedCount() int {
-	return len(s.checkOrder)
-}
-
-// Terminal I/O handling
-
-type terminalState struct {
-	fd       int
-	oldState *term.State
-}
-
-func enterRawMode() (*terminalState, error) {
-	fd := int(os.Stdin.Fd())
-	oldState, err := term.MakeRaw(fd)
-	if err != nil {
-		return nil, err
-	}
-	fmt.Fprint(os.Stderr, ansiHideCursor)
-	return &terminalState{fd: fd, oldState: oldState}, nil
-}
-
-func (t *terminalState) restore() {
-	fmt.Fprint(os.Stderr, ansiShowCursor)
-	term.Restore(t.fd, t.oldState)
-}
-
-func clearLines(n int) {
-	if n > 0 {
-		fmt.Fprintf(os.Stderr, "\033[%dA", n)
-		fmt.Fprint(os.Stderr, ansiClearDown)
-	}
-}
-
-func parseInput(r io.Reader) (inputEvent, byte, error) {
-	buf := make([]byte, 3)
-	n, err := r.Read(buf)
-	if err != nil {
-		return 0, 0, err
-	}
-
-	switch {
-	case n == 1 && buf[0] == 13:
-		return eventEnter, 0, nil
-	case n == 1 && (buf[0] == 3 || buf[0] == 27):
-		return eventEscape, 0, nil
-	case n == 1 && buf[0] == 9:
-		return eventTab, 0, nil
-	case n == 1 && buf[0] == 127:
-		return eventBackspace, 0, nil
-	case n == 3 && buf[0] == 27 && buf[1] == 91 && buf[2] == 65:
-		return eventUp, 0, nil
-	case n == 3 && buf[0] == 27 && buf[1] == 91 && buf[2] == 66:
-		return eventDown, 0, nil
-	case n == 1 && buf[0] >= 32 && buf[0] < 127:
-		return eventChar, buf[0], nil
-	}
-
-	return eventNone, 0, nil
-}
-
-// Rendering
-
-func renderSelect(w io.Writer, prompt string, s *selectState) int {
-	filtered := s.filtered()
-
-	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
-	lineCount := 1
-
-	if len(filtered) == 0 {
-		fmt.Fprintf(w, "  %s(no matches)%s\r\n", ansiGray, ansiReset)
-		lineCount++
-	} else {
-		displayCount := min(len(filtered), maxDisplayedItems)
-
-		for i := range displayCount {
-			idx := s.scrollOffset + i
-			if idx >= len(filtered) {
-				break
-			}
-			item := filtered[idx]
-			prefix := "    "
-			if idx == s.selected {
-				prefix = "  " + ansiBold + "> "
-			}
-			if item.Description != "" {
-				fmt.Fprintf(w, "%s%s%s %s- %s%s\r\n", prefix, item.Name, ansiReset, ansiGray, item.Description, ansiReset)
-			} else {
-				fmt.Fprintf(w, "%s%s%s\r\n", prefix, item.Name, ansiReset)
-			}
-			lineCount++
-		}
-
-		if remaining := len(filtered) - s.scrollOffset - displayCount; remaining > 0 {
-			fmt.Fprintf(w, "  %s... and %d more%s\r\n", ansiGray, remaining, ansiReset)
-			lineCount++
-		}
-	}
-
-	return lineCount
-}
-
-func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
-	filtered := s.filtered()
-
-	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
-	lineCount := 1
-
-	if len(filtered) == 0 {
-		fmt.Fprintf(w, "  %s(no matches)%s\r\n", ansiGray, ansiReset)
-		lineCount++
-	} else {
-		displayCount := min(len(filtered), maxDisplayedItems)
-
-		for i := range displayCount {
-			idx := s.scrollOffset + i
-			if idx >= len(filtered) {
-				break
-			}
-			item := filtered[idx]
-			origIdx := s.itemIndex[item.Name]
-
-			checkbox := "[ ]"
-			if s.checked[origIdx] {
-				checkbox = "[x]"
-			}
-
-			prefix := "  "
-			suffix := ""
-			if idx == s.highlighted && !s.focusOnButton {
-				prefix = "> "
-			}
-			if len(s.checkOrder) > 0 && s.checkOrder[0] == origIdx {
-				suffix = " " + ansiGray + "(default)" + ansiReset
-			}
-
-			if idx == s.highlighted && !s.focusOnButton {
-				fmt.Fprintf(w, "  %s%s %s %s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, suffix)
-			} else {
-				fmt.Fprintf(w, "  %s %s %s%s\r\n", prefix, checkbox, item.Name, suffix)
-			}
-			lineCount++
-		}
-
-		if remaining := len(filtered) - s.scrollOffset - displayCount; remaining > 0 {
-			fmt.Fprintf(w, "  %s... and %d more%s\r\n", ansiGray, remaining, ansiReset)
-			lineCount++
-		}
-	}
-
-	fmt.Fprintf(w, "\r\n")
-	lineCount++
-	count := s.selectedCount()
-	switch {
-	case count == 0:
-		fmt.Fprintf(w, "  %sSelect at least one model.%s\r\n", ansiGray, ansiReset)
-	case s.focusOnButton:
-		fmt.Fprintf(w, "  %s> [ Continue ]%s %s(%d selected)%s\r\n", ansiBold, ansiReset, ansiGray, count, ansiReset)
-	default:
-		fmt.Fprintf(w, "    %s[ Continue ] (%d selected) - press Tab%s\r\n", ansiGray, count, ansiReset)
-	}
-	lineCount++
-
-	return lineCount
-}
-
-// selectPrompt prompts the user to select a single item from a list.
-func selectPrompt(prompt string, items []selectItem) (string, error) {
-	if len(items) == 0 {
-		return "", fmt.Errorf("no items to select from")
-	}
-
-	ts, err := enterRawMode()
-	if err != nil {
-		return "", err
-	}
-	defer ts.restore()
-
-	state := newSelectState(items)
-	var lastLineCount int
-
-	render := func() {
-		clearLines(lastLineCount)
-		lastLineCount = renderSelect(os.Stderr, prompt, state)
-	}
-
-	render()
-
-	for {
-		event, char, err := parseInput(os.Stdin)
-		if err != nil {
-			return "", err
-		}
-
-		done, result, err := state.handleInput(event, char)
-		if done {
-			clearLines(lastLineCount)
-			if err != nil {
-				return "", err
-			}
-			return result, nil
-		}
-
-		render()
-	}
-}
-
-// multiSelectPrompt prompts the user to select multiple items from a list.
-func multiSelectPrompt(prompt string, items []selectItem, preChecked []string) ([]string, error) {
-	if len(items) == 0 {
-		return nil, fmt.Errorf("no items to select from")
-	}
-
-	ts, err := enterRawMode()
-	if err != nil {
-		return nil, err
-	}
-	defer ts.restore()
-
-	state := newMultiSelectState(items, preChecked)
-	var lastLineCount int
-
-	render := func() {
-		clearLines(lastLineCount)
-		lastLineCount = renderMultiSelect(os.Stderr, prompt, state)
-	}
-
-	render()
-
-	for {
-		event, char, err := parseInput(os.Stdin)
-		if err != nil {
-			return nil, err
-		}
-
-		done, result, err := state.handleInput(event, char)
-		if done {
-			clearLines(lastLineCount)
-			if err != nil {
-				return nil, err
-			}
-			return result, nil
-		}
-
-		render()
-	}
-}
-
-func confirmPrompt(prompt string) (bool, error) {
-	fd := int(os.Stdin.Fd())
-	oldState, err := term.MakeRaw(fd)
-	if err != nil {
-		return false, err
-	}
-	defer term.Restore(fd, oldState)
-
-	fmt.Fprintf(os.Stderr, "%s [y/n] ", prompt)
-
-	buf := make([]byte, 1)
-	for {
-		if _, err := os.Stdin.Read(buf); err != nil {
-			return false, err
-		}
-
-		switch buf[0] {
-		case 'Y', 'y', 13:
-			fmt.Fprintf(os.Stderr, "yes\r\n")
-			return true, nil
-		case 'N', 'n', 27, 3:
-			fmt.Fprintf(os.Stderr, "no\r\n")
-			return false, nil
-		}
-	}
-}
-
-func filterItems(items []selectItem, filter string) []selectItem {
-	if filter == "" {
-		return items
-	}
-	var result []selectItem
-	filterLower := strings.ToLower(filter)
-	for _, item := range items {
-		if strings.Contains(strings.ToLower(item.Name), filterLower) {
-			result = append(result, item)
-		}
-	}
-	return result
-}
--- a/cmd/config/selector_test.go
+++ b/cmd/config/selector_test.go
@@ -1,913 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"strings"
-	"testing"
-)
-
-func TestFilterItems(t *testing.T) {
-	items := []selectItem{
-		{Name: "llama3.2:latest"},
-		{Name: "qwen2.5:7b"},
-		{Name: "deepseek-v3:cloud"},
-		{Name: "GPT-OSS:20b"},
-	}
-
-	t.Run("EmptyFilter_ReturnsAllItems", func(t *testing.T) {
-		result := filterItems(items, "")
-		if len(result) != len(items) {
-			t.Errorf("expected %d items, got %d", len(items), len(result))
-		}
-	})
-
-	t.Run("CaseInsensitive_UppercaseFilterMatchesLowercase", func(t *testing.T) {
-		result := filterItems(items, "LLAMA")
-		if len(result) != 1 || result[0].Name != "llama3.2:latest" {
-			t.Errorf("expected llama3.2:latest, got %v", result)
-		}
-	})
-
-	t.Run("CaseInsensitive_LowercaseFilterMatchesUppercase", func(t *testing.T) {
-		result := filterItems(items, "gpt")
-		if len(result) != 1 || result[0].Name != "GPT-OSS:20b" {
-			t.Errorf("expected GPT-OSS:20b, got %v", result)
-		}
-	})
-
-	t.Run("PartialMatch", func(t *testing.T) {
-		result := filterItems(items, "deep")
-		if len(result) != 1 || result[0].Name != "deepseek-v3:cloud" {
-			t.Errorf("expected deepseek-v3:cloud, got %v", result)
-		}
-	})
-
-	t.Run("NoMatch_ReturnsEmpty", func(t *testing.T) {
-		result := filterItems(items, "nonexistent")
-		if len(result) != 0 {
-			t.Errorf("expected 0 items, got %d", len(result))
-		}
-	})
-}
-
-func TestSelectState(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1"},
-		{Name: "item2"},
-		{Name: "item3"},
-	}
-
-	t.Run("InitialState", func(t *testing.T) {
-		s := newSelectState(items)
-		if s.selected != 0 {
-			t.Errorf("expected selected=0, got %d", s.selected)
-		}
-		if s.filter != "" {
-			t.Errorf("expected empty filter, got %q", s.filter)
-		}
-		if s.scrollOffset != 0 {
-			t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
-		}
-	})
-
-	t.Run("Enter_SelectsCurrentItem", func(t *testing.T) {
-		s := newSelectState(items)
-		done, result, err := s.handleInput(eventEnter, 0)
-		if !done || result != "item1" || err != nil {
-			t.Errorf("expected (true, item1, nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Enter_WithFilter_SelectsFilteredItem", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "item3"
-		done, result, err := s.handleInput(eventEnter, 0)
-		if !done || result != "item3" || err != nil {
-			t.Errorf("expected (true, item3, nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Enter_EmptyFilteredList_DoesNothing", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "nonexistent"
-		done, result, err := s.handleInput(eventEnter, 0)
-		if done || result != "" || err != nil {
-			t.Errorf("expected (false, '', nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
-		s := newSelectState(items)
-		done, result, err := s.handleInput(eventEscape, 0)
-		if !done || result != "" || err != errCancelled {
-			t.Errorf("expected (true, '', errCancelled), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Down_MovesSelection", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventDown, 0)
-		if s.selected != 1 {
-			t.Errorf("expected selected=1, got %d", s.selected)
-		}
-	})
-
-	t.Run("Down_AtBottom_StaysAtBottom", func(t *testing.T) {
-		s := newSelectState(items)
-		s.selected = 2
-		s.handleInput(eventDown, 0)
-		if s.selected != 2 {
-			t.Errorf("expected selected=2 (stayed at bottom), got %d", s.selected)
-		}
-	})
-
-	t.Run("Up_MovesSelection", func(t *testing.T) {
-		s := newSelectState(items)
-		s.selected = 2
-		s.handleInput(eventUp, 0)
-		if s.selected != 1 {
-			t.Errorf("expected selected=1, got %d", s.selected)
-		}
-	})
-
-	t.Run("Up_AtTop_StaysAtTop", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventUp, 0)
-		if s.selected != 0 {
-			t.Errorf("expected selected=0 (stayed at top), got %d", s.selected)
-		}
-	})
-
-	t.Run("Char_AppendsToFilter", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventChar, 'i')
-		s.handleInput(eventChar, 't')
-		s.handleInput(eventChar, 'e')
-		s.handleInput(eventChar, 'm')
-		s.handleInput(eventChar, '2')
-		if s.filter != "item2" {
-			t.Errorf("expected filter='item2', got %q", s.filter)
-		}
-		filtered := s.filtered()
-		if len(filtered) != 1 || filtered[0].Name != "item2" {
-			t.Errorf("expected [item2], got %v", filtered)
-		}
-	})
-
-	t.Run("Char_ResetsSelectionToZero", func(t *testing.T) {
-		s := newSelectState(items)
-		s.selected = 2
-		s.handleInput(eventChar, 'x')
-		if s.selected != 0 {
-			t.Errorf("expected selected=0 after typing, got %d", s.selected)
-		}
-	})
-
-	t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "test"
-		s.handleInput(eventBackspace, 0)
-		if s.filter != "tes" {
-			t.Errorf("expected filter='tes', got %q", s.filter)
-		}
-	})
-
-	t.Run("Backspace_EmptyFilter_DoesNothing", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventBackspace, 0)
-		if s.filter != "" {
-			t.Errorf("expected filter='', got %q", s.filter)
-		}
-	})
-
-	t.Run("Backspace_ResetsSelectionToZero", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "test"
-		s.selected = 2
-		s.handleInput(eventBackspace, 0)
-		if s.selected != 0 {
-			t.Errorf("expected selected=0 after backspace, got %d", s.selected)
-		}
-	})
-
-	t.Run("Scroll_DownPastVisibleItems_ScrollsViewport", func(t *testing.T) {
-		// maxDisplayedItems is 10, so with 15 items we need to scroll
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newSelectState(manyItems)
-
-		// move down 12 times (past the 10-item viewport)
-		for range 12 {
-			s.handleInput(eventDown, 0)
-		}
-
-		if s.selected != 12 {
-			t.Errorf("expected selected=12, got %d", s.selected)
-		}
-		if s.scrollOffset != 3 {
-			t.Errorf("expected scrollOffset=3 (12-10+1), got %d", s.scrollOffset)
-		}
-	})
-
-	t.Run("Scroll_UpPastScrollOffset_ScrollsViewport", func(t *testing.T) {
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newSelectState(manyItems)
-		s.selected = 5
-		s.scrollOffset = 5
-
-		s.handleInput(eventUp, 0)
-
-		if s.selected != 4 {
-			t.Errorf("expected selected=4, got %d", s.selected)
-		}
-		if s.scrollOffset != 4 {
-			t.Errorf("expected scrollOffset=4, got %d", s.scrollOffset)
-		}
-	})
-}
-
-func TestMultiSelectState(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1"},
-		{Name: "item2"},
-		{Name: "item3"},
-	}
-
-	t.Run("InitialState_NoPrechecked", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		if s.highlighted != 0 {
-			t.Errorf("expected highlighted=0, got %d", s.highlighted)
-		}
-		if s.selectedCount() != 0 {
-			t.Errorf("expected 0 selected, got %d", s.selectedCount())
-		}
-		if s.focusOnButton {
-			t.Error("expected focusOnButton=false initially")
-		}
-	})
-
-	t.Run("InitialState_WithPrechecked", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item2", "item3"})
-		if s.selectedCount() != 2 {
-			t.Errorf("expected 2 selected, got %d", s.selectedCount())
-		}
-		if !s.checked[1] || !s.checked[2] {
-			t.Error("expected item2 and item3 to be checked")
-		}
-	})
-
-	t.Run("Prechecked_PreservesSelectionOrder", func(t *testing.T) {
-		// order matters: first checked = default model
-		s := newMultiSelectState(items, []string{"item3", "item1"})
-		if len(s.checkOrder) != 2 {
-			t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
-		}
-		if s.checkOrder[0] != 2 || s.checkOrder[1] != 0 {
-			t.Errorf("expected checkOrder=[2,0] (item3 first), got %v", s.checkOrder)
-		}
-	})
-
-	t.Run("Prechecked_IgnoresInvalidNames", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1", "nonexistent"})
-		if s.selectedCount() != 1 {
-			t.Errorf("expected 1 selected (nonexistent ignored), got %d", s.selectedCount())
-		}
-	})
-
-	t.Run("Toggle_ChecksUncheckedItem", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.toggleItem()
-		if !s.checked[0] {
-			t.Error("expected item1 to be checked after toggle")
-		}
-	})
-
-	t.Run("Toggle_UnchecksCheckedItem", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.toggleItem()
-		if s.checked[0] {
-			t.Error("expected item1 to be unchecked after toggle")
-		}
-	})
-
-	t.Run("Toggle_RemovesFromCheckOrder", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1", "item2", "item3"})
-		s.highlighted = 1 // toggle item2
-		s.toggleItem()
-
-		if len(s.checkOrder) != 2 {
-			t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
-		}
-		// should be [0, 2] (item1, item3) with item2 removed
-		if s.checkOrder[0] != 0 || s.checkOrder[1] != 2 {
-			t.Errorf("expected checkOrder=[0,2], got %v", s.checkOrder)
-		}
-	})
-
-	t.Run("Enter_TogglesWhenNotOnButton", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventEnter, 0)
-		if !s.checked[0] {
-			t.Error("expected item1 to be checked after enter")
-		}
-	})
-
-	t.Run("Enter_OnButton_ReturnsSelection", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item2", "item1"})
-		s.focusOnButton = true
-
-		done, result, err := s.handleInput(eventEnter, 0)
-
-		if !done || err != nil {
-			t.Errorf("expected done=true, err=nil, got done=%v, err=%v", done, err)
-		}
-		// result should preserve selection order
-		if len(result) != 2 || result[0] != "item2" || result[1] != "item1" {
-			t.Errorf("expected [item2, item1], got %v", result)
-		}
-	})
-
-	t.Run("Enter_OnButton_EmptySelection_DoesNothing", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.focusOnButton = true
-		done, result, err := s.handleInput(eventEnter, 0)
-		if done || result != nil || err != nil {
-			t.Errorf("expected (false, nil, nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Tab_SwitchesToButton_WhenHasSelection", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.handleInput(eventTab, 0)
-		if !s.focusOnButton {
-			t.Error("expected focus on button after tab")
-		}
-	})
-
-	t.Run("Tab_DoesNothing_WhenNoSelection", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventTab, 0)
-		if s.focusOnButton {
-			t.Error("tab should not focus button when nothing selected")
-		}
-	})
-
-	t.Run("Tab_TogglesButtonFocus", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.handleInput(eventTab, 0)
-		if !s.focusOnButton {
-			t.Error("expected focus on button after first tab")
-		}
-		s.handleInput(eventTab, 0)
-		if s.focusOnButton {
-			t.Error("expected focus back on list after second tab")
-		}
-	})
-
-	t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		done, result, err := s.handleInput(eventEscape, 0)
-		if !done || result != nil || err != errCancelled {
-			t.Errorf("expected (true, nil, errCancelled), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("IsDefault_TrueForFirstChecked", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item2", "item1"})
-		if !(len(s.checkOrder) > 0 && s.checkOrder[0] == 1) {
-			t.Error("expected item2 (idx 1) to be default (first checked)")
-		}
-		if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
-			t.Error("expected item1 (idx 0) to NOT be default")
-		}
-	})
-
-	t.Run("IsDefault_FalseWhenNothingChecked", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
-			t.Error("expected isDefault=false when nothing checked")
-		}
-	})
-
-	t.Run("Down_MovesHighlight", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventDown, 0)
-		if s.highlighted != 1 {
-			t.Errorf("expected highlighted=1, got %d", s.highlighted)
-		}
-	})
-
-	t.Run("Up_MovesHighlight", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.highlighted = 1
-		s.handleInput(eventUp, 0)
-		if s.highlighted != 0 {
-			t.Errorf("expected highlighted=0, got %d", s.highlighted)
-		}
-	})
-
-	t.Run("Arrow_ReturnsFocusFromButton", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.focusOnButton = true
-		s.handleInput(eventDown, 0)
-		if s.focusOnButton {
-			t.Error("expected focus to return to list on arrow key")
-		}
-	})
-
-	t.Run("Char_AppendsToFilter", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventChar, 'x')
-		if s.filter != "x" {
-			t.Errorf("expected filter='x', got %q", s.filter)
-		}
-	})
-
-	t.Run("Char_ResetsHighlightAndScroll", func(t *testing.T) {
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newMultiSelectState(manyItems, nil)
-		s.highlighted = 10
-		s.scrollOffset = 5
-
-		s.handleInput(eventChar, 'x')
-
-		if s.highlighted != 0 {
-			t.Errorf("expected highlighted=0, got %d", s.highlighted)
-		}
-		if s.scrollOffset != 0 {
-			t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
-		}
-	})
-
-	t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.filter = "test"
-		s.handleInput(eventBackspace, 0)
-		if s.filter != "tes" {
-			t.Errorf("expected filter='tes', got %q", s.filter)
-		}
-	})
-
-	t.Run("Backspace_RemovesFocusFromButton", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.filter = "x"
-		s.focusOnButton = true
-		s.handleInput(eventBackspace, 0)
-		if s.focusOnButton {
-			t.Error("expected focusOnButton=false after backspace")
-		}
-	})
-}
-
-func TestParseInput(t *testing.T) {
-	t.Run("Enter", func(t *testing.T) {
-		event, char, err := parseInput(bytes.NewReader([]byte{13}))
-		if err != nil || event != eventEnter || char != 0 {
-			t.Errorf("expected (eventEnter, 0, nil), got (%v, %v, %v)", event, char, err)
-		}
-	})
-
-	t.Run("Escape", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{27}))
-		if err != nil || event != eventEscape {
-			t.Errorf("expected eventEscape, got %v", event)
-		}
-	})
-
-	t.Run("CtrlC_TreatedAsEscape", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{3}))
-		if err != nil || event != eventEscape {
-			t.Errorf("expected eventEscape for Ctrl+C, got %v", event)
-		}
-	})
-
-	t.Run("Tab", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{9}))
-		if err != nil || event != eventTab {
-			t.Errorf("expected eventTab, got %v", event)
-		}
-	})
-
-	t.Run("Backspace", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{127}))
-		if err != nil || event != eventBackspace {
-			t.Errorf("expected eventBackspace, got %v", event)
-		}
-	})
-
-	t.Run("UpArrow", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{27, 91, 65}))
-		if err != nil || event != eventUp {
-			t.Errorf("expected eventUp, got %v", event)
-		}
-	})
-
-	t.Run("DownArrow", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{27, 91, 66}))
-		if err != nil || event != eventDown {
-			t.Errorf("expected eventDown, got %v", event)
-		}
-	})
-
-	t.Run("PrintableChars", func(t *testing.T) {
-		tests := []struct {
-			name string
-			char byte
-		}{
-			{"lowercase", 'a'},
-			{"uppercase", 'Z'},
-			{"digit", '5'},
-			{"space", ' '},
-			{"tilde", '~'},
-		}
-		for _, tt := range tests {
-			t.Run(tt.name, func(t *testing.T) {
-				event, char, err := parseInput(bytes.NewReader([]byte{tt.char}))
-				if err != nil || event != eventChar || char != tt.char {
-					t.Errorf("expected (eventChar, %q), got (%v, %q)", tt.char, event, char)
-				}
-			})
-		}
-	})
-}
-
-func TestRenderSelect(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1", Description: "first item"},
-		{Name: "item2"},
-	}
-
-	t.Run("ShowsPromptAndItems", func(t *testing.T) {
-		s := newSelectState(items)
-		var buf bytes.Buffer
-		lineCount := renderSelect(&buf, "Select:", s)
-
-		output := buf.String()
-		if !strings.Contains(output, "Select:") {
-			t.Error("expected prompt in output")
-		}
-		if !strings.Contains(output, "item1") {
-			t.Error("expected item1 in output")
-		}
-		if !strings.Contains(output, "first item") {
-			t.Error("expected description in output")
-		}
-		if !strings.Contains(output, "item2") {
-			t.Error("expected item2 in output")
-		}
-		if lineCount != 3 { // 1 prompt + 2 items
-			t.Errorf("expected 3 lines, got %d", lineCount)
-		}
-	})
-
-	t.Run("EmptyFilteredList_ShowsNoMatches", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "xyz"
-		var buf bytes.Buffer
-		renderSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "no matches") {
-			t.Error("expected 'no matches' message")
-		}
-	})
-
-	t.Run("LongList_ShowsRemainingCount", func(t *testing.T) {
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newSelectState(manyItems)
-		var buf bytes.Buffer
-		renderSelect(&buf, "Select:", s)
-
-		// 15 items - 10 displayed = 5 more
-		if !strings.Contains(buf.String(), "5 more") {
-			t.Error("expected '5 more' indicator")
-		}
-	})
-}
-
-func TestRenderMultiSelect(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1"},
-		{Name: "item2"},
-	}
-
-	t.Run("ShowsCheckboxes", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		output := buf.String()
-		if !strings.Contains(output, "[x]") {
-			t.Error("expected checked checkbox [x]")
-		}
-		if !strings.Contains(output, "[ ]") {
-			t.Error("expected unchecked checkbox [ ]")
-		}
-	})
-
-	t.Run("ShowsDefaultMarker", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "(default)") {
-			t.Error("expected (default) marker for first checked item")
-		}
-	})
-
-	t.Run("ShowsSelectedCount", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1", "item2"})
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "2 selected") {
-			t.Error("expected '2 selected' in output")
-		}
-	})
-
-	t.Run("NoSelection_ShowsHelperText", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "Select at least one") {
-			t.Error("expected 'Select at least one' helper text")
-		}
-	})
-}
-
-func TestErrCancelled(t *testing.T) {
-	t.Run("NotNil", func(t *testing.T) {
-		if errCancelled == nil {
-			t.Error("errCancelled should not be nil")
-		}
-	})
-
-	t.Run("Message", func(t *testing.T) {
-		if errCancelled.Error() != "cancelled" {
-			t.Errorf("expected 'cancelled', got %q", errCancelled.Error())
-		}
-	})
-}
-
-// Edge case tests for selector.go
-
-// TestSelectState_SingleItem verifies that single item list works without crash.
-// List with only one item should still work.
-func TestSelectState_SingleItem(t *testing.T) {
-	items := []selectItem{{Name: "only-one"}}
-
-	s := newSelectState(items)
-
-	// Down should do nothing (already at bottom)
-	s.handleInput(eventDown, 0)
-	if s.selected != 0 {
-		t.Errorf("down on single item: expected selected=0, got %d", s.selected)
-	}
-
-	// Up should do nothing (already at top)
-	s.handleInput(eventUp, 0)
-	if s.selected != 0 {
-		t.Errorf("up on single item: expected selected=0, got %d", s.selected)
-	}
-
-	// Enter should select the only item
-	done, result, err := s.handleInput(eventEnter, 0)
-	if !done || result != "only-one" || err != nil {
-		t.Errorf("enter on single item: expected (true, 'only-one', nil), got (%v, %q, %v)", done, result, err)
-	}
-}
-
-// TestSelectState_ExactlyMaxItems verifies boundary condition at maxDisplayedItems.
-// List with exactly maxDisplayedItems items should not scroll.
-func TestSelectState_ExactlyMaxItems(t *testing.T) {
-	items := make([]selectItem, maxDisplayedItems)
-	for i := range items {
-		items[i] = selectItem{Name: string(rune('a' + i))}
-	}
-
-	s := newSelectState(items)
-
-	// Move to last item
-	for range maxDisplayedItems - 1 {
-		s.handleInput(eventDown, 0)
-	}
-
-	if s.selected != maxDisplayedItems-1 {
-		t.Errorf("expected selected=%d, got %d", maxDisplayedItems-1, s.selected)
-	}
-
-	// Should not scroll when exactly at max
-	if s.scrollOffset != 0 {
-		t.Errorf("expected scrollOffset=0 for exactly maxDisplayedItems, got %d", s.scrollOffset)
-	}
-
-	// One more down should do nothing
-	s.handleInput(eventDown, 0)
-	if s.selected != maxDisplayedItems-1 {
-		t.Errorf("down at max: expected selected=%d, got %d", maxDisplayedItems-1, s.selected)
-	}
-}
-
-// TestFilterItems_RegexSpecialChars verifies that filter is literal, not regex.
-// User typing "model.v1" shouldn't match "modelsv1".
-func TestFilterItems_RegexSpecialChars(t *testing.T) {
-	items := []selectItem{
-		{Name: "model.v1"},
-		{Name: "modelsv1"},
-		{Name: "model-v1"},
-	}
-
-	// Filter with dot should only match literal dot
-	result := filterItems(items, "model.v1")
-	if len(result) != 1 {
-		t.Errorf("expected 1 exact match, got %d", len(result))
-	}
-	if len(result) > 0 && result[0].Name != "model.v1" {
-		t.Errorf("expected 'model.v1', got %s", result[0].Name)
-	}
-
-	// Other regex special chars should be literal too
-	items2 := []selectItem{
-		{Name: "test[0]"},
-		{Name: "test0"},
-		{Name: "test(1)"},
-	}
-
-	result2 := filterItems(items2, "test[0]")
-	if len(result2) != 1 || result2[0].Name != "test[0]" {
-		t.Errorf("expected only 'test[0]', got %v", result2)
-	}
-}
-
-// TestMultiSelectState_DuplicateNames documents handling of duplicate item names.
-// itemIndex uses name as key - duplicates cause collision. This documents
-// the current behavior: the last index for a duplicate name is stored
-func TestMultiSelectState_DuplicateNames(t *testing.T) {
-	// Duplicate names - this is an edge case that shouldn't happen in practice
-	items := []selectItem{
-		{Name: "duplicate"},
-		{Name: "duplicate"},
-		{Name: "unique"},
-	}
-
-	s := newMultiSelectState(items, nil)
-
-	// DOCUMENTED BEHAVIOR: itemIndex maps name to LAST index
-	// When there are duplicates, only the last occurrence's index is stored
-	if s.itemIndex["duplicate"] != 1 {
-		t.Errorf("itemIndex should map 'duplicate' to last index (1), got %d", s.itemIndex["duplicate"])
-	}
-
-	// Toggle item at highlighted=0 (first "duplicate")
-	// Due to name collision, toggleItem uses itemIndex["duplicate"] = 1
-	// So it actually toggles the SECOND duplicate item, not the first
-	s.toggleItem()
-
-	// This documents the potentially surprising behavior:
-	// We toggled at highlighted=0, but itemIndex lookup returned 1
-	if !s.checked[1] {
-		t.Error("toggle should check index 1 (due to name collision in itemIndex)")
-	}
-	if s.checked[0] {
-		t.Log("Note: index 0 is NOT checked, even though highlighted=0 (name collision behavior)")
-	}
-}
-
-// TestSelectState_FilterReducesBelowSelection verifies selection resets when filter reduces list.
-// Prevents index-out-of-bounds on next keystroke
-func TestSelectState_FilterReducesBelowSelection(t *testing.T) {
-	items := []selectItem{
-		{Name: "apple"},
-		{Name: "banana"},
-		{Name: "cherry"},
-	}
-
-	s := newSelectState(items)
-	s.selected = 2 // Select "cherry"
-
-	// Type a filter that removes cherry from results
-	s.handleInput(eventChar, 'a') // Filter to "a" - matches "apple" and "banana"
-
-	// Selection should reset to 0
-	if s.selected != 0 {
-		t.Errorf("expected selected=0 after filter, got %d", s.selected)
-	}
-
-	filtered := s.filtered()
-	if len(filtered) != 2 {
-		t.Errorf("expected 2 filtered items, got %d", len(filtered))
-	}
-}
-
-// TestFilterItems_UnicodeCharacters verifies filtering works with UTF-8.
-// Model names might contain unicode characters
-func TestFilterItems_UnicodeCharacters(t *testing.T) {
-	items := []selectItem{
-		{Name: "llama-日本語"},
-		{Name: "模型-chinese"},
-		{Name: "émoji-🦙"},
-		{Name: "regular-model"},
-	}
-
-	t.Run("filter japanese", func(t *testing.T) {
-		result := filterItems(items, "日本")
-		if len(result) != 1 || result[0].Name != "llama-日本語" {
-			t.Errorf("expected llama-日本語, got %v", result)
-		}
-	})
-
-	t.Run("filter chinese", func(t *testing.T) {
-		result := filterItems(items, "模型")
-		if len(result) != 1 || result[0].Name != "模型-chinese" {
-			t.Errorf("expected 模型-chinese, got %v", result)
-		}
-	})
-
-	t.Run("filter emoji", func(t *testing.T) {
-		result := filterItems(items, "🦙")
-		if len(result) != 1 || result[0].Name != "émoji-🦙" {
-			t.Errorf("expected émoji-🦙, got %v", result)
-		}
-	})
-
-	t.Run("filter accented char", func(t *testing.T) {
-		result := filterItems(items, "émoji")
-		if len(result) != 1 || result[0].Name != "émoji-🦙" {
-			t.Errorf("expected émoji-🦙, got %v", result)
-		}
-	})
-}
-
-// TestMultiSelectState_FilterReducesBelowHighlight verifies highlight resets when filter reduces list.
-func TestMultiSelectState_FilterReducesBelowHighlight(t *testing.T) {
-	items := []selectItem{
-		{Name: "apple"},
-		{Name: "banana"},
-		{Name: "cherry"},
-	}
-
-	s := newMultiSelectState(items, nil)
-	s.highlighted = 2 // Highlight "cherry"
-
-	// Type a filter that removes cherry
-	s.handleInput(eventChar, 'a')
-
-	if s.highlighted != 0 {
-		t.Errorf("expected highlighted=0 after filter, got %d", s.highlighted)
-	}
-}
-
-// TestMultiSelectState_EmptyItems verifies handling of empty item list.
-// Empty list should be handled gracefully.
-func TestMultiSelectState_EmptyItems(t *testing.T) {
-	s := newMultiSelectState([]selectItem{}, nil)
-
-	// Toggle should not panic on empty list
-	s.toggleItem()
-
-	if s.selectedCount() != 0 {
-		t.Errorf("expected 0 selected for empty list, got %d", s.selectedCount())
-	}
-
-	// Render should handle empty list
-	var buf bytes.Buffer
-	lineCount := renderMultiSelect(&buf, "Select:", s)
-	if lineCount == 0 {
-		t.Error("renderMultiSelect should produce output even for empty list")
-	}
-	if !strings.Contains(buf.String(), "no matches") {
-		t.Error("expected 'no matches' for empty list")
-	}
-}
-
-// TestSelectState_RenderWithDescriptions verifies rendering items with descriptions.
-func TestSelectState_RenderWithDescriptions(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1", Description: "First item description"},
-		{Name: "item2", Description: ""},
-		{Name: "item3", Description: "Third item"},
-	}
-
-	s := newSelectState(items)
-	var buf bytes.Buffer
-	renderSelect(&buf, "Select:", s)
-
-	output := buf.String()
-	if !strings.Contains(output, "First item description") {
-		t.Error("expected description to be rendered")
-	}
-	if !strings.Contains(output, "item2") {
-		t.Error("expected item without description to be rendered")
-	}
-}
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -159,7 +159,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			sb.WriteString(before)
 			if !ok {
 				fmt.Fprintln(&sb)
-				scanner.Prompt.UseAlt = true
 				continue
 			}

--- a/convert/convert.go
+++ b/convert/convert.go
@@ -311,10 +311,6 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 		conv = &deepseekocr{}
 	case "DeepseekV3ForCausalLM":
 		conv = &deepseek2Model{}
-	case "Glm4MoeLiteForCausalLM":
-		conv = &glm4MoeLiteModel{}
-	case "Lfm2ForCausalLM":
-		conv = &lfm2Model{}
 	default:
 		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
 	}
--- a/convert/convert_glm4moelite.go
+++ b/convert/convert_glm4moelite.go
@@ -1,150 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"fmt"
-	"log/slog"
-	"regexp"
-	"strconv"
-
-	"github.com/ollama/ollama/fs/ggml"
-)
-
-type glm4MoeLiteModel struct {
-	ModelParameters
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	HiddenLayers          uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-
-	RopeTheta     float32 `json:"rope_theta"`
-	QKNopeHeadDim uint32  `json:"qk_nope_head_dim"`
-	QKRopeHeadDim uint32  `json:"qk_rope_head_dim"`
-	KVLoraRank    uint32  `json:"kv_lora_rank"`
-	QLoraRank     uint32  `json:"q_lora_rank"`
-	VHeadDim      uint32  `json:"v_head_dim"`
-
-	ExpertCount            uint32  `json:"n_routed_experts"`
-	ExpertSharedCount      uint32  `json:"n_shared_experts"`
-	ExpertIntermediateSize uint32  `json:"moe_intermediate_size"`
-	ExpertUsedCount        uint32  `json:"num_experts_per_tok"`
-	ExpertWeightsNorm      bool    `json:"norm_topk_prob"`
-	ExpertWeightsScale     float32 `json:"routed_scaling_factor"`
-
-	LeadingDenseBlockCount uint32 `json:"first_k_dense_replace"`
-}
-
-func (p *glm4MoeLiteModel) KV(t *Tokenizer) KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "glm4moelite"
-	kv["general.type"] = "model"
-	kv["glm4moelite.block_count"] = p.HiddenLayers
-
-	numHeads := p.NumAttentionHeads
-	numKVHeads := p.NumKeyValueHeads
-
-	kv["glm4moelite.attention.head_count"] = numHeads
-	kv["glm4moelite.attention.head_count_kv"] = numKVHeads
-	kv["glm4moelite.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
-	kv["glm4moelite.attention.kv_lora_rank"] = p.KVLoraRank
-	kv["glm4moelite.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["glm4moelite.attention.q_lora_rank"] = p.QLoraRank
-	kv["glm4moelite.attention.value_length"] = p.VHeadDim
-	kv["glm4moelite.context_length"] = p.MaxPositionEmbeddings
-	kv["glm4moelite.embedding_length"] = p.HiddenSize
-	kv["glm4moelite.expert_count"] = p.ExpertCount
-	kv["glm4moelite.expert_feed_forward_length"] = p.ExpertIntermediateSize
-	kv["glm4moelite.expert_shared_count"] = p.ExpertSharedCount
-
-	kv["glm4moelite.expert_gating_func"] = uint32(2)
-	kv["glm4moelite.expert_used_count"] = p.ExpertUsedCount
-	kv["glm4moelite.expert_weights_norm"] = p.ExpertWeightsNorm
-	kv["glm4moelite.expert_weights_scale"] = p.ExpertWeightsScale
-	kv["glm4moelite.feed_forward_length"] = p.IntermediateSize
-	kv["glm4moelite.leading_dense_block_count"] = p.LeadingDenseBlockCount
-
-	kv["glm4moelite.rope.dimension_count"] = p.QKRopeHeadDim
-	kv["glm4moelite.rope.freq_base"] = cmp.Or(p.RopeTheta, float32(1000000.0))
-
-	kv["tokenizer.ggml.pre"] = "glm4"
-
-	return kv
-}
-
-func (p *glm4MoeLiteModel) Replacements() []string {
-	return []string{
-		"lm_head", "output",
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.kv_a_proj_with_mqa", "attn_kv_a_mqa",
-		"self_attn.kv_a_layernorm", "attn_kv_a_norm",
-		"self_attn.kv_b_proj", "attn_kv_b",
-		"self_attn.q_a_proj", "attn_q_a",
-		"self_attn.q_a_layernorm", "attn_q_a_norm",
-		"self_attn.q_b_proj", "attn_q_b",
-		"self_attn.o_proj", "attn_output",
-		"post_attention_layernorm", "ffn_norm",
-		"mlp.shared_experts.down_proj", "ffn_down_shexp",
-		"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
-		"mlp.shared_experts.up_proj", "ffn_up_shexp",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"mlp.gate.e_score_correction_bias", "exp_probs_b.bias",
-		"mlp.gate", "ffn_gate_inp",
-	}
-}
-
-func (p *glm4MoeLiteModel) Tensors(s []Tensor) (out []*ggml.Tensor) {
-	merges := make([]merge, p.HiddenLayers*3)
-	for i := range p.HiddenLayers {
-		merges[i*3+0] = merge{
-			fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
-			fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
-		}
-		merges[i*3+1] = merge{
-			fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
-			fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
-		}
-		merges[i*3+2] = merge{
-			fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
-			fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
-		}
-	}
-
-	skipLayer := func(n string, minValue uint32) bool {
-		re := regexp.MustCompile(`^blk\.(\d+)`)
-		matches := re.FindStringSubmatch(n)
-		if matches == nil {
-			return false
-		}
-
-		blkNum, err := strconv.Atoi(matches[1])
-		if err != nil {
-			return false
-		}
-
-		return uint32(blkNum) >= minValue
-	}
-
-	out, s = mergeTensors(s, merges...)
-	for _, t := range s {
-		// skip any additional layers (such as the Multi-Token Prediction layer)
-		if skipLayer(t.Name(), p.HiddenLayers) {
-			slog.Debug("skipping layer", "name", t.Name())
-			continue
-		}
-		out = append(out, &ggml.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-	return out
-}
--- a/convert/convert_lfm2.go
+++ b/convert/convert_lfm2.go
@@ -1,100 +0,0 @@
-package convert
-
-import (
-	"slices"
-	"strings"
-
-	"github.com/ollama/ollama/fs/ggml"
-)
-
-type lfm2Model struct {
-	ModelParameters
-	HiddenSize            uint32   `json:"hidden_size"`
-	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
-	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
-	IntermediateSize      uint32   `json:"intermediate_size"`
-	NumAttentionHeads     uint32   `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
-	RopeTheta             float32  `json:"rope_theta"`
-	NormEps               float32  `json:"norm_eps"`
-	ConvLCache            uint32   `json:"conv_L_cache"`
-	LayerTypes            []string `json:"layer_types"`
-	TieEmbedding          bool     `json:"tie_embedding"`
-}
-
-var _ ModelConverter = (*lfm2Model)(nil)
-
-func (p *lfm2Model) KV(t *Tokenizer) KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "lfm2"
-	kv["lfm2.vocab_size"] = p.VocabSize
-	kv["lfm2.block_count"] = p.NumHiddenLayers
-	kv["lfm2.embedding_length"] = p.HiddenSize
-	kv["lfm2.feed_forward_length"] = p.IntermediateSize
-	kv["lfm2.context_length"] = p.MaxPositionEmbeddings
-
-	// Build per-layer KV head count array based on layer_types
-	// (0 = shortconv layer, non-zero = attention layer with that many KV heads)
-	kvHeadCounts := make([]uint32, p.NumHiddenLayers)
-	for i := range p.NumHiddenLayers {
-		if int(i) < len(p.LayerTypes) && p.LayerTypes[i] == "full_attention" {
-			kvHeadCounts[i] = p.NumKeyValueHeads
-		}
-	}
-
-	kv["lfm2.attention.head_count"] = p.NumAttentionHeads
-	kv["lfm2.attention.head_count_kv"] = kvHeadCounts
-	kv["lfm2.attention.key_length"] = p.HiddenSize / p.NumAttentionHeads
-	kv["lfm2.attention.value_length"] = p.HiddenSize / p.NumAttentionHeads
-	kv["lfm2.attention.layer_norm_rms_epsilon"] = p.NormEps
-	kv["lfm2.rope.freq_base"] = p.RopeTheta
-	kv["lfm2.shortconv.l_cache"] = p.ConvLCache
-
-	return kv
-}
-
-func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
-	var out []*ggml.Tensor
-
-	for _, t := range ts {
-		shape := t.Shape()
-
-		// Squeeze conv weights: [D, 1, K] -> [D, K]
-		if strings.HasSuffix(t.Name(), "shortconv.conv.weight") {
-			if len(shape) == 3 && shape[1] == 1 {
-				shape = []uint64{shape[0], shape[2]}
-			}
-		}
-
-		out = append(out, &ggml.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    slices.Clone(shape),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *lfm2Model) Replacements() []string {
-	return []string{
-		"model.embed_tokens", "token_embd",
-		"model.embedding_norm", "output_norm",
-		"model.layers", "blk",
-		"operator_norm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.out_proj", "attn_output",
-		"self_attn.q_layernorm", "attn_q_norm",
-		"self_attn.k_layernorm", "attn_k_norm",
-		"conv.conv", "shortconv.conv",
-		"conv.in_proj", "shortconv.in_proj",
-		"conv.out_proj", "shortconv.out_proj",
-		"feed_forward.w1", "ffn_gate",
-		"feed_forward.w2", "ffn_down",
-		"feed_forward.w3", "ffn_up",
-		"ffn_norm", "ffn_norm",
-	}
-}
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -40,7 +40,6 @@ const (
 func (t tensorBase) Kind() uint32 {
 	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
 		strings.HasSuffix(t.name, ".bias") ||
-		strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
 		t.name == "token_types.weight" ||
 		t.name == "v.positional_embedding_vlm" ||
 		t.name == "v.tile_position_embd.weight" ||
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -269,8 +269,6 @@ func (kv KV) OllamaEngineRequired() bool {
 		"qwen25vl",
 		"qwen3", "qwen3moe",
 		"qwen3vl", "qwen3vlmoe",
-		"glm4moelite",
-		"lfm2",
 	}, kv.Architecture())
 }

@@ -858,9 +856,7 @@ func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"bert",
 		"gemma3",
-		"glm4moelite",
 		"gptoss", "gpt-oss",
-		"lfm2",
 		"mistral3",
 		"olmo3",
 		"qwen3", "qwen3moe",
--- a/integration/imagegen_test.go
+++ b/integration/imagegen_test.go
@@ -1,148 +0,0 @@
-//go:build integration
-
-package integration
-
-import (
-	"context"
-	"encoding/base64"
-	"fmt"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestImageGeneration(t *testing.T) {
-	skipUnderMinVRAM(t, 8)
-
-	type testCase struct {
-		imageGenModel string
-		visionModel   string
-		prompt        string
-		expectedWords []string
-	}
-
-	testCases := []testCase{
-		{
-			imageGenModel: "jmorgan/z-image-turbo",
-			visionModel:   "llama3.2-vision",
-			prompt:        "A cartoon style llama flying like a superhero through the air with clouds in the background",
-			expectedWords: []string{"llama", "flying", "cartoon", "cloud", "sky", "superhero", "air", "animal", "camelid"},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("%s->%s", tc.imageGenModel, tc.visionModel), func(t *testing.T) {
-			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
-			defer cancel()
-
-			client, _, cleanup := InitServerConnection(ctx, t)
-			defer cleanup()
-
-			// Pull both models
-			if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
-				t.Fatalf("failed to pull image gen model: %v", err)
-			}
-			if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
-				t.Fatalf("failed to pull vision model: %v", err)
-			}
-
-			// Generate the image
-			t.Logf("Generating image with prompt: %s", tc.prompt)
-			imageBase64, err := generateImage(ctx, client, tc.imageGenModel, tc.prompt)
-			if err != nil {
-				if strings.Contains(err.Error(), "image generation not available") {
-					t.Skip("Target system does not support image generation")
-				} else if strings.Contains(err.Error(), "executable file not found in") { // Windows pattern, not yet supported
-					t.Skip("Windows does not support image generation yet")
-				} else if strings.Contains(err.Error(), "CUDA driver version is insufficient") {
-					t.Skip("Driver is too old")
-				} else if strings.Contains(err.Error(), "insufficient memory for image generation") {
-					t.Skip("insufficient memory for image generation")
-				} else if strings.Contains(err.Error(), "error while loading shared libraries: libcuda.so.1") { // AMD GPU or CPU
-					t.Skip("CUDA GPU is not available")
-				} else if strings.Contains(err.Error(), "ollama-mlx: no such file or directory") {
-					// most likely linux arm - not supported yet
-					t.Skip("unsupported architecture")
-				}
-				t.Fatalf("failed to generate image: %v", err)
-			}
-
-			imageData, err := base64.StdEncoding.DecodeString(imageBase64)
-			if err != nil {
-				t.Fatalf("failed to decode image: %v", err)
-			}
-			t.Logf("Generated image: %d bytes", len(imageData))
-
-			// Preload vision model and check GPU loading
-			err = client.Generate(ctx, &api.GenerateRequest{Model: tc.visionModel}, func(response api.GenerateResponse) error { return nil })
-			if err != nil {
-				t.Fatalf("failed to load vision model: %v", err)
-			}
-
-			// Use vision model to describe the image
-			chatReq := api.ChatRequest{
-				Model: tc.visionModel,
-				Messages: []api.Message{
-					{
-						Role:    "user",
-						Content: "Describe this image in detail. What is shown? What style is it? What is the main subject doing?",
-						Images:  []api.ImageData{imageData},
-					},
-				},
-				Stream: &stream,
-				Options: map[string]any{
-					"seed":        42,
-					"temperature": 0.0,
-				},
-			}
-
-			// Verify the vision model's response contains expected keywords
-			response := DoChat(ctx, t, client, chatReq, tc.expectedWords, 240*time.Second, 30*time.Second)
-			if response != nil {
-				t.Logf("Vision model response: %s", response.Content)
-
-				// Additional detailed check for keywords
-				content := strings.ToLower(response.Content)
-				foundWords := []string{}
-				missingWords := []string{}
-				for _, word := range tc.expectedWords {
-					if strings.Contains(content, word) {
-						foundWords = append(foundWords, word)
-					} else {
-						missingWords = append(missingWords, word)
-					}
-				}
-				t.Logf("Found keywords: %v", foundWords)
-				if len(missingWords) > 0 {
-					t.Logf("Missing keywords (at least one was found so test passed): %v", missingWords)
-				}
-			}
-		})
-	}
-}
-
-// generateImage calls the Ollama API to generate an image and returns the base64 image data
-func generateImage(ctx context.Context, client *api.Client, model, prompt string) (string, error) {
-	var imageBase64 string
-
-	err := client.Generate(ctx, &api.GenerateRequest{
-		Model:  model,
-		Prompt: prompt,
-	}, func(resp api.GenerateResponse) error {
-		if resp.Image != "" {
-			imageBase64 = resp.Image
-		}
-		return nil
-	})
-	if err != nil {
-		return "", fmt.Errorf("failed to generate image: %w", err)
-	}
-
-	if imageBase64 == "" {
-		return "", fmt.Errorf("no image data in response")
-	}
-
-	return imageBase64, nil
-}
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -38,7 +38,6 @@ var (

 	// Note: add newer models at the top of the list to test them first
 	ollamaEngineChatModels = []string{
-		"lfm2.5-thinking",
 		"ministral-3",
 		"qwen3-coder:30b",
 		"gpt-oss:20b",
@@ -144,7 +143,6 @@ var (
 		"granite3.3",
 		"hermes3",
 		"internlm2",
-		"lfm2.5-thinking",
 		"llama-guard3",
 		"llama-pro",
 		"llama2-chinese",
@@ -265,7 +263,6 @@ var (
 		"snowflake-arctic-embed2",
 	}
 	libraryToolsModels = []string{
-		"lfm2.5-thinking",
 		"qwen3-vl",
 		"gpt-oss:20b",
 		"gpt-oss:120b",
--- a/manifest/paths.go
+++ b/manifest/paths.go
@@ -1,95 +0,0 @@
-package manifest
-
-import (
-	"errors"
-	"fmt"
-	"os"
-	"path/filepath"
-	"regexp"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/types/model"
-)
-
-var ErrInvalidDigestFormat = errors.New("invalid digest format")
-
-func Path() (string, error) {
-	path := filepath.Join(envconfig.Models(), "manifests")
-	if err := os.MkdirAll(path, 0o755); err != nil {
-		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
-	}
-
-	return path, nil
-}
-
-// PathForName returns the path to the manifest file for a specific model name.
-func PathForName(n model.Name) (string, error) {
-	if !n.IsValid() {
-		return "", os.ErrNotExist
-	}
-
-	manifests, err := Path()
-	if err != nil {
-		return "", err
-	}
-
-	return filepath.Join(manifests, n.Filepath()), nil
-}
-
-func BlobsPath(digest string) (string, error) {
-	// only accept actual sha256 digests
-	pattern := "^sha256[:-][0-9a-fA-F]{64}$"
-	re := regexp.MustCompile(pattern)
-
-	if digest != "" && !re.MatchString(digest) {
-		return "", ErrInvalidDigestFormat
-	}
-
-	digest = strings.ReplaceAll(digest, ":", "-")
-	path := filepath.Join(envconfig.Models(), "blobs", digest)
-	dirPath := filepath.Dir(path)
-	if digest == "" {
-		dirPath = path
-	}
-
-	if err := os.MkdirAll(dirPath, 0o755); err != nil {
-		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
-	}
-
-	return path, nil
-}
-
-// PruneDirectory removes empty directories recursively.
-func PruneDirectory(path string) error {
-	info, err := os.Lstat(path)
-	if err != nil {
-		return err
-	}
-
-	if info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
-		entries, err := os.ReadDir(path)
-		if err != nil {
-			return err
-		}
-
-		for _, entry := range entries {
-			if err := PruneDirectory(filepath.Join(path, entry.Name())); err != nil {
-				return err
-			}
-		}
-
-		entries, err = os.ReadDir(path)
-		if err != nil {
-			return err
-		}
-
-		if len(entries) > 0 {
-			return nil
-		}
-
-		return os.Remove(path)
-	}
-
-	return nil
-}
--- a/middleware/openai.go
+++ b/middleware/openai.go
@@ -609,49 +609,3 @@ func ImageGenerationsMiddleware() gin.HandlerFunc {
 		c.Next()
 	}
 }
-
-func ImageEditsMiddleware() gin.HandlerFunc {
-	return func(c *gin.Context) {
-		var req openai.ImageEditRequest
-		if err := c.ShouldBindJSON(&req); err != nil {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
-			return
-		}
-
-		if req.Prompt == "" {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "prompt is required"))
-			return
-		}
-
-		if req.Model == "" {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "model is required"))
-			return
-		}
-
-		if req.Image == "" {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "image is required"))
-			return
-		}
-
-		genReq, err := openai.FromImageEditRequest(req)
-		if err != nil {
-			c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
-			return
-		}
-
-		var b bytes.Buffer
-		if err := json.NewEncoder(&b).Encode(genReq); err != nil {
-			c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error()))
-			return
-		}
-
-		c.Request.Body = io.NopCloser(&b)
-
-		w := &ImageWriter{
-			BaseWriter: BaseWriter{ResponseWriter: c.Writer},
-		}
-
-		c.Writer = w
-		c.Next()
-	}
-}
--- a/middleware/openai_test.go
+++ b/middleware/openai_test.go
@@ -1112,129 +1112,3 @@ func TestImageWriterResponse(t *testing.T) {
 		t.Errorf("expected image data 'dGVzdC1pbWFnZS1kYXRh', got %s", imageResp.Data[0].B64JSON)
 	}
 }
-
-func TestImageEditsMiddleware(t *testing.T) {
-	type testCase struct {
-		name string
-		body string
-		req  api.GenerateRequest
-		err  openai.ErrorResponse
-	}
-
-	var capturedRequest *api.GenerateRequest
-
-	// Base64-encoded test image (1x1 pixel PNG)
-	testImage := "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII="
-	decodedImage, _ := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=")
-
-	testCases := []testCase{
-		{
-			name: "image edit basic",
-			body: `{
-				"model": "test-model",
-				"prompt": "make it blue",
-				"image": "` + testImage + `"
-			}`,
-			req: api.GenerateRequest{
-				Model:  "test-model",
-				Prompt: "make it blue",
-				Images: []api.ImageData{decodedImage},
-			},
-		},
-		{
-			name: "image edit with size",
-			body: `{
-				"model": "test-model",
-				"prompt": "make it blue",
-				"image": "` + testImage + `",
-				"size": "512x768"
-			}`,
-			req: api.GenerateRequest{
-				Model:  "test-model",
-				Prompt: "make it blue",
-				Images: []api.ImageData{decodedImage},
-				Width:  512,
-				Height: 768,
-			},
-		},
-		{
-			name: "image edit missing prompt",
-			body: `{
-				"model": "test-model",
-				"image": "` + testImage + `"
-			}`,
-			err: openai.ErrorResponse{
-				Error: openai.Error{
-					Message: "prompt is required",
-					Type:    "invalid_request_error",
-				},
-			},
-		},
-		{
-			name: "image edit missing model",
-			body: `{
-				"prompt": "make it blue",
-				"image": "` + testImage + `"
-			}`,
-			err: openai.ErrorResponse{
-				Error: openai.Error{
-					Message: "model is required",
-					Type:    "invalid_request_error",
-				},
-			},
-		},
-		{
-			name: "image edit missing image",
-			body: `{
-				"model": "test-model",
-				"prompt": "make it blue"
-			}`,
-			err: openai.ErrorResponse{
-				Error: openai.Error{
-					Message: "image is required",
-					Type:    "invalid_request_error",
-				},
-			},
-		},
-	}
-
-	endpoint := func(c *gin.Context) {
-		c.Status(http.StatusOK)
-	}
-
-	gin.SetMode(gin.TestMode)
-	router := gin.New()
-	router.Use(ImageEditsMiddleware(), captureRequestMiddleware(&capturedRequest))
-	router.Handle(http.MethodPost, "/api/generate", endpoint)
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
-			req.Header.Set("Content-Type", "application/json")
-
-			defer func() { capturedRequest = nil }()
-
-			resp := httptest.NewRecorder()
-			router.ServeHTTP(resp, req)
-
-			if tc.err.Error.Message != "" {
-				var errResp openai.ErrorResponse
-				if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
-					t.Fatal(err)
-				}
-				if diff := cmp.Diff(tc.err, errResp); diff != "" {
-					t.Fatalf("errors did not match:\n%s", diff)
-				}
-				return
-			}
-
-			if resp.Code != http.StatusOK {
-				t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
-			}
-
-			if diff := cmp.Diff(&tc.req, capturedRequest); diff != "" {
-				t.Fatalf("requests did not match:\n%s", diff)
-			}
-		})
-	}
-}
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -162,7 +162,6 @@ type Tensor interface {
 	AvgPool2D(ctx Context, k, s int, p float32) Tensor
 	Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
 	Conv3D(ctx Context, weight Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) Tensor
-	SSMConv(ctx Context, kernel Tensor) Tensor

 	IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor

--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -1641,13 +1641,6 @@ func (t *Tensor) Conv3D(ctx ml.Context, t2 ml.Tensor, c, s0, s1, s2, p0, p1, p2,
 	return tt
 }

-func (t *Tensor) SSMConv(ctx ml.Context, kernel ml.Tensor) ml.Tensor {
-	return &Tensor{
-		b: t.b,
-		t: C.ggml_ssm_conv(ctx.(*Context).ctx, t.t, kernel.(*Tensor).t),
-	}
-}
-
 func (t *Tensor) AvgPool2D(ctx ml.Context, k, s int, p float32) ml.Tensor {
 	return &Tensor{
 		b: t.b,
--- a/model/models/glm4moelite/model.go
+++ b/model/models/glm4moelite/model.go
@@ -1,304 +0,0 @@
-package glm4moelite
-
-import (
-	"math"
-
-	"github.com/ollama/ollama/fs"
-	"github.com/ollama/ollama/kvcache"
-	"github.com/ollama/ollama/ml"
-	"github.com/ollama/ollama/ml/nn"
-	"github.com/ollama/ollama/model"
-	"github.com/ollama/ollama/model/input"
-)
-
-type Options struct {
-	numExpertsUsed      int
-	numExperts          int
-	normTopKProb        bool
-	routedScalingFactor float32
-
-	kvLoraRank,
-	qkNopeHeadDim,
-	qkRopeHeadDim,
-	kqNopeHeadDim,
-	qkHeadDim int
-	qLoraRank int
-	vHeadDim  int
-
-	hiddenSize,
-	numHeads,
-	numKVHeads int
-
-	eps,
-	ropeBase float32
-	kqScale float64
-}
-
-func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, t, p ml.Tensor) ml.Tensor {
-	return nn.RoPE(ctx, t, p, o.qkRopeHeadDim, o.ropeBase, 1.0)
-}
-
-type Attention struct {
-	Q *nn.Linear `gguf:"attn_q"`
-
-	QA     *nn.Linear  `gguf:"attn_q_a"`
-	QANorm *nn.RMSNorm `gguf:"attn_q_a_norm"`
-	QB     *nn.Linear  `gguf:"attn_q_b"`
-
-	KVA     *nn.Linear  `gguf:"attn_kv_a_mqa"`
-	KVANorm *nn.RMSNorm `gguf:"attn_kv_a_norm"`
-	KVB     *nn.Linear  `gguf:"attn_kv_b"`
-
-	Output *nn.Linear `gguf:"attn_out,alt:attn_output"`
-}
-
-func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
-	seqLength := hiddenStates.Dim(1)
-
-	var query ml.Tensor
-	if opts.qLoraRank == 0 {
-		query = attn.Q.Forward(ctx, hiddenStates)
-	} else {
-		query = attn.QA.Forward(ctx, hiddenStates)
-		query = attn.QANorm.Forward(ctx, query, opts.eps)
-		query = attn.QB.Forward(ctx, query)
-	}
-
-	query = query.Reshape(ctx, query.Dim(0)/opts.numHeads, opts.numHeads, seqLength)
-	queryChunks := query.ChunkSections(ctx, 0, opts.qkNopeHeadDim, opts.qkRopeHeadDim)
-
-	compressedKV := attn.KVA.Forward(ctx, hiddenStates)
-	kPass := compressedKV.Slice(ctx, 0, 0, opts.kvLoraRank, 1)
-	kRot := compressedKV.View(ctx,
-		opts.kvLoraRank*compressedKV.Stride(0), opts.qkRopeHeadDim,
-		compressedKV.Stride(1), 1,
-		compressedKV.Stride(1), compressedKV.Dim(1),
-	)
-
-	qRot := opts.applyRotaryPositionEmbeddings(ctx, queryChunks[1], positions)
-	kRot = opts.applyRotaryPositionEmbeddings(ctx, kRot, positions)
-	kPass = attn.KVANorm.Forward(ctx, kPass, opts.eps)
-	kPass = attn.KVB.Forward(ctx, kPass)
-
-	kv := kPass.Reshape(ctx, kPass.Dim(0)/opts.numKVHeads, opts.numKVHeads, seqLength)
-	kvChunks := kv.ChunkSections(ctx, 0, opts.kqNopeHeadDim, opts.vHeadDim)
-
-	kRot = kRot.Repeat(ctx, 1, queryChunks[0].Dim(1))
-	query = qRot.Concat(ctx, queryChunks[0], 0)
-	key := kRot.Concat(ctx, kvChunks[0], 0)
-	attention := nn.Attention(ctx, query, key, kvChunks[1], opts.kqScale, cache)
-
-	attention = attention.Reshape(ctx, attention.Dim(0)*attention.Dim(1), seqLength)
-	return attn.Output.Forward(ctx, attention)
-}
-
-type MLP interface {
-	Forward(ml.Context, ml.Tensor, *Options) ml.Tensor
-}
-
-type sparse struct {
-	Router       *nn.Linear `gguf:"ffn_gate_inp"`
-	Gate         *nn.Linear `gguf:"ffn_gate_exps"`
-	Up           *nn.Linear `gguf:"ffn_up_exps"`
-	Down         *nn.Linear `gguf:"ffn_down_exps"`
-	SharedExpert *dense     `gguf:",suf:_shexp"`
-	ExpProbsBias ml.Tensor  `gguf:"exp_probs_b.bias,alt:exp_probs_b"`
-}
-
-func (moe *sparse) Moe(ctx ml.Context, hiddenStates, topKIndices, topKWeights ml.Tensor, opts *Options) ml.Tensor {
-	hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0), 1, hiddenStates.Dim(1))
-
-	upStates := moe.Up.Weight.MulmatID(ctx, hiddenStates, topKIndices)
-	hiddenStates = moe.Gate.Weight.MulmatID(ctx, hiddenStates, topKIndices)
-	hiddenStates = hiddenStates.SILU(ctx, upStates)
-
-	experts := moe.Down.Weight.MulmatID(ctx, hiddenStates, topKIndices)
-	experts = experts.Mul(ctx, topKWeights)
-
-	nextStates := experts.View(ctx, 0, experts.Dim(0), experts.Stride(2), experts.Dim(2))
-	for i := 1; i < opts.numExpertsUsed; i++ {
-		nextStates = nextStates.Add(ctx, experts.View(ctx, i*experts.Stride(1), experts.Dim(0), experts.Stride(2), experts.Dim(2)))
-	}
-	return nextStates
-}
-
-func (moe *sparse) topKIndices(ctx ml.Context, scores ml.Tensor, opts *Options) ml.Tensor {
-	if moe.ExpProbsBias != nil {
-		scores = scores.Add(ctx, moe.ExpProbsBias)
-	}
-	topKIndices := scores.TopK(ctx, opts.numExpertsUsed)
-	return topKIndices
-}
-
-func (moe *sparse) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
-	residuals := hiddenStates
-
-	routerLogits := moe.Router.Forward(ctx, hiddenStates)
-	scores := routerLogits.Sigmoid(ctx)
-	topKIndices := moe.topKIndices(ctx, scores, opts)
-	topKWeights := scores.Reshape(ctx, 1, opts.numExperts, hiddenStates.Dim(1)).Rows(ctx, topKIndices)
-
-	if opts.normTopKProb {
-		topKWeights = topKWeights.Reshape(ctx, opts.numExpertsUsed, hiddenStates.Dim(1))
-		topKWeights = topKWeights.Div(ctx, topKWeights.SumRows(ctx))
-		topKWeights = topKWeights.Reshape(ctx, 1, opts.numExpertsUsed, hiddenStates.Dim(1))
-	}
-
-	topKWeights = topKWeights.Scale(ctx, float64(opts.routedScalingFactor))
-	hiddenStates = moe.Moe(ctx, hiddenStates, topKIndices, topKWeights, opts)
-	sharedExpertResult := moe.SharedExpert.Forward(ctx, residuals, opts)
-
-	hiddenStates = hiddenStates.Add(ctx, sharedExpertResult)
-	return hiddenStates
-}
-
-type dense struct {
-	Gate *nn.Linear `gguf:"ffn_gate"`
-	Up   *nn.Linear `gguf:"ffn_up"`
-	Down *nn.Linear `gguf:"ffn_down"`
-}
-
-func (mlp *dense) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
-	hiddenStates = mlp.Gate.Forward(ctx, hiddenStates).SILU(ctx, mlp.Up.Forward(ctx, hiddenStates))
-	return mlp.Down.Forward(ctx, hiddenStates)
-}
-
-type Layer struct {
-	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
-	Attention     *Attention
-
-	MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
-	MLP     MLP
-}
-
-func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
-	residual := hiddenStates
-	hiddenStates = t.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
-	hiddenStates = t.Attention.Forward(ctx, hiddenStates, positions, cache, opts)
-
-	if outputs != nil {
-		hiddenStates = hiddenStates.Rows(ctx, outputs)
-		residual = residual.Rows(ctx, outputs)
-	}
-
-	hiddenStates = hiddenStates.Add(ctx, residual)
-	residual = hiddenStates
-
-	hiddenStates = t.MLPNorm.Forward(ctx, hiddenStates, opts.eps)
-	hiddenStates = t.MLP.Forward(ctx, hiddenStates, opts)
-	hiddenStates = hiddenStates.Add(ctx, residual)
-	return hiddenStates
-}
-
-type Model struct {
-	model.Base
-	model.BytePairEncoding
-
-	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
-	Layers         []Layer       `gguf:"blk"`
-
-	OutputNorm *nn.RMSNorm `gguf:"output_norm"`
-	Output     *nn.Linear  `gguf:"output,alt:token_embd"`
-
-	*Options
-}
-
-func New(c fs.Config) (model.Model, error) {
-	layers := make([]Layer, c.Uint("block_count"))
-
-	firstDenseLayerIndex := int(c.Uint("leading_dense_block_count"))
-	for i := range layers {
-		if i < firstDenseLayerIndex {
-			layers[i].MLP = &dense{}
-		} else {
-			layers[i].MLP = &sparse{}
-		}
-	}
-
-	keyLength := int(c.Uint("attention.key_length"))
-	valueLength := int(c.Uint("attention.value_length"))
-
-	kqScale := 1.0 / math.Sqrt(float64(keyLength))
-
-	var pre []string
-	switch c.String("tokenizer.ggml.pre") {
-	case "glm4":
-		pre = []string{
-			`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
-		}
-	default:
-		return nil, model.ErrUnsupportedTokenizer
-	}
-
-	m := Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
-				Values: c.Strings("tokenizer.ggml.tokens"),
-				Types:  c.Ints("tokenizer.ggml.token_type"),
-				Merges: c.Strings("tokenizer.ggml.merges"),
-				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
-				BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
-				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
-				EOS: append(
-					[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
-					c.Ints("tokenizer.ggml.eos_token_ids")...,
-				),
-			},
-			pre...,
-		),
-		Layers: layers,
-		Options: &Options{
-			hiddenSize:     int(c.Uint("embedding_length")),
-			numHeads:       int(c.Uint("attention.head_count")),
-			numKVHeads:     int(c.Uint("attention.head_count_kv")),
-			eps:            c.Float("attention.layer_norm_rms_epsilon"),
-			ropeBase:       c.Float("rope.freq_base"),
-			numExperts:     int(c.Uint("expert_count")),
-			numExpertsUsed: int(c.Uint("expert_used_count")),
-			normTopKProb:   c.Bool("expert_weights_norm", true),
-
-			qLoraRank:     int(c.Uint("attention.q_lora_rank")),
-			kvLoraRank:    int(c.Uint("attention.kv_lora_rank")),
-			qkHeadDim:     keyLength,
-			vHeadDim:      valueLength,
-			qkRopeHeadDim: int(c.Uint("rope.dimension_count")),
-			qkNopeHeadDim: keyLength - int(c.Uint("rope.dimension_count")),
-			kqNopeHeadDim: keyLength - int(c.Uint("rope.dimension_count")),
-
-			routedScalingFactor: c.Float("expert_weights_scale"),
-
-			kqScale: kqScale,
-		},
-	}
-
-	m.Cache = kvcache.NewCausalCache(m.Shift)
-	return &m, nil
-}
-
-func (m Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return m.applyRotaryPositionEmbeddings(ctx, key, shift), nil
-}
-
-func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
-	positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
-
-	hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)
-
-	for i, layer := range m.Layers {
-		m.Cache.SetLayer(i)
-
-		var outputs ml.Tensor
-		if i == len(m.Layers)-1 {
-			outputs = batch.Outputs
-		}
-
-		hiddenStates = layer.Forward(ctx, hiddenStates, positions, outputs, m.Cache, m.Options)
-	}
-
-	hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)
-	return m.Output.Forward(ctx, hiddenStates), nil
-}
-
-func init() {
-	model.Register("glm4moelite", New)
-}
--- a/model/models/lfm2/cache.go
+++ b/model/models/lfm2/cache.go
@@ -1,410 +0,0 @@
-package lfm2
-
-import (
-	"slices"
-
-	"github.com/ollama/ollama/kvcache"
-	"github.com/ollama/ollama/ml"
-	"github.com/ollama/ollama/model/input"
-)
-
-var _ kvcache.Cache = (*HybridCache)(nil)
-
-// HybridCache stores:
-// - a standard causal KV cache for attention layers
-// - a per-sequence recurrent conv state for shortconv layers
-//
-// Conv state shape (per layer, per sequence): [dConv, hiddenSize] where dConv = L_cache - 1.
-// Stored internally as a tensor of shape [dConv * hiddenSize, maxSlots].
-type HybridCache struct {
-	kv *kvcache.Causal
-
-	backend      ml.Backend
-	dtype        ml.DType
-	maxSequences int
-
-	hiddenSize int
-	dConv      int
-
-	// slot mapping for recurrent state
-	slotForSeq map[int]int
-	refCount   []int
-	freeSlots  []int
-
-	// per-layer conv state buffers (allocated lazily)
-	convCtxs   map[int]ml.Context
-	convStates map[int]ml.Tensor // [dConv*hiddenSize, maxSlots]
-
-	// current forward batch (derived in StartForward)
-	curSeqs       []int
-	curSlots      []int
-	curSlotsInput ml.Tensor
-	curSeqTokens  int
-
-	// track if EnsureWritable has been called for this forward pass
-	writableEnsured bool
-	// track any error from EnsureWritable to propagate later
-	writableError error
-}
-
-func NewHybridCache(shift func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error), hiddenSize, dConv int) *HybridCache {
-	return &HybridCache{
-		kv:         kvcache.NewCausalCache(shift),
-		hiddenSize: hiddenSize,
-		dConv:      dConv,
-		slotForSeq: make(map[int]int),
-		convCtxs:   make(map[int]ml.Context),
-		convStates: make(map[int]ml.Tensor),
-	}
-}
-
-func (c *HybridCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {
-	c.backend = backend
-	c.dtype = dtype
-	c.maxSequences = maxSequences
-
-	// initialize slot allocator
-	c.refCount = make([]int, maxSequences)
-	c.freeSlots = c.freeSlots[:0]
-	for i := maxSequences - 1; i >= 0; i-- {
-		c.freeSlots = append(c.freeSlots, i)
-	}
-
-	c.kv.Init(backend, dtype, maxSequences, capacity, maxBatch)
-}
-
-func (c *HybridCache) Close() {
-	for _, ctx := range c.convCtxs {
-		ctx.Close()
-	}
-	c.kv.Close()
-}
-
-func (c *HybridCache) SetConfig(config ml.CacheConfig) {
-	c.kv.SetConfig(config)
-}
-
-func (c *HybridCache) SetLayer(layer int) {
-	c.kv.SetLayer(layer)
-}
-
-func (c *HybridCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
-	return c.kv.Get(ctx)
-}
-
-func (c *HybridCache) Put(ctx ml.Context, key, value ml.Tensor) {
-	c.kv.Put(ctx, key, value)
-}
-
-func (c *HybridCache) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error {
-	if err := c.kv.StartForward(ctx, batch, reserve); err != nil {
-		return err
-	}
-
-	// Derive equal-length sequence layout for shortconv.
-	// LFM2 shortconv assumes tokens form a [seq_tokens, seqs] grid.
-	seqCounts := make(map[int]int)
-	c.curSeqs = c.curSeqs[:0]
-	for _, s := range batch.Sequences {
-		if _, ok := seqCounts[s]; !ok {
-			c.curSeqs = append(c.curSeqs, s)
-		}
-		seqCounts[s]++
-	}
-
-	if len(c.curSeqs) == 0 {
-		return nil
-	}
-
-	nTokens := len(batch.Sequences)
-	nSeqs := len(c.curSeqs)
-	want := nTokens / nSeqs
-	for _, s := range c.curSeqs {
-		if seqCounts[s] != want {
-			return kvcache.ErrNotSupported
-		}
-	}
-
-	c.curSeqTokens = want
-
-	// When reserving memory for estimation, use fake slot assignments
-	// without modifying permanent state (slotForSeq, refCount)
-	if reserve {
-		c.curSlots = c.curSlots[:0]
-		slots := make([]int32, nSeqs)
-		for i := range nSeqs {
-			c.curSlots = append(c.curSlots, i)
-			slots[i] = int32(i)
-		}
-		c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
-		return nil
-	}
-
-	// Ensure slots exist for sequences in this batch
-	c.curSlots = c.curSlots[:0]
-	var newSlots []int // track newly allocated slots that need zeroing
-	for _, s := range c.curSeqs {
-		slot, ok := c.slotForSeq[s]
-		if !ok {
-			var err error
-			slot, err = c.allocSlot()
-			if err != nil {
-				return err
-			}
-			c.slotForSeq[s] = slot
-			c.refCount[slot] = 1
-			newSlots = append(newSlots, slot)
-		}
-		c.curSlots = append(c.curSlots, slot)
-	}
-
-	// Zero conv state for newly allocated slots to clear stale data from previous sequences
-	if len(newSlots) > 0 {
-		c.zeroConvSlots(ctx, newSlots)
-	}
-
-	// Create a tensor for the current slots
-	slots := make([]int32, len(c.curSlots))
-	for i, v := range c.curSlots {
-		slots[i] = int32(v)
-	}
-	c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
-
-	// Reset writable state for new forward pass
-	c.writableEnsured = false
-	c.writableError = nil
-
-	return nil
-}
-
-func (c *HybridCache) allocSlot() (int, error) {
-	if len(c.freeSlots) == 0 {
-		return 0, kvcache.ErrKvCacheFull
-	}
-	slot := c.freeSlots[len(c.freeSlots)-1]
-	c.freeSlots = c.freeSlots[:len(c.freeSlots)-1]
-	return slot, nil
-}
-
-func (c *HybridCache) freeSlot(slot int) {
-	// Bounds check before freeing
-	if slot >= 0 && slot < c.maxSequences {
-		c.freeSlots = append(c.freeSlots, slot)
-	}
-}
-
-// zeroConvSlots zeros the conv state for the given slots across all layers.
-// This must be called when recycling slots to prevent stale state from affecting new sequences.
-func (c *HybridCache) zeroConvSlots(ctx ml.Context, slots []int) {
-	if len(slots) == 0 || len(c.convStates) == 0 {
-		return
-	}
-
-	// Use input context for creating tensors
-	inputCtx := ctx.Input()
-
-	// Create slot indices tensor
-	slotIndices := make([]int32, len(slots))
-	for i, s := range slots {
-		slotIndices[i] = int32(s)
-	}
-	slotsTensor := inputCtx.FromInts(slotIndices, len(slotIndices))
-
-	// Create zero tensor for the slots (SetRows requires F32 source)
-	zeros := inputCtx.Zeros(ml.DTypeF32, c.dConv*c.hiddenSize, len(slots))
-
-	// Zero each layer's conv state for these slots
-	for _, buf := range c.convStates {
-		ctx.Forward(buf.SetRows(ctx, zeros, slotsTensor))
-	}
-}
-
-// EnsureWritable ensures that sequences in the current batch have private (non-shared) conv slots.
-// Returns an error if slot allocation fails.
-func (c *HybridCache) EnsureWritable(ctx ml.Context) error {
-	for i, seq := range c.curSeqs {
-		slot, ok := c.slotForSeq[seq]
-		if !ok {
-			continue
-		}
-
-		// Bounds check
-		if slot < 0 || slot >= len(c.refCount) {
-			continue
-		}
-
-		if c.refCount[slot] <= 1 {
-			continue
-		}
-
-		newSlot, err := c.allocSlot()
-		if err != nil {
-			return err
-		}
-		c.refCount[slot]--
-		c.refCount[newSlot] = 1
-		c.slotForSeq[seq] = newSlot
-		c.curSlots[i] = newSlot
-
-		// Copy existing conv state for all initialized layers
-		for _, buf := range c.convStates {
-			// buf: [dConv*hiddenSize, maxSlots]
-			src := buf.Rows(ctx, ctx.Input().FromInts([]int32{int32(slot)}, 1))
-			// SetRows requires F32 source
-			srcF32 := src.Cast(ctx, ml.DTypeF32)
-			ctx.Forward(buf.SetRows(ctx, srcF32, ctx.Input().FromInts([]int32{int32(newSlot)}, 1)))
-		}
-	}
-
-	// Rebuild current slots tensor
-	slots := make([]int32, len(c.curSlots))
-	for i, v := range c.curSlots {
-		slots[i] = int32(v)
-	}
-	c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
-
-	return nil
-}
-
-func (c *HybridCache) CopyPrefix(srcSeq, dstSeq int, prefixLen int32) {
-	// KV cache shares prefix metadata (no copy) which is correct for prefix reuse.
-	c.kv.CopyPrefix(srcSeq, dstSeq, prefixLen)
-
-	// For shortconv state we implement copy-on-write: dst shares the same slot as src.
-	// On the first write to dst, EnsureWritable will create a private slot.
-	if dstSlot, ok := c.slotForSeq[dstSeq]; ok {
-		// Bounds check before decrementing
-		if dstSlot >= 0 && dstSlot < len(c.refCount) {
-			c.refCount[dstSlot]--
-			if c.refCount[dstSlot] <= 0 {
-				c.refCount[dstSlot] = 0
-				c.freeSlot(dstSlot)
-			}
-		}
-		delete(c.slotForSeq, dstSeq)
-	}
-
-	srcSlot, ok := c.slotForSeq[srcSeq]
-	if !ok {
-		// src may not have a slot yet; dst will allocate on demand
-		return
-	}
-
-	// Bounds check before incrementing
-	if srcSlot >= 0 && srcSlot < len(c.refCount) {
-		c.slotForSeq[dstSeq] = srcSlot
-		c.refCount[srcSlot]++
-	}
-}
-
-func (c *HybridCache) CanResume(seq int, pos int32) bool {
-	return c.kv.CanResume(seq, pos)
-}
-
-func (c *HybridCache) Remove(seq int, beginIndex, endIndex int32) error {
-	if err := c.kv.Remove(seq, beginIndex, endIndex); err != nil {
-		return err
-	}
-
-	// For recurrent state, any removal invalidates the state because
-	// the state at position N depends on all previous positions.
-	// Drop the slot mapping so it resets on next use.
-	slot, ok := c.slotForSeq[seq]
-	if !ok {
-		return nil
-	}
-
-	// Bounds check
-	if slot < 0 || slot >= len(c.refCount) {
-		delete(c.slotForSeq, seq)
-		return nil
-	}
-
-	c.refCount[slot]--
-	if c.refCount[slot] <= 0 {
-		c.refCount[slot] = 0
-		c.freeSlot(slot)
-	}
-	delete(c.slotForSeq, seq)
-
-	return nil
-}
-
-func (c *HybridCache) slotsTensor() ml.Tensor {
-	return c.curSlotsInput
-}
-
-func (c *HybridCache) seqTokens() int {
-	return c.curSeqTokens
-}
-
-func (c *HybridCache) numSeqs() int {
-	return len(c.curSeqs)
-}
-
-func (c *HybridCache) convBuffer(ctx ml.Context, layer int) ml.Tensor {
-	if buf, ok := c.convStates[layer]; ok {
-		return buf
-	}
-
-	if _, ok := c.convCtxs[layer]; !ok {
-		c.convCtxs[layer] = c.backend.NewContextSize(1).Layer(layer)
-	}
-
-	buf := c.convCtxs[layer].Zeros(c.dtype, c.dConv*c.hiddenSize, c.maxSequences)
-	c.convStates[layer] = buf
-	return buf
-}
-
-// ConvState returns the conv state for current batch sequences as shape [dConv, hiddenSize, nSeqs].
-// Returns an error if copy-on-write allocation fails.
-func (c *HybridCache) ConvState(ctx ml.Context, layer int) (ml.Tensor, error) {
-	if !c.writableEnsured {
-		needsWritable := false
-		for _, seq := range c.curSeqs {
-			slot, ok := c.slotForSeq[seq]
-			if !ok {
-				continue
-			}
-			if slot >= 0 && slot < len(c.refCount) && c.refCount[slot] > 1 {
-				needsWritable = true
-				break
-			}
-		}
-
-		if needsWritable {
-			if err := c.EnsureWritable(ctx); err != nil {
-				c.writableError = err
-			}
-		}
-		c.writableEnsured = true
-	}
-
-	if c.writableError != nil {
-		return nil, c.writableError
-	}
-
-	buf := c.convBuffer(ctx, layer)
-	cur := buf.Rows(ctx, c.slotsTensor())
-	return cur.Reshape(ctx, c.dConv, c.hiddenSize, c.numSeqs()), nil
-}
-
-// UpdateConvState writes a new conv state for current batch sequences.
-// newState must have shape [dConv, hiddenSize, nSeqs].
-func (c *HybridCache) UpdateConvState(ctx ml.Context, layer int, newState ml.Tensor) {
-	buf := c.convBuffer(ctx, layer)
-	src := newState.Reshape(ctx, c.dConv*c.hiddenSize, c.numSeqs())
-	// SetRows requires F32 source
-	srcF32 := src.Cast(ctx, ml.DTypeF32)
-	ctx.Forward(buf.SetRows(ctx, srcF32, c.slotsTensor()))
-}
-
-// IsSupportedForBatch returns true if the current batch layout supports shortconv.
-func (c *HybridCache) IsSupportedForBatch() bool {
-	return c.curSeqTokens > 0 && len(c.curSeqs) > 0
-}
-
-// Seqs returns the ordered unique sequences for the current forward pass.
-func (c *HybridCache) Seqs() []int {
-	return slices.Clone(c.curSeqs)
-}
--- a/model/models/lfm2/cache_test.go
+++ b/model/models/lfm2/cache_test.go
@@ -1,444 +0,0 @@
-package lfm2
-
-import (
-	"testing"
-
-	"github.com/ollama/ollama/kvcache"
-	"github.com/ollama/ollama/ml"
-)
-
-// TestHybridCache tests verify the slot management logic of HybridCache.
-// These tests focus on the recurrent state slot allocation, reference counting,
-// and copy-on-write semantics without requiring a full ML backend.
-
-// createSlotOnlyCache creates a HybridCache with only the slot management
-// fields initialized. Used to test slot logic in isolation.
-func createSlotOnlyCache(maxSequences int) *HybridCache {
-	return &HybridCache{
-		hiddenSize:   256,
-		dConv:        3,
-		maxSequences: maxSequences,
-		refCount:     make([]int, maxSequences),
-		freeSlots:    initFreeSlots(maxSequences),
-		slotForSeq:   make(map[int]int),
-		convCtxs:     make(map[int]ml.Context),
-		convStates:   make(map[int]ml.Tensor),
-	}
-}
-
-func initFreeSlots(n int) []int {
-	slots := make([]int, 0, n)
-	for i := n - 1; i >= 0; i-- {
-		slots = append(slots, i)
-	}
-	return slots
-}
-
-func TestHybridCache_SlotAllocation(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Verify initial state
-	if len(cache.freeSlots) != 4 {
-		t.Errorf("expected 4 free slots, got %d", len(cache.freeSlots))
-	}
-
-	// Allocate all slots
-	for range 4 {
-		slot, err := cache.allocSlot()
-		if err != nil {
-			t.Fatalf("allocSlot failed: %v", err)
-		}
-		cache.refCount[slot] = 1
-	}
-
-	// Should be full now
-	if len(cache.freeSlots) != 0 {
-		t.Errorf("expected 0 free slots, got %d", len(cache.freeSlots))
-	}
-
-	// Trying to allocate another should fail
-	_, err := cache.allocSlot()
-	if err != kvcache.ErrKvCacheFull {
-		t.Errorf("expected ErrKvCacheFull, got %v", err)
-	}
-}
-
-func TestHybridCache_SlotReuse(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Allocate a slot
-	slot1, _ := cache.allocSlot()
-	cache.refCount[slot1] = 1
-
-	// Free it
-	cache.refCount[slot1] = 0
-	cache.freeSlot(slot1)
-
-	// Allocate again - should get the same slot back (LIFO)
-	slot2, _ := cache.allocSlot()
-	if slot2 != slot1 {
-		t.Errorf("expected slot %d to be reused, got %d", slot1, slot2)
-	}
-}
-
-func TestHybridCache_SlotRefCounting_ShareSlot(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Allocate slot for seq 1
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	// Simulate sharing slot with seq 2 (copy-on-write style)
-	cache.slotForSeq[2] = slot1
-	cache.refCount[slot1]++
-
-	// Should share the same slot
-	if cache.slotForSeq[2] != slot1 {
-		t.Errorf("expected seq 2 to share slot %d, got %d", slot1, cache.slotForSeq[2])
-	}
-
-	// Ref count should be 2
-	if cache.refCount[slot1] != 2 {
-		t.Errorf("expected refCount 2, got %d", cache.refCount[slot1])
-	}
-}
-
-func TestHybridCache_SlotRefCounting_DecRef(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Allocate slot for seq 1
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	// Share with seq 2
-	cache.slotForSeq[2] = slot1
-	cache.refCount[slot1]++
-
-	// Unshare seq 2
-	cache.refCount[slot1]--
-	delete(cache.slotForSeq, 2)
-
-	// Ref count should be back to 1
-	if cache.refCount[slot1] != 1 {
-		t.Errorf("expected refCount 1 after unshare, got %d", cache.refCount[slot1])
-	}
-
-	// Seq 2 should no longer have a slot
-	if _, ok := cache.slotForSeq[2]; ok {
-		t.Error("seq 2 should not have a slot after unshare")
-	}
-}
-
-func TestHybridCache_SlotFreeWhenUnused(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	initialFreeSlots := len(cache.freeSlots)
-
-	// Allocate slot for seq 1
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	// Free the slot when refCount drops to 0
-	cache.refCount[slot1]--
-	if cache.refCount[slot1] <= 0 {
-		cache.refCount[slot1] = 0
-		cache.freeSlot(slot1)
-	}
-	delete(cache.slotForSeq, 1)
-
-	// Slot should be freed
-	if len(cache.freeSlots) != initialFreeSlots {
-		t.Errorf("expected %d free slots, got %d", initialFreeSlots, len(cache.freeSlots))
-	}
-
-	// Ref count should be 0
-	if cache.refCount[slot1] != 0 {
-		t.Errorf("expected refCount 0, got %d", cache.refCount[slot1])
-	}
-}
-
-func TestHybridCache_SlotOverwrite(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Allocate slots for seq 1 and seq 2
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	slot2, _ := cache.allocSlot()
-	cache.slotForSeq[2] = slot2
-	cache.refCount[slot2] = 1
-
-	initialFreeSlots := len(cache.freeSlots)
-
-	// Simulate overwriting seq 2's slot with slot1 (sharing)
-	// First free the old slot
-	cache.refCount[slot2]--
-	if cache.refCount[slot2] <= 0 {
-		cache.refCount[slot2] = 0
-		cache.freeSlot(slot2)
-	}
-	// Then share slot1
-	cache.slotForSeq[2] = slot1
-	cache.refCount[slot1]++
-
-	// Seq 2 should now share slot1
-	if cache.slotForSeq[2] != slot1 {
-		t.Errorf("expected seq 2 to share slot %d, got %d", slot1, cache.slotForSeq[2])
-	}
-
-	// Old slot2 should be freed
-	if len(cache.freeSlots) != initialFreeSlots+1 {
-		t.Errorf("expected %d free slots, got %d", initialFreeSlots+1, len(cache.freeSlots))
-	}
-}
-
-func TestHybridCache_BoundsChecking(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Test freeing invalid slot (should not panic)
-	cache.freeSlot(-1)
-	cache.freeSlot(100) // out of bounds
-
-	// freeSlot does bounds checking, so invalid slots should be ignored
-	if len(cache.freeSlots) != 4 {
-		t.Errorf("invalid slots should not affect free list, got %d slots", len(cache.freeSlots))
-	}
-}
-
-func TestHybridCache_MultipleSequences_RefCounting(t *testing.T) {
-	cache := createSlotOnlyCache(8)
-
-	// Allocate slot for seq 1
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	// Fork to seq 2, 3, 4 (all share slot1)
-	for _, seq := range []int{2, 3, 4} {
-		cache.slotForSeq[seq] = slot1
-		cache.refCount[slot1]++
-	}
-
-	// Ref count should be 4
-	if cache.refCount[slot1] != 4 {
-		t.Errorf("expected refCount 4, got %d", cache.refCount[slot1])
-	}
-
-	// Remove seq 2, 3
-	for _, seq := range []int{2, 3} {
-		delete(cache.slotForSeq, seq)
-		cache.refCount[slot1]--
-	}
-
-	if cache.refCount[slot1] != 2 {
-		t.Errorf("expected refCount 2, got %d", cache.refCount[slot1])
-	}
-
-	// Slot should still be allocated (not in free list)
-	found := false
-	for _, s := range cache.freeSlots {
-		if s == slot1 {
-			found = true
-			break
-		}
-	}
-	if found {
-		t.Error("slot1 should not be in free list yet")
-	}
-
-	// Remove remaining sequences
-	for _, seq := range []int{1, 4} {
-		delete(cache.slotForSeq, seq)
-		cache.refCount[slot1]--
-	}
-
-	if cache.refCount[slot1] != 0 {
-		t.Errorf("expected refCount 0, got %d", cache.refCount[slot1])
-	}
-}
-
-func TestHybridCache_ChainedSharing(t *testing.T) {
-	cache := createSlotOnlyCache(8)
-
-	// Create seq 1
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	// Share 1 -> 2
-	cache.slotForSeq[2] = slot1
-	cache.refCount[slot1]++
-
-	// Share 2 -> 3 (should still share slot1)
-	cache.slotForSeq[3] = cache.slotForSeq[2] // which is slot1
-	cache.refCount[slot1]++
-
-	// All should share slot1
-	if cache.slotForSeq[1] != slot1 || cache.slotForSeq[2] != slot1 || cache.slotForSeq[3] != slot1 {
-		t.Error("all sequences should share slot1")
-	}
-
-	if cache.refCount[slot1] != 3 {
-		t.Errorf("expected refCount 3, got %d", cache.refCount[slot1])
-	}
-}
-
-func TestHybridCache_CacheParameters(t *testing.T) {
-	cache := NewHybridCache(nil, 512, 5) // hiddenSize=512, dConv=5
-
-	if cache.hiddenSize != 512 {
-		t.Errorf("expected hiddenSize 512, got %d", cache.hiddenSize)
-	}
-	if cache.dConv != 5 {
-		t.Errorf("expected dConv 5, got %d", cache.dConv)
-	}
-}
-
-func TestHybridCache_NumSeqs(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Initially no sequences
-	if cache.numSeqs() != 0 {
-		t.Errorf("expected 0 seqs, got %d", cache.numSeqs())
-	}
-
-	// Manually set up current batch state
-	cache.curSeqs = []int{1, 2, 3}
-
-	if cache.numSeqs() != 3 {
-		t.Errorf("expected 3 seqs, got %d", cache.numSeqs())
-	}
-}
-
-func TestHybridCache_SeqTokens(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Initially 0
-	if cache.seqTokens() != 0 {
-		t.Errorf("expected 0 seqTokens, got %d", cache.seqTokens())
-	}
-
-	// Manually set up current batch state
-	cache.curSeqTokens = 16
-
-	if cache.seqTokens() != 16 {
-		t.Errorf("expected 16 seqTokens, got %d", cache.seqTokens())
-	}
-}
-
-// Test that Seqs returns a clone of curSeqs
-func TestHybridCache_Seqs_ReturnsClone(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	cache.curSeqs = []int{1, 2, 3}
-
-	seqs := cache.Seqs()
-
-	// Modify returned slice
-	seqs[0] = 999
-
-	// Original should be unchanged
-	if cache.curSeqs[0] != 1 {
-		t.Error("Seqs should return a clone, not the original slice")
-	}
-}
-
-func TestHybridCache_IsSupportedForBatch(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Initially not supported (no batch set up)
-	if cache.IsSupportedForBatch() {
-		t.Error("expected IsSupportedForBatch to be false initially")
-	}
-
-	// Set up a valid batch
-	cache.curSeqTokens = 1
-	cache.curSeqs = []int{1}
-
-	if !cache.IsSupportedForBatch() {
-		t.Error("expected IsSupportedForBatch to be true with valid batch")
-	}
-}
-
-func TestHybridCache_ZeroConvSlots_EmptyInputs(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// zeroConvSlots should handle empty slots without panicking
-	cache.zeroConvSlots(nil, nil)
-	cache.zeroConvSlots(nil, []int{})
-
-	// zeroConvSlots should handle empty convStates without panicking
-	cache.zeroConvSlots(nil, []int{0, 1, 2})
-}
-
-func TestHybridCache_SlotRecycling_TracksNewSlots(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Allocate slot for seq 1
-	slot1, _ := cache.allocSlot()
-	cache.slotForSeq[1] = slot1
-	cache.refCount[slot1] = 1
-
-	// Free the slot (simulating sequence removal)
-	cache.refCount[slot1]--
-	cache.freeSlot(slot1)
-	delete(cache.slotForSeq, 1)
-
-	// Verify slot is in free list
-	if len(cache.freeSlots) != 4 {
-		t.Errorf("expected 4 free slots after freeing, got %d", len(cache.freeSlots))
-	}
-
-	// Allocate for new seq 2 - should get recycled slot
-	slot2, _ := cache.allocSlot()
-	if slot2 != slot1 {
-		t.Errorf("expected recycled slot %d, got %d", slot1, slot2)
-	}
-
-	// This recycled slot would need zeroing in the real implementation
-	// The actual zeroing is tested via integration tests since it requires ML context
-}
-
-func TestHybridCache_NewSequence_GetsTrackedForZeroing(t *testing.T) {
-	cache := createSlotOnlyCache(4)
-
-	// Simulate the slot allocation flow from StartForward
-	// When a sequence doesn't have a slot, it gets allocated and tracked as "new"
-
-	newSlots := []int{}
-
-	// Seq 1 doesn't have a slot - allocate and track
-	seq := 1
-	if _, ok := cache.slotForSeq[seq]; !ok {
-		slot, err := cache.allocSlot()
-		if err != nil {
-			t.Fatalf("allocSlot failed: %v", err)
-		}
-		cache.slotForSeq[seq] = slot
-		cache.refCount[slot] = 1
-		newSlots = append(newSlots, slot)
-	}
-
-	// Verify newSlots contains the allocated slot
-	if len(newSlots) != 1 {
-		t.Errorf("expected 1 new slot, got %d", len(newSlots))
-	}
-
-	// Seq 1 already has a slot - should NOT be tracked as new
-	newSlots2 := []int{}
-	if _, ok := cache.slotForSeq[seq]; !ok {
-		slot, _ := cache.allocSlot()
-		cache.slotForSeq[seq] = slot
-		cache.refCount[slot] = 1
-		newSlots2 = append(newSlots2, slot)
-	}
-
-	// Verify no new slots for existing sequence
-	if len(newSlots2) != 0 {
-		t.Errorf("expected 0 new slots for existing sequence, got %d", len(newSlots2))
-	}
-}
--- a/model/models/lfm2/model.go
+++ b/model/models/lfm2/model.go
@@ -1,253 +0,0 @@
-package lfm2
-
-import (
-	"cmp"
-	"math"
-
-	"github.com/ollama/ollama/fs"
-	"github.com/ollama/ollama/ml"
-	"github.com/ollama/ollama/ml/nn"
-	"github.com/ollama/ollama/ml/nn/rope"
-	"github.com/ollama/ollama/model"
-	"github.com/ollama/ollama/model/input"
-)
-
-type Options struct {
-	hiddenSize       int
-	headDim, ropeDim int
-
-	eps, ropeBase, ropeScale float32
-
-	ropeType              string
-	originalContextLength int
-
-	// per-layer head counts (LFM2 alternates attention and recurrent layers)
-	numHeadsByLayer   []int
-	numKVHeadsByLayer []int
-}
-
-func (o Options) headDimValue() int {
-	// Head dim is shared across layers; fall back to first attention layer head count.
-	for _, h := range o.numHeadsByLayer {
-		if h > 0 {
-			return cmp.Or(o.headDim, o.hiddenSize/h)
-		}
-	}
-	return cmp.Or(o.headDim, o.hiddenSize)
-}
-
-func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions ml.Tensor) ml.Tensor {
-	opts := []func(*rope.Options){rope.WithTypeNeoX()}
-	if o.ropeType == "yarn" {
-		attnFactor := float32(1.0 / (1.0 + 0.1*math.Log(float64(o.ropeScale))))
-		opts = append(opts,
-			rope.WithOriginalContextLength(o.originalContextLength),
-			rope.WithExtrapolationFactor(1.),
-			rope.WithAttentionFactor(attnFactor),
-		)
-	}
-
-	headCount := 1
-	for _, h := range o.numHeadsByLayer {
-		if h > 0 {
-			headCount = h
-			break
-		}
-	}
-	return nn.RoPE(ctx, states, positions, cmp.Or(o.ropeDim, o.headDim, o.hiddenSize/headCount), o.ropeBase, 1./o.ropeScale, opts...)
-}
-
-type Model struct {
-	model.Base
-	model.TextProcessor
-
-	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
-	Layers         []Layer       `gguf:"blk"`
-	OutputNorm     *nn.RMSNorm   `gguf:"output_norm,alt:token_embd_norm"`
-	Output         *nn.Linear    `gguf:"output,alt:token_embd"`
-
-	Options
-}
-
-func New(c fs.Config) (model.Model, error) {
-	if c.Uint("expert_count") > 0 {
-		return nil, model.ErrUnsupportedModel
-	}
-
-	if c.String("tokenizer.ggml.model") != "gpt2" {
-		return nil, model.ErrUnsupportedTokenizer
-	}
-
-	vocabulary := model.Vocabulary{
-		Values: c.Strings("tokenizer.ggml.tokens"),
-		Scores: c.Floats("tokenizer.ggml.scores"),
-		Types:  c.Ints("tokenizer.ggml.token_type"),
-		Merges: c.Strings("tokenizer.ggml.merges"),
-		AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
-		BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
-		AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
-		EOS: append(
-			[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
-			c.Ints("tokenizer.ggml.eos_token_ids")...,
-		),
-	}
-
-	var pretokenizers []string
-	switch c.String("tokenizer.ggml.pre") {
-	case "default":
-		// use default BPE pretokenizer
-	default:
-		// llama-bpe style (default for LFM2)
-		pretokenizers = []string{
-			`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
-		}
-	}
-
-	m := Model{
-		TextProcessor: model.NewBytePairEncoding(&vocabulary, pretokenizers...),
-		Layers:        make([]Layer, c.Uint("block_count")),
-		Options: Options{
-			hiddenSize:            int(c.Uint("embedding_length")),
-			headDim:               int(c.Uint("attention.key_length")),
-			ropeDim:               int(c.Uint("rope.dimension_count")),
-			eps:                   c.Float("attention.layer_norm_rms_epsilon"),
-			ropeType:              c.String("rope.scaling.type"),
-			ropeBase:              c.Float("rope.freq_base"),
-			ropeScale:             c.Float("rope.scaling.factor", 1),
-			originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
-		},
-	}
-
-	type headCounts interface {
-		HeadCount() []uint64
-		HeadCountKV() []uint64
-	}
-	hc, ok := c.(headCounts)
-	if !ok {
-		return nil, model.ErrUnsupportedModel
-	}
-
-	headCount := hc.HeadCount()
-	headCountKV := hc.HeadCountKV()
-
-	m.numHeadsByLayer = make([]int, len(m.Layers))
-	m.numKVHeadsByLayer = make([]int, len(m.Layers))
-	for i := range m.Layers {
-		m.numHeadsByLayer[i] = int(headCount[i])
-		m.numKVHeadsByLayer[i] = int(headCountKV[i])
-
-		if m.numKVHeadsByLayer[i] == 0 {
-			m.Layers[i].Operator = &ShortConv{}
-		} else {
-			m.Layers[i].Operator = &Attention{}
-		}
-	}
-
-	lCache := int(c.Uint("shortconv.l_cache"))
-	dConv := max(0, lCache-1)
-	m.Cache = NewHybridCache(m.Shift, m.hiddenSize, dConv)
-	return &m, nil
-}
-
-type Operator interface {
-	Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor
-}
-
-type Attention struct {
-	Query     *nn.Linear  `gguf:"attn_q"`
-	QueryNorm *nn.RMSNorm `gguf:"attn_q_norm"`
-	Key       *nn.Linear  `gguf:"attn_k"`
-	KeyNorm   *nn.RMSNorm `gguf:"attn_k_norm"`
-	Value     *nn.Linear  `gguf:"attn_v"`
-	Output    *nn.Linear  `gguf:"attn_output,alt:attn_out"`
-}
-
-func (sa *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
-	batchSize := hiddenStates.Dim(1)
-	headDim := opts.headDimValue()
-	numHeads := opts.numHeadsByLayer[layer]
-	numKVHeads := opts.numKVHeadsByLayer[layer]
-
-	query := sa.Query.Forward(ctx, hiddenStates)
-	key := sa.Key.Forward(ctx, hiddenStates)
-	value := sa.Value.Forward(ctx, hiddenStates)
-
-	query = query.Reshape(ctx, headDim, numHeads, batchSize)
-	key = key.Reshape(ctx, headDim, numKVHeads, batchSize)
-	value = value.Reshape(ctx, headDim, numKVHeads, batchSize)
-
-	query = sa.QueryNorm.Forward(ctx, query, opts.eps)
-	key = sa.KeyNorm.Forward(ctx, key, opts.eps)
-
-	query = opts.applyRotaryPositionEmbeddings(ctx, query, positions)
-	key = opts.applyRotaryPositionEmbeddings(ctx, key, positions)
-
-	attention := nn.Attention(ctx, query, key, value, 1./math.Sqrt(float64(headDim)), cache)
-	attention = attention.Reshape(ctx, attention.Dim(0)*attention.Dim(1), batchSize)
-	return sa.Output.Forward(ctx, attention)
-}
-
-type MLP struct {
-	Up   *nn.Linear `gguf:"ffn_up"`
-	Down *nn.Linear `gguf:"ffn_down"`
-	Gate *nn.Linear `gguf:"ffn_gate"`
-}
-
-func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
-	hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx, mlp.Up.Forward(ctx, hiddenState))
-	return mlp.Down.Forward(ctx, hiddenState)
-}
-
-type Layer struct {
-	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
-	Operator      Operator
-	MLPNorm       *nn.RMSNorm `gguf:"ffn_norm"`
-	MLP           *MLP
-}
-
-func (l *Layer) Forward(ctx ml.Context, layer int, hiddenState, positions, outputs ml.Tensor, cache *HybridCache, opts *Options) ml.Tensor {
-	residual := hiddenState
-
-	hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
-	hiddenState = l.Operator.Forward(ctx, hiddenState, positions, cache, layer, opts)
-
-	if outputs != nil {
-		hiddenState = hiddenState.Rows(ctx, outputs)
-		residual = residual.Rows(ctx, outputs)
-	}
-
-	hiddenState = hiddenState.Add(ctx, residual)
-	residual = hiddenState
-
-	hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
-	hiddenState = l.MLP.Forward(ctx, hiddenState, opts)
-	return hiddenState.Add(ctx, residual)
-}
-
-func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return m.applyRotaryPositionEmbeddings(ctx, key, shift), nil
-}
-
-func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
-	positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
-
-	hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
-
-	for i, layer := range m.Layers {
-		m.Cache.SetLayer(i)
-
-		var outputs ml.Tensor
-		if i == len(m.Layers)-1 {
-			outputs = batch.Outputs
-		}
-
-		hiddenState = layer.Forward(ctx, i, hiddenState, positions, outputs, m.Cache.(*HybridCache), &m.Options)
-	}
-
-	hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
-	return m.Output.Forward(ctx, hiddenState), nil
-}
-
-func init() {
-	model.Register("lfm2", New)
-}
--- a/model/models/lfm2/shortconv.go
+++ b/model/models/lfm2/shortconv.go
@@ -1,50 +0,0 @@
-package lfm2
-
-import (
-	"github.com/ollama/ollama/ml"
-	"github.com/ollama/ollama/ml/nn"
-)
-
-type shortConvKernel struct {
-	Weight ml.Tensor `gguf:"weight"`
-}
-
-// ShortConv implements the LFM2 short-convolution block (GGML_OP_SSM_CONV) with a recurrent
-// state stored in the HybridCache.
-type ShortConv struct {
-	Conv    *shortConvKernel `gguf:"shortconv.conv"`
-	InProj  *nn.Linear       `gguf:"shortconv.in_proj"`
-	OutProj *nn.Linear       `gguf:"shortconv.out_proj"`
-}
-
-func (sc *ShortConv) Forward(ctx ml.Context, hiddenStates ml.Tensor, _ ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
-	nSeqs := cache.numSeqs()
-	seqTokens := cache.seqTokens()
-	hiddenSize := hiddenStates.Dim(0)
-	if nSeqs <= 0 || seqTokens <= 0 || hiddenStates.Dim(1) != nSeqs*seqTokens {
-		panic("lfm2: unsupported batch layout for shortconv")
-	}
-
-	bcx := sc.InProj.Forward(ctx, hiddenStates).Reshape(ctx, 3*hiddenSize, seqTokens, nSeqs)
-
-	elementSize := bcx.Stride(0)
-	b := bcx.View(ctx, 0*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
-	c := bcx.View(ctx, 1*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
-	x := bcx.View(ctx, 2*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
-
-	bx := b.Mul(ctx, x).Permute(ctx, 1, 0, 2, 3)
-
-	state, err := cache.ConvState(ctx, layer)
-	if err != nil {
-		panic("lfm2: failed to get conv state: " + err.Error())
-	}
-	sx := state.Concat(ctx, bx, 0)
-
-	convOut := sx.SSMConv(ctx, sc.Conv.Weight)
-	y := c.Mul(ctx, convOut)
-
-	dConv := sx.Dim(0) - seqTokens
-	cache.UpdateConvState(ctx, layer, sx.Slice(ctx, 0, sx.Dim(0)-dConv, sx.Dim(0), 1))
-
-	return sc.OutProj.Forward(ctx, y.Reshape(ctx, hiddenSize, seqTokens*nSeqs))
-}
--- a/model/models/models.go
+++ b/model/models/models.go
@@ -7,9 +7,7 @@ import (
 	_ "github.com/ollama/ollama/model/models/gemma2"
 	_ "github.com/ollama/ollama/model/models/gemma3"
 	_ "github.com/ollama/ollama/model/models/gemma3n"
-	_ "github.com/ollama/ollama/model/models/glm4moelite"
 	_ "github.com/ollama/ollama/model/models/gptoss"
-	_ "github.com/ollama/ollama/model/models/lfm2"
 	_ "github.com/ollama/ollama/model/models/llama"
 	_ "github.com/ollama/ollama/model/models/llama4"
 	_ "github.com/ollama/ollama/model/models/mistral3"
--- a/model/parsers/glm46.go
+++ b/model/parsers/glm46.go
@@ -1,410 +0,0 @@
-package parsers
-
-import (
-	"context"
-	"encoding/xml"
-	"fmt"
-	"log/slog"
-	"strings"
-	"unicode"
-
-	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/logutil"
-)
-
-type glm46ParserState int
-
-const (
-	glm46ParserState_LookingForThinkingOpen glm46ParserState = iota
-	glm46ParserState_ThinkingStartedEatingWhitespace
-	glm46ParserState_CollectingThinking
-	glm46ParserState_ThinkingDoneEatingWhitespace
-	glm46ParserState_CollectingContent
-	glm46ParserState_ToolStartedEatingWhitespace
-	glm46ParserState_CollectingToolContent
-)
-
-const (
-	glm46ThinkingOpenTag  = "<think>"
-	glm46ThinkingCloseTag = "</think>"
-	glm46ToolOpenTag      = "<tool_call>"
-	glm46ToolCloseTag     = "</tool_call>"
-)
-
-type GLM46Parser struct {
-	state  glm46ParserState
-	buffer strings.Builder
-	tools  []api.Tool
-}
-
-func (p *GLM46Parser) HasToolSupport() bool {
-	return true
-}
-
-func (p *GLM46Parser) HasThinkingSupport() bool {
-	return true
-}
-
-// func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
-func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
-	p.tools = tools
-	return tools
-}
-
-type glm46Event interface {
-	isGLM46Event()
-}
-
-type glm46EventContent struct {
-	content string
-}
-
-func (glm46EventContent) isGLM46Event() {}
-
-type glm46EventRawToolCall struct {
-	raw string
-}
-
-func (glm46EventRawToolCall) isGLM46Event() {}
-
-type glm46EventThinkingContent struct {
-	content string
-}
-
-func (glm46EventThinkingContent) isGLM46Event() {}
-
-func (p *GLM46Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
-	p.buffer.WriteString(s)
-	events := p.parseEvents()
-
-	var toolCalls []api.ToolCall
-	var contentSb strings.Builder
-	var thinkingSb strings.Builder
-
-	for _, event := range events {
-		switch event := event.(type) {
-		case glm46EventRawToolCall:
-			toolCall, err := parseGLM46ToolCall(event, p.tools)
-			if err != nil {
-				slog.Warn("glm-4.6 tool call parsing failed", "error", err)
-				return "", "", nil, err
-			}
-			toolCalls = append(toolCalls, toolCall)
-		case glm46EventThinkingContent:
-			thinkingSb.WriteString(event.content)
-		case glm46EventContent:
-			// TODO(drifkin): if the same turn contains multiple interleaved content
-			// events, we naively append them together here.
-			contentSb.WriteString(event.content)
-		}
-	}
-
-	return contentSb.String(), thinkingSb.String(), toolCalls, nil
-}
-
-func (p *GLM46Parser) parseEvents() []glm46Event {
-	var all []glm46Event
-
-	keepLooping := true
-	for keepLooping {
-		var events []glm46Event
-		events, keepLooping = p.eat()
-		if len(events) > 0 {
-			all = append(all, events...)
-		}
-	}
-
-	if len(all) > 0 {
-		slog.Log(context.TODO(), logutil.LevelTrace, "glm-4.6 events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
-	}
-
-	return all
-}
-
-// eatLeadingWhitespaceAndTransitionTo consumes leading whitespace from the buffer
-// and transitions to the next state. Returns (nil, false) if only whitespace remains
-// in the buffer (needs more input), or (nil, true) if we successfully transitioned.
-func (p *GLM46Parser) eatLeadingWhitespaceAndTransitionTo(nextState glm46ParserState) ([]glm46Event, bool) {
-	trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
-	p.buffer.Reset()
-	if trimmed == "" {
-		return nil, false // Still only whitespace, keep waiting for more input
-	}
-	p.state = nextState
-	p.buffer.WriteString(trimmed)
-	return nil, true // Successfully transitioned
-}
-
-// glm46SplitAtTag splits the buffer at the given tag, returns the content before (trimmed of trailing whitespace),
-// the content after (optionally trimmed of leading whitespace), and updates the buffer
-func glm46SplitAtTag(p *GLM46Parser, tag string, trimAfter bool) (string, string) {
-	split := strings.SplitN(p.buffer.String(), tag, 2)
-	before := split[0]
-	before = strings.TrimRightFunc(before, unicode.IsSpace)
-	after := split[1]
-	if trimAfter {
-		after = strings.TrimLeftFunc(after, unicode.IsSpace)
-	}
-	p.buffer.Reset()
-	p.buffer.WriteString(after)
-	return before, after
-}
-
-func (p *GLM46Parser) eat() ([]glm46Event, bool) {
-	var events []glm46Event
-
-	switch p.state {
-	case glm46ParserState_LookingForThinkingOpen:
-		trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
-		if strings.HasPrefix(trimmed, glm46ThinkingOpenTag) {
-			// Found <think> opening tag
-			after := strings.TrimPrefix(trimmed, glm46ThinkingOpenTag)
-			after = strings.TrimLeftFunc(after, unicode.IsSpace)
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			if after == "" {
-				p.state = glm46ParserState_ThinkingStartedEatingWhitespace
-			} else {
-				p.state = glm46ParserState_CollectingThinking
-			}
-			return events, true
-		} else if strings.HasPrefix(glm46ThinkingOpenTag, trimmed) {
-			// Partial opening tag seen, keep accumulating
-			return events, false
-		} else if trimmed == "" {
-			// Only whitespace, keep accumulating
-			return events, false
-		} else {
-			// No thinking tag found, skip to content collection
-			p.state = glm46ParserState_CollectingContent
-			// Don't trim - we want to keep the original content
-			return events, true
-		}
-
-	case glm46ParserState_ThinkingStartedEatingWhitespace:
-		return p.eatLeadingWhitespaceAndTransitionTo(glm46ParserState_CollectingThinking)
-
-	case glm46ParserState_CollectingThinking:
-		acc := p.buffer.String()
-		if strings.Contains(acc, glm46ThinkingCloseTag) {
-			thinking, remaining := glm46SplitAtTag(p, glm46ThinkingCloseTag, true)
-			if len(thinking) > 0 {
-				events = append(events, glm46EventThinkingContent{content: thinking})
-			}
-			if remaining == "" {
-				p.state = glm46ParserState_ThinkingDoneEatingWhitespace
-			} else {
-				p.state = glm46ParserState_CollectingContent
-			}
-			return events, true
-		} else if overlapLen := overlap(acc, glm46ThinkingCloseTag); overlapLen > 0 {
-			// Partial closing tag - withhold it along with any trailing whitespace before it
-			beforePartialTag := acc[:len(acc)-overlapLen]
-			trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
-			ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
-
-			unambiguous := acc[:ambiguousStart]
-			ambiguous := acc[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, glm46EventThinkingContent{content: unambiguous})
-			}
-			return events, false
-		} else {
-			// Pure thinking content - withhold trailing whitespace (might precede closing tag)
-			whitespaceLen := trailingWhitespaceLen(acc)
-			ambiguousStart := len(acc) - whitespaceLen
-
-			unambiguous := acc[:ambiguousStart]
-			ambiguous := acc[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, glm46EventThinkingContent{content: unambiguous})
-			}
-			return events, false
-		}
-
-	case glm46ParserState_ThinkingDoneEatingWhitespace:
-		return p.eatLeadingWhitespaceAndTransitionTo(glm46ParserState_CollectingContent)
-
-	case glm46ParserState_CollectingContent:
-		if strings.Contains(p.buffer.String(), glm46ToolOpenTag) {
-			before, after := glm46SplitAtTag(p, glm46ToolOpenTag, true)
-			if len(before) > 0 {
-				events = append(events, glm46EventContent{content: before})
-			}
-			if after == "" {
-				p.state = glm46ParserState_ToolStartedEatingWhitespace
-			} else {
-				p.state = glm46ParserState_CollectingToolContent
-			}
-			return events, true
-		} else if overlapLen := overlap(p.buffer.String(), glm46ToolOpenTag); overlapLen > 0 {
-			beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
-			trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
-			ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
-
-			unambiguous := p.buffer.String()[:ambiguousStart]
-			ambiguous := p.buffer.String()[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, glm46EventContent{content: unambiguous})
-			}
-			return events, false
-		} else {
-			whitespaceLen := trailingWhitespaceLen(p.buffer.String())
-			ambiguousStart := len(p.buffer.String()) - whitespaceLen
-
-			unambiguous := p.buffer.String()[:ambiguousStart]
-			ambiguous := p.buffer.String()[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, glm46EventContent{content: unambiguous})
-			}
-			return events, false
-		}
-
-	case glm46ParserState_ToolStartedEatingWhitespace:
-		return p.eatLeadingWhitespaceAndTransitionTo(glm46ParserState_CollectingToolContent)
-
-	case glm46ParserState_CollectingToolContent:
-		acc := p.buffer.String()
-		if strings.Contains(acc, glm46ToolCloseTag) {
-			toolContent, _ := glm46SplitAtTag(p, glm46ToolCloseTag, true)
-			if len(toolContent) == 0 {
-				slog.Warn("glm46 tool call closing tag found but no content before it")
-			}
-			events = append(events, glm46EventRawToolCall{raw: toolContent})
-			p.state = glm46ParserState_CollectingContent
-			return events, true
-		} else {
-			// Keep accumulating - tool calls are not streamed
-			// We just wait for the closing tag
-			return events, false
-		}
-
-	default:
-		panic("unreachable")
-	}
-}
-
-// GLMToolCallXML represents the structure of a GLM-4.6 tool call for XML parsing
-type GLMToolCallXML struct {
-	XMLName xml.Name `xml:"tool_call"`
-	Content string   `xml:",chardata"` // Function name (text nodes between tags)
-	Keys    []string `xml:"arg_key"`   // All arg_key elements in document order
-	Values  []string `xml:"arg_value"` // All arg_value elements in document order
-}
-
-// escapeGLM46Content escapes XML entities in text content while preserving arg_key/arg_value tags
-func escapeGLM46Content(s string) string {
-	var result strings.Builder
-	inTag := false
-
-	for i := range len(s) {
-		ch := s[i]
-
-		if ch == '<' {
-			// Check if this is a known tag
-			if strings.HasPrefix(s[i:], "<arg_key>") ||
-				strings.HasPrefix(s[i:], "</arg_key>") ||
-				strings.HasPrefix(s[i:], "<arg_value>") ||
-				strings.HasPrefix(s[i:], "</arg_value>") {
-				inTag = true
-			}
-		}
-
-		if inTag {
-			result.WriteByte(ch)
-			if ch == '>' {
-				inTag = false
-			}
-		} else {
-			// Escape special characters in text content
-			switch ch {
-			case '&':
-				result.WriteString("&amp;")
-			case '<':
-				result.WriteString("&lt;")
-			case '>':
-				result.WriteString("&gt;")
-			default:
-				result.WriteByte(ch)
-			}
-		}
-	}
-
-	return result.String()
-}
-
-func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
-	// Escape any unescaped entities in text content
-	// We need to escape text between tags, but not the tags themselves
-	escaped := escapeGLM46Content(raw.raw)
-
-	// Wrap the content in a root element to make it valid XML
-	xmlString := "<tool_call>" + escaped + "</tool_call>"
-
-	// Parse XML into struct
-	var parsed GLMToolCallXML
-	if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil {
-		return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
-	}
-
-	// Extract and trim function name
-	functionName := strings.TrimSpace(parsed.Content)
-	if functionName == "" {
-		return api.ToolCall{}, fmt.Errorf("empty function name")
-	}
-
-	// Verify keys and values are paired correctly
-	if len(parsed.Keys) != len(parsed.Values) {
-		return api.ToolCall{}, fmt.Errorf("mismatched arg_key and arg_value counts: %d keys, %d values", len(parsed.Keys), len(parsed.Values))
-	}
-
-	// Find the matching tool to get parameter types
-	var matchedTool *api.Tool
-	for i := range tools {
-		if tools[i].Function.Name == functionName {
-			matchedTool = &tools[i]
-			break
-		}
-	}
-
-	// Build arguments map by pairing keys and values
-	toolCall := api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name:      functionName,
-			Arguments: api.NewToolCallFunctionArguments(),
-		},
-	}
-
-	for i := range parsed.Keys {
-		key := strings.TrimSpace(parsed.Keys[i])
-		value := parsed.Values[i] // Don't trim here - parseValue handles it
-
-		// Look up parameter type
-		var paramType api.PropertyType
-		if matchedTool != nil && matchedTool.Function.Parameters.Properties != nil {
-			if prop, ok := matchedTool.Function.Parameters.Properties.Get(key); ok {
-				// Handle anyOf by collecting all types from the union
-				if len(prop.AnyOf) > 0 {
-					for _, anyOfProp := range prop.AnyOf {
-						paramType = append(paramType, anyOfProp.Type...)
-					}
-				} else {
-					paramType = prop.Type
-				}
-			}
-		}
-
-		// Parse value with type coercion
-		toolCall.Function.Arguments.Set(key, parseValue(value, paramType))
-	}
-
-	return toolCall, nil
-}
--- a/model/parsers/glm46_test.go
+++ b/model/parsers/glm46_test.go
@@ -1,862 +0,0 @@
-package parsers
-
-import (
-	"encoding/xml"
-	"reflect"
-	"testing"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestGLM46ParserStreaming(t *testing.T) {
-	type step struct {
-		input      string
-		wantEvents []glm46Event
-	}
-
-	cases := []struct {
-		desc  string
-		steps []step
-		only  bool
-	}{
-		{
-			desc: "leading whitespace before think tag",
-			steps: []step{
-				{
-					input:      "   \n\t  ",
-					wantEvents: []glm46Event{},
-				},
-				{
-					input:      "<think>thinking</think>",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "thinking"}},
-				},
-			},
-		},
-		{
-			desc: "think tag with whitespace inside",
-			steps: []step{
-				{
-					input: "<think>  \n  thinking content  \n  </think>regular content",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking content"},
-						glm46EventContent{content: "regular content"},
-					},
-				},
-			},
-		},
-		{
-			desc: "tool call with leading whitespace after opening tag",
-			steps: []step{
-				{
-					input: "<think></think><tool_call>  \n  test  \n  </tool_call>",
-					wantEvents: []glm46Event{
-						glm46EventRawToolCall{raw: "test"},
-					},
-				},
-			},
-		},
-		{
-			desc: "simple thinking then content",
-			steps: []step{
-				{
-					input: "<think>I am thinking</think>Now I respond",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "I am thinking"},
-						glm46EventContent{content: "Now I respond"},
-					},
-				},
-			},
-		},
-		{
-			desc: "streamed thinking content",
-			steps: []step{
-				{
-					input:      "<think>hello",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "hello"}},
-				},
-				{
-					input:      " world",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: " world"}},
-				},
-				{
-					input: "</think>content",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "content"},
-					},
-				},
-			},
-		},
-		{
-			desc: "content before tool call",
-			steps: []step{
-				{
-					input: "<think>Let me call a tool</think>here is text<tool_call>",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "Let me call a tool"},
-						glm46EventContent{content: "here is text"},
-					},
-				},
-				{
-					input: "function_name\n<arg_key>param</arg_key>\n<arg_value>value</arg_value>\n</tool_call>",
-					wantEvents: []glm46Event{
-						glm46EventRawToolCall{raw: "function_name\n<arg_key>param</arg_key>\n<arg_value>value</arg_value>"},
-					},
-				},
-			},
-		},
-		{
-			desc: "tool call with content after",
-			steps: []step{
-				{
-					input: "<think>thinking</think><tool_call>test</tool_call>after tool",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking"},
-						glm46EventRawToolCall{raw: "test"},
-						glm46EventContent{content: "after tool"},
-					},
-				},
-			},
-		},
-		{
-			desc: "trailing whitespace between content and tool call is trimmed",
-			steps: []step{
-				{
-					input: "<think>thinking</think>content\n  \t  <tool_call>test</tool_call>",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking"},
-						glm46EventContent{content: "content"},
-						glm46EventRawToolCall{raw: "test"},
-					},
-				},
-			},
-		},
-		{
-			desc: "trailing whitespace between tool call and content is trimmed",
-			steps: []step{
-				{
-					input: "<think>think</think><tool_call>test</tool_call>\n\t  after",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "think"},
-						glm46EventRawToolCall{raw: "test"},
-						glm46EventContent{content: "after"},
-					},
-				},
-			},
-		},
-		{
-			desc: "split thinking close tag",
-			steps: []step{
-				{
-					input:      "<think>thinking content</th",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "thinking content"}},
-				},
-				{
-					input: "ink>after",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "after"},
-					},
-				},
-			},
-		},
-		{
-			desc: "split thinking open tag",
-			steps: []step{
-				{
-					input:      "  <thi",
-					wantEvents: []glm46Event{},
-				},
-				{
-					input:      "nk>content</think>",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "content"}},
-				},
-			},
-		},
-		{
-			desc: "split tool open tag",
-			steps: []step{
-				{
-					input:      "<think>think</think>content<tool",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "think"}, glm46EventContent{content: "content"}},
-				},
-				{
-					input:      "_call>inside",
-					wantEvents: []glm46Event{},
-				},
-				{
-					input: "</tool_call>",
-					wantEvents: []glm46Event{
-						glm46EventRawToolCall{raw: "inside"},
-					},
-				},
-			},
-		},
-		{
-			desc: "partial thinking close tag fakeout",
-			steps: []step{
-				{
-					input:      "<think>content</th",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "content"}},
-				},
-				{
-					input:      "ought more",
-					wantEvents: []glm46Event{glm46EventThinkingContent{content: "</thought more"}},
-				},
-			},
-		},
-		{
-			desc: "partial thinking open tag fakeout",
-			steps: []step{
-				{
-					input:      "  <thi",
-					wantEvents: []glm46Event{},
-				},
-				{
-					input: "nking is fun",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "  <thinking is fun"},
-					},
-				},
-			},
-		},
-		{
-			desc: "partial tool open tag fakeout",
-			steps: []step{
-				{
-					input: "<think></think>content\n<tool",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "content"},
-					},
-				},
-				{
-					input: " fakeout",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "\n<tool fakeout"},
-					},
-				},
-			},
-		},
-		{
-			desc: "partial tool close tag fakeout",
-			steps: []step{
-				{
-					input:      "<think></think><tool_call>content</tool",
-					wantEvents: []glm46Event{},
-				},
-				{
-					input:      " fakeout",
-					wantEvents: []glm46Event{},
-				},
-				{
-					input: "</tool_call>",
-					wantEvents: []glm46Event{
-						glm46EventRawToolCall{raw: "content</tool fakeout"},
-					},
-				},
-			},
-		},
-		{
-			desc: "empty thinking tag",
-			steps: []step{
-				{
-					input: "<think></think>content here",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "content here"},
-					},
-				},
-			},
-		},
-		{
-			desc: "multiple tool calls in sequence",
-			steps: []step{
-				{
-					input: "<think>think</think><tool_call>first</tool_call>between<tool_call>second</tool_call>end",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "think"},
-						glm46EventRawToolCall{raw: "first"},
-						glm46EventContent{content: "between"},
-						glm46EventRawToolCall{raw: "second"},
-						glm46EventContent{content: "end"},
-					},
-				},
-			},
-		},
-		{
-			desc: "no thinking tag - direct to content",
-			steps: []step{
-				{
-					input: "just content here",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "just content here"},
-					},
-				},
-			},
-		},
-		{
-			desc: "no thinking tag - skip to content then tool call",
-			steps: []step{
-				{
-					input: "Here's the answer:<tool_call>test</tool_call>done",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "Here's the answer:"},
-						glm46EventRawToolCall{raw: "test"},
-						glm46EventContent{content: "done"},
-					},
-				},
-			},
-		},
-		{
-			desc: "no thinking tag - whitespace preserved when no tags",
-			steps: []step{
-				{
-					input: "  \n  content with leading whitespace",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "  \n  content with leading whitespace"},
-					},
-				},
-			},
-		},
-		{
-			desc: "whitespace after think close tag gets eaten",
-			steps: []step{
-				{
-					input: "<think>thinking</think>  \n\t  content",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking"},
-						glm46EventContent{content: "content"},
-					},
-				},
-			},
-		},
-		{
-			desc: "whitespace after tool_call close tag gets eaten",
-			steps: []step{
-				{
-					input: "<think></think><tool_call>test</tool_call>  \n\t  content",
-					wantEvents: []glm46Event{
-						glm46EventRawToolCall{raw: "test"},
-						glm46EventContent{content: "content"},
-					},
-				},
-			},
-		},
-		{
-			desc: "thinking content withholds trailing whitespace (single chunk)",
-			steps: []step{
-				{
-					input: "<think>thinking content   ",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking content"},
-					},
-				},
-				{
-					input: "</think>after",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "after"},
-					},
-				},
-			},
-		},
-		{
-			desc: "thinking content withholds trailing whitespace with newlines",
-			steps: []step{
-				{
-					input: "<think>thinking\n\n  ",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking"},
-					},
-				},
-				{
-					input: "</think>content",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "content"},
-					},
-				},
-			},
-		},
-		{
-			desc: "thinking content trailing whitespace emitted when more content arrives",
-			steps: []step{
-				{
-					input: "<think>thinking   ",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking"},
-					},
-				},
-				{
-					input: "more thinking",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "   more thinking"},
-					},
-				},
-				{
-					input:      "</think>",
-					wantEvents: []glm46Event{},
-				},
-			},
-		},
-		{
-			desc: "thinking content withholds trailing whitespace before partial close tag",
-			steps: []step{
-				{
-					input: "<think>thinking   </th",
-					wantEvents: []glm46Event{
-						glm46EventThinkingContent{content: "thinking"},
-					},
-				},
-				{
-					input: "ink>content",
-					wantEvents: []glm46Event{
-						glm46EventContent{content: "content"},
-					},
-				},
-			},
-		},
-	}
-
-	anyOnlies := false
-	for _, tc := range cases {
-		if tc.only {
-			anyOnlies = true
-		}
-	}
-
-	for _, tc := range cases {
-		if anyOnlies && !tc.only {
-			continue
-		}
-
-		t.Run(tc.desc, func(t *testing.T) {
-			parser := GLM46Parser{}
-
-			for i, step := range tc.steps {
-				parser.buffer.WriteString(step.input)
-				gotEvents := parser.parseEvents()
-
-				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
-					// avoid deep equal on empty vs. nil slices
-					continue
-				}
-
-				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
-					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
-				}
-			}
-		})
-	}
-}
-
-// TestGLMToolCallXMLOrderPreservation verifies that xml.Unmarshal preserves
-// document order when collecting multiple elements with the same tag name into slices.
-// This is a critical assumption for the GLM-4.6 parser's struct-based approach.
-func TestGLMToolCallXMLOrderPreservation(t *testing.T) {
-	testCases := []struct {
-		name       string
-		xml        string
-		wantKeys   []string
-		wantValues []string
-	}{
-		{
-			name: "alternating keys and values",
-			xml: `<tool_call>
-function_name
-<arg_key>first</arg_key>
-<arg_value>A</arg_value>
-<arg_key>second</arg_key>
-<arg_value>B</arg_value>
-<arg_key>third</arg_key>
-<arg_value>C</arg_value>
-</tool_call>`,
-			wantKeys:   []string{"first", "second", "third"},
-			wantValues: []string{"A", "B", "C"},
-		},
-		{
-			name: "all keys then all values",
-			xml: `<tool_call>
-function_name
-<arg_key>key1</arg_key>
-<arg_key>key2</arg_key>
-<arg_key>key3</arg_key>
-<arg_value>val1</arg_value>
-<arg_value>val2</arg_value>
-<arg_value>val3</arg_value>
-</tool_call>`,
-			wantKeys:   []string{"key1", "key2", "key3"},
-			wantValues: []string{"val1", "val2", "val3"},
-		},
-		{
-			name: "mixed grouping",
-			xml: `<tool_call>
-function_name
-<arg_key>a</arg_key>
-<arg_value>1</arg_value>
-<arg_key>b</arg_key>
-<arg_key>c</arg_key>
-<arg_value>2</arg_value>
-<arg_value>3</arg_value>
-</tool_call>`,
-			wantKeys:   []string{"a", "b", "c"},
-			wantValues: []string{"1", "2", "3"},
-		},
-		{
-			name: "reverse order - all values then all keys",
-			xml: `<tool_call>
-function_name
-<arg_value>X</arg_value>
-<arg_value>Y</arg_value>
-<arg_value>Z</arg_value>
-<arg_key>x</arg_key>
-<arg_key>y</arg_key>
-<arg_key>z</arg_key>
-</tool_call>`,
-			wantKeys:   []string{"x", "y", "z"},
-			wantValues: []string{"X", "Y", "Z"},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			var parsed GLMToolCallXML
-			err := xml.Unmarshal([]byte(tc.xml), &parsed)
-			if err != nil {
-				t.Fatalf("failed to unmarshal XML: %v", err)
-			}
-
-			if !reflect.DeepEqual(parsed.Keys, tc.wantKeys) {
-				t.Errorf("Keys order mismatch:\ngot:  %v\nwant: %v", parsed.Keys, tc.wantKeys)
-			}
-
-			if !reflect.DeepEqual(parsed.Values, tc.wantValues) {
-				t.Errorf("Values order mismatch:\ngot:  %v\nwant: %v", parsed.Values, tc.wantValues)
-			}
-		})
-	}
-}
-
-func TestGLM46ToolCallParsing(t *testing.T) {
-	type testCase struct {
-		name         string
-		rawToolCall  string
-		tools        []api.Tool
-		wantToolCall api.ToolCall
-	}
-
-	cases := []testCase{
-		{
-			name:  "simple tool call",
-			tools: []api.Tool{},
-			rawToolCall: `get-current-weather
-<arg_key>location</arg_key>
-<arg_value>New York, NY</arg_value>
-<arg_key>unit</arg_key>
-<arg_value>celsius</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "get-current-weather",
-					Arguments: args(`{"location": "New York, NY", "unit": "celsius"}`),
-				},
-			},
-		},
-		{
-			name: "tool call with typed parameters",
-			tools: []api.Tool{
-				tool("calculate", map[string]api.ToolProperty{
-					"x":       {Type: api.PropertyType{"number"}},
-					"y":       {Type: api.PropertyType{"integer"}},
-					"enabled": {Type: api.PropertyType{"boolean"}},
-					"items":   {Type: api.PropertyType{"array"}},
-				}),
-			},
-			rawToolCall: `calculate
-<arg_key>x</arg_key>
-<arg_value>3.14</arg_value>
-<arg_key>y</arg_key>
-<arg_value>42</arg_value>
-<arg_key>enabled</arg_key>
-<arg_value>true</arg_value>
-<arg_key>items</arg_key>
-<arg_value>["a", "b", "c"]</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "calculate",
-					Arguments: args(`{"enabled": true, "items": ["a", "b", "c"], "x": 3.14, "y": 42}`),
-				},
-			},
-		},
-		{
-			name:  "function name with whitespace",
-			tools: []api.Tool{},
-			rawToolCall: `  get-weather
-<arg_key>city</arg_key>
-<arg_value>Paris</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "get-weather",
-					Arguments: args(`{"city": "Paris"}`),
-				},
-			},
-		},
-		{
-			name:  "values with special characters",
-			tools: []api.Tool{},
-			rawToolCall: `execute-command
-<arg_key>command</arg_key>
-<arg_value>ls && echo "done"</arg_value>
-<arg_key>message</arg_key>
-<arg_value>a < b and c > d</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "execute-command",
-					Arguments: args(`{"command": "ls && echo \"done\"", "message": "a < b and c > d"}`),
-				},
-			},
-		},
-		{
-			name:  "unicode in function names and values",
-			tools: []api.Tool{},
-			rawToolCall: `获取天气
-<arg_key>城市</arg_key>
-<arg_value>北京</arg_value>
-<arg_key>message</arg_key>
-<arg_value>Hello! 你好! 🌟</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "获取天气",
-					Arguments: args(`{"message": "Hello! 你好! 🌟", "城市": "北京"}`),
-				},
-			},
-		},
-		{
-			name:  "empty value",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>param1</arg_key>
-<arg_value></arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"param1": ""}`),
-				},
-			},
-		},
-		{
-			name:  "special chars in arg_key names",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>param<1></arg_key>
-<arg_value>value1</arg_value>
-<arg_key>a&b</arg_key>
-<arg_value>value2</arg_value>
-<arg_key>x>y</arg_key>
-<arg_value>value3</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"a&b": "value2", "param<1>": "value1", "x>y": "value3"}`),
-				},
-			},
-		},
-		{
-			name:  "multiple consecutive ampersands",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>param</arg_key>
-<arg_value>test &&&& more</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"param": "test &&&& more"}`),
-				},
-			},
-		},
-		{
-			name:  "mixed special chars together",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>param</arg_key>
-<arg_value><>&<>&</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"param": "<>&<>&"}`),
-				},
-			},
-		},
-		{
-			name:  "newlines and tabs in parameter values",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>multiline</arg_key>
-<arg_value>line1
-	indented line2
-line3</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"multiline": "line1\n\tindented line2\nline3"}`),
-				},
-			},
-		},
-		{
-			name:  "single and double quotes in values",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>quotes</arg_key>
-<arg_value>She said "Hello's there!"</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"quotes": "She said \"Hello's there!\""}`),
-				},
-			},
-		},
-		{
-			name:  "CDATA-like content that should be treated as text",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>cdata</arg_key>
-<arg_value><![CDATA[not actual cdata]]></arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"cdata": "<![CDATA[not actual cdata]]>"}`),
-				},
-			},
-		},
-		{
-			name:  "all special XML entities",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>entities</arg_key>
-<arg_value>&lt;&gt;&amp;&apos;&quot;</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"entities": "&lt;&gt;&amp;&apos;&quot;"}`),
-				},
-			},
-		},
-		{
-			name:  "order preservation with multiple parameters",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>first</arg_key>
-<arg_value>value1</arg_value>
-<arg_key>second</arg_key>
-<arg_value>value2</arg_value>
-<arg_key>third</arg_key>
-<arg_value>value3</arg_value>
-<arg_key>fourth</arg_key>
-<arg_value>value4</arg_value>
-<arg_key>fifth</arg_key>
-<arg_value>value5</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test-function",
-					Arguments: args(`{"fifth": "value5", "first": "value1", "fourth": "value4", "second": "value2", "third": "value3"}`),
-				},
-			},
-		},
-		{
-			name:  "order preservation with identical key names but different positions",
-			tools: []api.Tool{},
-			rawToolCall: `test-function
-<arg_key>param</arg_key>
-<arg_value>first occurrence</arg_value>
-<arg_key>other</arg_key>
-<arg_value>middle</arg_value>
-<arg_key>param</arg_key>
-<arg_value>second occurrence</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name: "test-function",
-					// Later occurrence should overwrite earlier one
-					Arguments: args(`{"other": "middle", "param": "second occurrence"}`),
-				},
-			},
-		},
-		{
-			name: "array with mixed types",
-			tools: []api.Tool{
-				tool("process", map[string]api.ToolProperty{
-					"items": {Type: api.PropertyType{"array"}},
-				}),
-			},
-			rawToolCall: `process
-<arg_key>items</arg_key>
-<arg_value>[1, "hello", true, null]</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "process",
-					Arguments: args(`{"items": [1, "hello", true, null]}`),
-				},
-			},
-		},
-		{
-			name: "empty array",
-			tools: []api.Tool{
-				tool("test", map[string]api.ToolProperty{
-					"tags": {Type: api.PropertyType{"array"}},
-				}),
-			},
-			rawToolCall: `test
-<arg_key>tags</arg_key>
-<arg_value>[]</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "test",
-					Arguments: args(`{"tags": []}`),
-				},
-			},
-		},
-		{
-			name: "anyOf array or string - with array of objects",
-			tools: []api.Tool{
-				tool("TodoWrite", map[string]api.ToolProperty{
-					"todos": {AnyOf: []api.ToolProperty{{Type: api.PropertyType{"array"}}, {Type: api.PropertyType{"string"}}}},
-				}),
-			},
-			// <tool_call>TodoWrite
-			// <arg_key>todos</arg_key>
-			// <arg_value>[{"content": "Set up HTML file and basic structure", "id": "1", "priority": "high", "status": "pending"}, {"content": "Create 3D scene with Three.js", "id": "2", "priority": "high", "status": "pending"}, {"content": "Implement terrain generation with blocks", "id": "3", "priority": "high", "status": "pending"}, {"content": "Add player controls (movement, camera)", "id": "4", "priority": "high", "status": "pending"}, {"content": "Implement block placement/destruction", "id": "5", "priority": "medium", "status": "pending"}, {"content": "Add lighting and textures", "id": "6", "priority": "medium", "status": "pending"}, {"content": "Test and optimize performance", "id": "7", "priority": "low", "status": "pending"}]</arg_value>
-			// </tool_call>
-			rawToolCall: `TodoWrite
-<arg_key>todos</arg_key>
-<arg_value>[{"content": "task 1", "status": "pending", "priority": "high", "id": "1"}, {"content": "task 2", "status": "completed", "priority": "low", "id": "2"}]</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "TodoWrite",
-					Arguments: args(`{"todos": [{"content": "task 1", "id": "1", "priority": "high", "status": "pending"}, {"content": "task 2", "id": "2", "priority": "low", "status": "completed"}]}`),
-				},
-			},
-		},
-		{
-			name: "anyOf array or string - with plain string",
-			tools: []api.Tool{
-				tool("TodoWrite", map[string]api.ToolProperty{
-					"todos": {Type: api.PropertyType{"array", "string"}},
-				}),
-			},
-			rawToolCall: `TodoWrite
-<arg_key>todos</arg_key>
-<arg_value>Error: could not load todos</arg_value>`,
-			wantToolCall: api.ToolCall{
-				Function: api.ToolCallFunction{
-					Name:      "TodoWrite",
-					Arguments: args(`{"todos": "Error: could not load todos"}`),
-				},
-			},
-		},
-	}
-
-	for i, tc := range cases {
-		t.Run(tc.name, func(t *testing.T) {
-			gotToolCall, err := parseGLM46ToolCall(glm46EventRawToolCall{raw: tc.rawToolCall}, tc.tools)
-			if err != nil {
-				t.Errorf("case %d (%s): %v", i, tc.name, err)
-			}
-			if !toolCallEqual(gotToolCall, tc.wantToolCall) {
-				t.Errorf("case %d (%s): got tool call %#v, want %#v", i, tc.name, gotToolCall, tc.wantToolCall)
-			}
-		})
-	}
-}
--- a/model/parsers/glm47.go
+++ b/model/parsers/glm47.go
@@ -1,20 +0,0 @@
-package parsers
-
-import "github.com/ollama/ollama/api"
-
-// GLM47Parser extends GLM46Parser with thinking-aware initialization.
-// GLM-4.7's prompt ends with <think> when thinking is enabled, so the parser
-// must start in CollectingThinking state (the model outputs thinking content directly).
-type GLM47Parser struct {
-	GLM46Parser
-}
-
-func (p *GLM47Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
-	p.tools = tools
-	// When thinking is enabled (nil or true), the prompt ends with <think>,
-	// so model output starts directly with thinking content (no opening tag).
-	if thinkValue == nil || thinkValue.Bool() {
-		p.state = glm46ParserState_CollectingThinking
-	}
-	return tools
-}
--- a/model/parsers/glm47_test.go
+++ b/model/parsers/glm47_test.go
@@ -1,99 +0,0 @@
-package parsers
-
-import (
-	"reflect"
-	"testing"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestGLM47ParserAdd(t *testing.T) {
-	parser := GLM47Parser{}
-	parser.Init([]api.Tool{
-		tool("calculate", map[string]api.ToolProperty{
-			"count":   {Type: api.PropertyType{"integer"}},
-			"enabled": {Type: api.PropertyType{"boolean"}},
-		}),
-	}, nil, nil)
-
-	// When thinking is enabled (thinkValue nil), the prompt ends with <think>,
-	// so the model output does NOT include the opening <think> tag.
-	content, thinking, calls, err := parser.Add("plan</think>Answer<tool_call>calculate<arg_key>count</arg_key><arg_value>3</arg_value><arg_key>enabled</arg_key><arg_value>true</arg_value></tool_call>", true)
-	if err != nil {
-		t.Fatalf("parse failed: %v", err)
-	}
-	if thinking != "plan" {
-		t.Fatalf("expected thinking 'plan', got %q", thinking)
-	}
-	if content != "Answer" {
-		t.Fatalf("expected content 'Answer', got %q", content)
-	}
-	if len(calls) != 1 {
-		t.Fatalf("expected 1 tool call, got %d", len(calls))
-	}
-	expectedArgs := args(`{"count": 3, "enabled": true}`)
-	if !toolCallEqual(api.ToolCall{Function: api.ToolCallFunction{Arguments: calls[0].Function.Arguments}}, api.ToolCall{Function: api.ToolCallFunction{Arguments: expectedArgs}}) {
-		t.Fatalf("expected args %#v, got %#v", expectedArgs.ToMap(), calls[0].Function.Arguments.ToMap())
-	}
-}
-
-func TestGLM47ParserNoThinkingContent(t *testing.T) {
-	parser := GLM47Parser{}
-	parser.Init(nil, nil, nil)
-
-	// When thinking is enabled but model has no thinking to output,
-	// it should output </think> immediately followed by content.
-	content, thinking, calls, err := parser.Add("</think>Plain answer", true)
-	if err != nil {
-		t.Fatalf("parse failed: %v", err)
-	}
-	if thinking != "" {
-		t.Fatalf("expected empty thinking, got %q", thinking)
-	}
-	if content != "Plain answer" {
-		t.Fatalf("expected content 'Plain answer', got %q", content)
-	}
-	if len(calls) != 0 {
-		t.Fatalf("expected no tool calls, got %d", len(calls))
-	}
-}
-
-func TestGLM47ParserThinkingDisabled(t *testing.T) {
-	parser := GLM47Parser{}
-	// When thinking is disabled, parser stays in LookingForThinkingOpen state
-	parser.Init(nil, nil, &api.ThinkValue{Value: false})
-
-	// Model outputs plain content (prompt ended with </think>)
-	content, thinking, calls, err := parser.Add("Plain answer", true)
-	if err != nil {
-		t.Fatalf("parse failed: %v", err)
-	}
-	if thinking != "" {
-		t.Fatalf("expected empty thinking, got %q", thinking)
-	}
-	if content != "Plain answer" {
-		t.Fatalf("expected content 'Plain answer', got %q", content)
-	}
-	if len(calls) != 0 {
-		t.Fatalf("expected no tool calls, got %d", len(calls))
-	}
-}
-
-func TestGLM47ParserToolCallEscaping(t *testing.T) {
-	toolCall, err := parseGLM46ToolCall(glm46EventRawToolCall{raw: `exec
-<arg_key>expr</arg_key>
-<arg_value>a < b && c > d</arg_value>`}, nil)
-	if err != nil {
-		t.Fatalf("parse failed: %v", err)
-	}
-
-	expected := api.ToolCall{
-		Function: api.ToolCallFunction{
-			Name:      "exec",
-			Arguments: args(`{"expr": "a < b && c > d"}`),
-		},
-	}
-	if !reflect.DeepEqual(toolCall, expected) {
-		t.Fatalf("expected %#v, got %#v", expected, toolCall)
-	}
-}
--- a/model/parsers/lfm2.go
+++ b/model/parsers/lfm2.go
@@ -1,498 +0,0 @@
-package parsers
-
-import (
-	"encoding/json"
-	"errors"
-	"log/slog"
-	"strconv"
-	"strings"
-	"unicode"
-
-	"github.com/ollama/ollama/api"
-)
-
-type LFM2ParserState int
-
-const (
-	LFM2CollectingThinking LFM2ParserState = iota
-	LFM2CollectingContent
-	LFM2CollectingToolCalls
-)
-
-const (
-	lfm2ThinkingOpenTag  = "<think>"
-	lfm2ThinkingCloseTag = "</think>"
-	lfm2ToolCallStartTag = "<|tool_call_start|>"
-	lfm2ToolCallEndTag   = "<|tool_call_end|>"
-)
-
-type LFM2Parser struct {
-	state                    LFM2ParserState
-	buffer                   strings.Builder
-	hasThinkingSupport       bool
-	needsThinkingLeadingTrim bool // trim leading whitespace after <think> tag
-	needsContentLeadingTrim  bool // trim leading whitespace after </think> tag
-}
-
-func (p *LFM2Parser) HasToolSupport() bool {
-	return true
-}
-
-func (p *LFM2Parser) HasThinkingSupport() bool {
-	return p.hasThinkingSupport
-}
-
-func (p *LFM2Parser) setInitialState(lastMessage *api.Message, thinkValue *api.ThinkValue) {
-	prefill := lastMessage != nil && lastMessage.Role == "assistant"
-
-	// Check both model capability AND request preference
-	thinkingEnabled := p.HasThinkingSupport() && (thinkValue != nil && thinkValue.Bool())
-
-	if !thinkingEnabled {
-		p.state = LFM2CollectingContent
-		return
-	}
-
-	if prefill && lastMessage.Content != "" {
-		p.state = LFM2CollectingContent
-		return
-	}
-
-	p.state = LFM2CollectingThinking
-	p.needsThinkingLeadingTrim = true
-}
-
-func (p *LFM2Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
-	p.setInitialState(lastMessage, thinkValue)
-	return tools
-}
-
-type lfm2Event interface {
-	isLFM2Event()
-}
-
-type lfm2EventThinkingContent struct {
-	content string
-}
-
-type lfm2EventContent struct {
-	content string
-}
-
-type lfm2EventToolCall struct {
-	toolCall api.ToolCall
-}
-
-func (lfm2EventThinkingContent) isLFM2Event() {}
-func (lfm2EventContent) isLFM2Event()         {}
-func (lfm2EventToolCall) isLFM2Event()        {}
-
-func (p *LFM2Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
-	p.buffer.WriteString(s)
-	events := p.parseEvents()
-
-	var toolCalls []api.ToolCall
-	var contentSb strings.Builder
-	var thinkingSb strings.Builder
-	for _, event := range events {
-		switch event := event.(type) {
-		case lfm2EventToolCall:
-			toolCalls = append(toolCalls, event.toolCall)
-		case lfm2EventThinkingContent:
-			thinkingSb.WriteString(event.content)
-		case lfm2EventContent:
-			contentSb.WriteString(event.content)
-		}
-	}
-
-	return contentSb.String(), thinkingSb.String(), toolCalls, nil
-}
-
-func (p *LFM2Parser) parseEvents() []lfm2Event {
-	var all []lfm2Event
-
-	keepLooping := true
-	for keepLooping {
-		var events []lfm2Event
-		events, keepLooping = p.eat()
-		if len(events) > 0 {
-			all = append(all, events...)
-		}
-	}
-
-	return all
-}
-
-func (p *LFM2Parser) eat() ([]lfm2Event, bool) {
-	var events []lfm2Event
-	bufStr := p.buffer.String()
-	if bufStr == "" {
-		return events, false
-	}
-
-	switch p.state {
-	case LFM2CollectingThinking:
-		// Strip opening <think> tag if present
-		if strings.HasPrefix(bufStr, lfm2ThinkingOpenTag) {
-			bufStr = bufStr[len(lfm2ThinkingOpenTag):]
-			p.needsThinkingLeadingTrim = true
-			p.buffer.Reset()
-			p.buffer.WriteString(bufStr)
-		}
-
-		// Trim leading whitespace after <think> tag (may span multiple chunks)
-		if p.needsThinkingLeadingTrim {
-			if trimmed := strings.TrimLeftFunc(bufStr, unicode.IsSpace); trimmed != bufStr {
-				bufStr = trimmed
-				p.buffer.Reset()
-				p.buffer.WriteString(bufStr)
-			}
-			// Clear flag once we have non-whitespace content or buffer is empty
-			if len(bufStr) > 0 {
-				p.needsThinkingLeadingTrim = false
-			}
-		}
-
-		if strings.Contains(bufStr, lfm2ThinkingCloseTag) { // thinking[</think>] -> content
-			split := strings.SplitN(bufStr, lfm2ThinkingCloseTag, 2)
-			thinking := split[0]
-			thinking = strings.TrimRightFunc(thinking, unicode.IsSpace)
-
-			remaining := split[1]
-			remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = LFM2CollectingContent
-			p.needsThinkingLeadingTrim = false
-			// Set flag to trim any additional whitespace that may arrive in later chunks
-			p.needsContentLeadingTrim = len(remaining) == 0
-
-			if len(thinking) > 0 {
-				events = append(events, lfm2EventThinkingContent{content: thinking})
-			}
-			return events, true
-		} else if overlapLen := overlap(bufStr, lfm2ThinkingCloseTag); overlapLen > 0 { // partial </think>
-			beforePartialTag := bufStr[:len(bufStr)-overlapLen]
-			trailingLen := trailingWhitespaceLen(beforePartialTag)
-			ambiguousStart := len(beforePartialTag) - trailingLen
-
-			unambiguous := bufStr[:ambiguousStart]
-			ambiguous := bufStr[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, lfm2EventThinkingContent{content: unambiguous})
-			}
-			return events, false
-		} else { // otherwise its thinking content
-			whitespaceLen := trailingWhitespaceLen(bufStr)
-			ambiguousStart := len(bufStr) - whitespaceLen
-
-			unambiguous := bufStr[:ambiguousStart]
-			ambiguous := bufStr[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, lfm2EventThinkingContent{content: unambiguous})
-			}
-			return events, false
-		}
-
-	case LFM2CollectingContent:
-		// Trim leading whitespace after </think> tag (may span multiple chunks)
-		if p.needsContentLeadingTrim {
-			if trimmed := strings.TrimLeftFunc(bufStr, unicode.IsSpace); trimmed != bufStr {
-				bufStr = trimmed
-				p.buffer.Reset()
-				p.buffer.WriteString(bufStr)
-			}
-			// Clear flag once we have non-whitespace content
-			if len(bufStr) > 0 {
-				p.needsContentLeadingTrim = false
-			}
-		}
-
-		if strings.Contains(bufStr, lfm2ToolCallStartTag) { // content[<|tool_call_start|>] -> tool calls
-			split := strings.SplitN(bufStr, lfm2ToolCallStartTag, 2)
-			contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			remaining := split[1]
-
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = LFM2CollectingToolCalls
-
-			if len(contentBefore) > 0 {
-				events = append(events, lfm2EventContent{content: contentBefore})
-			}
-			return events, true
-		} else { // otherwise its content
-			p.buffer.Reset()
-			if len(bufStr) > 0 {
-				events = append(events, lfm2EventContent{content: bufStr})
-			}
-			return events, false
-		}
-
-	case LFM2CollectingToolCalls:
-		// Look for complete tool call JSON between tags
-		if idx := strings.Index(bufStr, lfm2ToolCallEndTag); idx != -1 {
-			toolCallContent := bufStr[:idx]
-
-			if toolCalls, err := p.parseToolCallsContent(toolCallContent); err == nil && len(toolCalls) > 0 {
-				remaining := bufStr[idx+len(lfm2ToolCallEndTag):]
-
-				// Check if there's another tool call
-				if strings.HasPrefix(remaining, lfm2ToolCallStartTag) {
-					remaining = remaining[len(lfm2ToolCallStartTag):]
-				} else {
-					// No more tool calls, go back to content
-					remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
-					p.state = LFM2CollectingContent
-				}
-
-				p.buffer.Reset()
-				p.buffer.WriteString(remaining)
-
-				for _, tc := range toolCalls {
-					events = append(events, lfm2EventToolCall{toolCall: tc})
-				}
-				return events, true
-			} else if err != nil {
-				slog.Warn("lfm2 tool call parsing failed", "error", err, "content", toolCallContent)
-			}
-		}
-
-		return events, false
-	}
-
-	return events, false
-}
-
-// parseToolCallsContent parses one or more tool calls from content
-// Supports JSON format and Python-style format including multiple calls: [func1(...),func2(...)]
-func (p *LFM2Parser) parseToolCallsContent(content string) ([]api.ToolCall, error) {
-	content = strings.TrimSpace(content)
-
-	// Try JSON format first: {"name": "func", "arguments": {...}}
-	var parsed struct {
-		Name      string          `json:"name"`
-		Arguments json.RawMessage `json:"arguments"`
-	}
-
-	if err := json.Unmarshal([]byte(content), &parsed); err == nil && parsed.Name != "" {
-		var args api.ToolCallFunctionArguments
-		if len(parsed.Arguments) > 0 {
-			if err := json.Unmarshal(parsed.Arguments, &args); err != nil {
-				return nil, err
-			}
-		} else {
-			args = api.NewToolCallFunctionArguments()
-		}
-
-		return []api.ToolCall{{
-			Function: api.ToolCallFunction{
-				Name:      parsed.Name,
-				Arguments: args,
-			},
-		}}, nil
-	}
-
-	// Try Python-style format: [func(arg1='val1'),func2(arg2='val2')] or func(arg1='val1')
-	return p.parsePythonStyleToolCalls(content)
-}
-
-// parsePythonStyleToolCalls parses one or more Python-style tool calls
-// Examples: [bash(command='ls'),bash(command='pwd')] or bash(command='ls')
-func (p *LFM2Parser) parsePythonStyleToolCalls(content string) ([]api.ToolCall, error) {
-	content = strings.TrimSpace(content)
-
-	// Strip outer brackets if present: [func(...)] -> func(...)
-	if strings.HasPrefix(content, "[") && strings.HasSuffix(content, "]") {
-		content = content[1 : len(content)-1]
-	}
-
-	var toolCalls []api.ToolCall
-
-	// Parse multiple function calls separated by commas at the top level
-	for len(content) > 0 {
-		content = strings.TrimSpace(content)
-		if content == "" {
-			break
-		}
-
-		// Skip leading comma from previous iteration
-		if strings.HasPrefix(content, ",") {
-			content = strings.TrimSpace(content[1:])
-			if content == "" {
-				break
-			}
-		}
-
-		// Find function name
-		parenIdx := strings.Index(content, "(")
-		if parenIdx == -1 {
-			return nil, errors.New("invalid tool call: no opening parenthesis")
-		}
-
-		funcName := strings.TrimSpace(content[:parenIdx])
-		if funcName == "" {
-			return nil, errors.New("invalid tool call: empty function name")
-		}
-
-		// Find matching closing parenthesis
-		closeIdx := findMatchingParen(content, parenIdx)
-		if closeIdx == -1 {
-			return nil, errors.New("invalid tool call: no matching closing parenthesis")
-		}
-
-		argsStr := content[parenIdx+1 : closeIdx]
-		args := api.NewToolCallFunctionArguments()
-
-		if argsStr != "" {
-			if err := parsePythonArgs(argsStr, &args); err != nil {
-				return nil, err
-			}
-		}
-
-		toolCalls = append(toolCalls, api.ToolCall{
-			Function: api.ToolCallFunction{
-				Name:      funcName,
-				Arguments: args,
-			},
-		})
-
-		// Move past this function call
-		content = content[closeIdx+1:]
-	}
-
-	if len(toolCalls) == 0 {
-		return nil, errors.New("no tool calls found")
-	}
-
-	return toolCalls, nil
-}
-
-// findMatchingParen finds the index of the closing parenthesis matching the one at openIdx
-// Returns -1 if not found. Handles nested parentheses and quoted strings.
-func findMatchingParen(s string, openIdx int) int {
-	depth := 1
-	i := openIdx + 1
-	for i < len(s) && depth > 0 {
-		switch s[i] {
-		case '(':
-			depth++
-		case ')':
-			depth--
-			if depth == 0 {
-				return i
-			}
-		case '\'', '"':
-			// Skip quoted string
-			quote := s[i]
-			i++
-			for i < len(s) && s[i] != quote {
-				if s[i] == '\\' && i+1 < len(s) {
-					i++ // skip escaped char
-				}
-				i++
-			}
-		}
-		i++
-	}
-	return -1
-}
-
-// parseToolCallContent parses a single tool call (for backward compatibility with tests)
-func (p *LFM2Parser) parseToolCallContent(content string) (api.ToolCall, error) {
-	calls, err := p.parseToolCallsContent(content)
-	if err != nil {
-		return api.ToolCall{}, err
-	}
-	if len(calls) == 0 {
-		return api.ToolCall{}, errors.New("no tool call found")
-	}
-	return calls[0], nil
-}
-
-// parsePythonArgs parses Python-style keyword arguments: key='value', key2="value2"
-func parsePythonArgs(argsStr string, args *api.ToolCallFunctionArguments) error {
-	// Simple state machine to parse key='value' pairs
-	// Handles: command='ls', flag="-la", count=42, enabled=true
-	var key string
-	i := 0
-
-	for i < len(argsStr) {
-		// Skip whitespace
-		for i < len(argsStr) && (argsStr[i] == ' ' || argsStr[i] == '\t' || argsStr[i] == '\n') {
-			i++
-		}
-		if i >= len(argsStr) {
-			break
-		}
-
-		// Parse key
-		keyStart := i
-		for i < len(argsStr) && argsStr[i] != '=' && argsStr[i] != ',' {
-			i++
-		}
-		if i >= len(argsStr) || argsStr[i] != '=' {
-			return errors.New("invalid argument: expected '='")
-		}
-		key = strings.TrimSpace(argsStr[keyStart:i])
-		i++ // skip '='
-
-		// Skip whitespace after =
-		for i < len(argsStr) && (argsStr[i] == ' ' || argsStr[i] == '\t') {
-			i++
-		}
-
-		// Parse value
-		var value string
-		if i < len(argsStr) && (argsStr[i] == '\'' || argsStr[i] == '"') {
-			// Quoted string
-			quote := argsStr[i]
-			i++
-			valueStart := i
-			for i < len(argsStr) && argsStr[i] != quote {
-				if argsStr[i] == '\\' && i+1 < len(argsStr) {
-					i += 2 // skip escaped char
-				} else {
-					i++
-				}
-			}
-			value = argsStr[valueStart:i]
-			if i < len(argsStr) {
-				i++ // skip closing quote
-			}
-			args.Set(key, value)
-		} else {
-			// Unquoted value (number, bool, etc)
-			valueStart := i
-			for i < len(argsStr) && argsStr[i] != ',' {
-				i++
-			}
-			value = strings.TrimSpace(argsStr[valueStart:i])
-
-			// Try to parse as number or bool
-			if v, err := strconv.ParseInt(value, 10, 64); err == nil {
-				args.Set(key, v)
-			} else if v, err := strconv.ParseFloat(value, 64); err == nil {
-				args.Set(key, v)
-			} else if value == "true" {
-				args.Set(key, true)
-			} else if value == "false" {
-				args.Set(key, false)
-			} else {
-				args.Set(key, value)
-			}
-		}
-
-		// Skip comma and whitespace
-		for i < len(argsStr) && (argsStr[i] == ',' || argsStr[i] == ' ' || argsStr[i] == '\t' || argsStr[i] == '\n') {
-			i++
-		}
-	}
-
-	return nil
-}
--- a/model/parsers/lfm2_test.go
+++ b/model/parsers/lfm2_test.go
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -68,12 +68,6 @@ func ParserForName(name string) Parser {
 		return &Nemotron3NanoParser{}
 	case "functiongemma":
 		return &FunctionGemmaParser{}
-	case "glm-4.7":
-		return &GLM47Parser{}
-	case "lfm2":
-		return &LFM2Parser{hasThinkingSupport: false}
-	case "lfm2-thinking":
-		return &LFM2Parser{hasThinkingSupport: true}
 	default:
 		return nil
 	}
--- a/model/parsers/testhelpers_test.go
+++ b/model/parsers/testhelpers_test.go
@@ -96,11 +96,3 @@ func testArgs(m map[string]any) api.ToolCallFunctionArguments {
 	}
 	return args
 }
-
-func args(s string) api.ToolCallFunctionArguments {
-	var result api.ToolCallFunctionArguments
-	if err := json.Unmarshal([]byte(s), &result); err != nil {
-		panic("invalid JSON in args(): " + err.Error())
-	}
-	return result
-}
--- a/model/renderers/glm46.go
+++ b/model/renderers/glm46.go
@@ -1,110 +0,0 @@
-package renderers
-
-import (
-	"encoding/json"
-	"fmt"
-	"strings"
-
-	"github.com/ollama/ollama/api"
-)
-
-type GLM46Renderer struct{}
-
-func (r *GLM46Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
-	var sb strings.Builder
-
-	sb.WriteString("[gMASK]<sop>")
-
-	var lastUserIndex int
-	for i, message := range messages {
-		if message.Role == "user" {
-			lastUserIndex = i
-		}
-	}
-
-	if len(tools) > 0 {
-		sb.WriteString("<|system|>\n")
-		sb.WriteString("# Tools\n\n")
-		sb.WriteString("You may call one or more functions to assist with the user query.\n\n")
-		sb.WriteString("You are provided with function signatures within <tools></tools> XML tags:\n")
-		sb.WriteString("<tools>\n")
-		for _, tool := range tools {
-			d, _ := json.Marshal(tool)
-			sb.WriteString(string(d) + "\n")
-		}
-		sb.WriteString("</tools>\n\n")
-		sb.WriteString("For each function call, output the function name and arguments within the following XML format:\n")
-		sb.WriteString("<tool_call>{function-name}\n")
-		sb.WriteString("<arg_key>{arg-key-1}</arg_key>\n")
-		sb.WriteString("<arg_value>{arg-value-1}</arg_value>\n")
-		sb.WriteString("<arg_key>{arg-key-2}</arg_key>\n")
-		sb.WriteString("<arg_value>{arg-value-2}</arg_value>\n")
-		sb.WriteString("...\n")
-		sb.WriteString("</tool_call>")
-	}
-
-	for i, message := range messages {
-		switch message.Role {
-		case "user":
-			sb.WriteString("<|user|>\n")
-			sb.WriteString(message.Content)
-			if thinkValue != nil && !thinkValue.Bool() && !strings.HasSuffix(message.Content, "/nothink") {
-				sb.WriteString("/nothink")
-			}
-		case "assistant":
-			sb.WriteString("<|assistant|>")
-			if i > lastUserIndex {
-				if message.Thinking != "" {
-					sb.WriteString("\n<think>" + message.Thinking + "</think>")
-				} else {
-					sb.WriteString("\n<think></think>")
-				}
-			}
-			if message.Content != "" {
-				sb.WriteString("\n" + message.Content)
-			}
-			if len(message.ToolCalls) > 0 {
-				for _, toolCall := range message.ToolCalls {
-					sb.WriteString("\n<tool_call>" + toolCall.Function.Name + "\n")
-					for key, value := range toolCall.Function.Arguments.All() {
-						sb.WriteString("<arg_key>" + key + "</arg_key>\n")
-
-						var valueStr string
-						if str, ok := value.(string); ok {
-							valueStr = str
-						} else {
-							jsonBytes, err := json.Marshal(value)
-							if err != nil {
-								valueStr = fmt.Sprintf("%v", value)
-							} else {
-								valueStr = string(jsonBytes)
-							}
-						}
-
-						sb.WriteString("<arg_value>" + valueStr + "</arg_value>\n")
-					}
-
-					sb.WriteString("</tool_call>")
-				}
-			}
-		case "tool":
-			if i == 0 || messages[i-1].Role != "tool" {
-				sb.WriteString("<|observation|>")
-			}
-			sb.WriteString("\n<tool_response>\n")
-			sb.WriteString(message.Content)
-			sb.WriteString("\n</tool_response>")
-		case "system":
-			sb.WriteString("<|system|>\n")
-			sb.WriteString(message.Content)
-		}
-	}
-
-	// Add generation prompt
-	sb.WriteString("<|assistant|>")
-	if thinkValue != nil && !thinkValue.Bool() {
-		sb.WriteString("\n<think></think>\n")
-	}
-
-	return sb.String(), nil
-}
--- a/model/renderers/glm46_test.go
+++ b/model/renderers/glm46_test.go
@@ -1,223 +0,0 @@
-package renderers
-
-import (
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/ollama/ollama/api"
-)
-
-func TestGLM46Renderer(t *testing.T) {
-	tests := []struct {
-		name       string
-		messages   []api.Message
-		tools      []api.Tool
-		thinkValue *api.ThinkValue
-		expected   string
-		skip       string
-	}{
-		{
-			name: "basic",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello, how are you?"},
-			},
-			expected: `[gMASK]<sop><|user|>
-Hello, how are you?<|assistant|>`,
-		},
-		{
-			name: "basic with system message",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a helpful assistant."},
-				{Role: "user", Content: "Hello, how are you?"},
-			},
-			expected: `[gMASK]<sop><|system|>
-You are a helpful assistant.<|user|>
-Hello, how are you?<|assistant|>`,
-		},
-		{
-			name: "basic with user assistant user",
-			messages: []api.Message{
-				{Role: "user", Content: "What is the capital of France?"},
-				{Role: "assistant", Thinking: "Let me analyze the request...", Content: "The capital of France is Paris."},
-				{Role: "user", Content: "Fantastic!"},
-			},
-			expected: `[gMASK]<sop><|user|>
-What is the capital of France?<|assistant|>
-The capital of France is Paris.<|user|>
-Fantastic!<|assistant|>`,
-		},
-		{
-			skip: "tool call ordering not guaranteed yet",
-			name: "tools",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a helpful assistant with access to tools."},
-				{Role: "user", Content: "What is the weather like in Tokyo?"},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get the current weather in a given location",
-						Parameters: api.ToolFunctionParameters{
-							Type:       "object",
-							Required:   []string{"location"},
-							Properties: propsMap(`{"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}}`),
-						},
-					},
-				},
-			},
-			expected: `[gMASK]<sop><|system|>
-# Tools
-
-You may call one or more functions to assist with the user query.
-
-You are provided with function signatures within <tools></tools> XML tags:
-<tools>
-{"type":"function","function":{"name":"get_weather","description":"Get the current weather in a given location","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"},"unit":{"type":"string","description":"","enum":["celsius","fahrenheit"]}}}}}
-</tools>
-
-For each function call, output the function name and arguments within the following XML format:
-<tool_call>{function-name}
-<arg_key>{arg-key-1}</arg_key>
-<arg_value>{arg-value-1}</arg_value>
-<arg_key>{arg-key-2}</arg_key>
-<arg_value>{arg-value-2}</arg_value>
-...
-</tool_call><|system|>
-You are a helpful assistant with access to tools.<|user|>
-What is the weather like in Tokyo?<|assistant|>`,
-		},
-		{
-			skip: "tool call ordering not guaranteed yet",
-			name: "tool calls",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a helpful assistant with access to tools."},
-				{Role: "user", Content: "What is the weather like in Tokyo?"},
-				{
-					Role: "assistant",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name:      "get_weather",
-								Arguments: args(`{"location": "Tokyo, Japan", "unit": "celsius"}`),
-							},
-						},
-						{
-							Function: api.ToolCallFunction{
-								Name:      "get_weather",
-								Arguments: args(`{"location": "Japan", "unit": "fahrenheit"}`),
-							},
-						},
-					},
-				},
-				{
-					Role:     "tool",
-					Content:  "{\"temperature\": 22, \"weather\": \"partly cloudy\", \"humidity\": 65}",
-					ToolName: "get_weather",
-				},
-				{
-					Role:     "tool",
-					Content:  "{\"temperature\": 68, \"weather\": \"sunny\", \"humidity\": 75}",
-					ToolName: "get_weather",
-				},
-				{
-					Role:    "assistant",
-					Content: "The weather in Tokyo is currently partly cloudy with a temperature of 22°C and 65% humidity. It's a pleasant day with moderate temperatures.",
-				},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get the current weather in a given location",
-						Parameters: api.ToolFunctionParameters{
-							Type:       "object",
-							Required:   []string{"location"},
-							Properties: propsMap(`{"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}}`),
-						},
-					},
-				},
-			},
-			expected: `[gMASK]<sop><|system|>
-# Tools
-
-You may call one or more functions to assist with the user query.
-
-You are provided with function signatures within <tools></tools> XML tags:
-<tools>
-{"type":"function","function":{"name":"get_weather","description":"Get the current weather in a given location","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"},"unit":{"type":"string","description":"","enum":["celsius","fahrenheit"]}}}}}
-</tools>
-
-For each function call, output the function name and arguments within the following XML format:
-<tool_call>{function-name}
-<arg_key>{arg-key-1}</arg_key>
-<arg_value>{arg-value-1}</arg_value>
-<arg_key>{arg-key-2}</arg_key>
-<arg_value>{arg-value-2}</arg_value>
-...
-</tool_call><|system|>
-You are a helpful assistant with access to tools.<|user|>
-What is the weather like in Tokyo?<|assistant|>
-<think></think>
-<tool_call>get_weather
-<arg_key>location</arg_key>
-<arg_value>Tokyo, Japan</arg_value>
-<arg_key>unit</arg_key>
-<arg_value>celsius</arg_value>
-</tool_call>
-<tool_call>get_weather
-<arg_key>location</arg_key>
-<arg_value>Japan</arg_value>
-<arg_key>unit</arg_key>
-<arg_value>fahrenheit</arg_value>
-</tool_call><|observation|>
-<tool_response>
-{"temperature": 22, "weather": "partly cloudy", "humidity": 65}
-</tool_response>
-<tool_response>
-{"temperature": 68, "weather": "sunny", "humidity": 75}
-</tool_response><|assistant|>
-<think></think>
-The weather in Tokyo is currently partly cloudy with a temperature of 22°C and 65% humidity. It's a pleasant day with moderate temperatures.<|assistant|>`,
-		},
-		{
-			name: "think true",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello, how are you?"},
-			},
-			thinkValue: &api.ThinkValue{Value: true},
-			expected: `[gMASK]<sop><|user|>
-Hello, how are you?<|assistant|>`,
-		},
-		{
-			name: "think false",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello, how are you?"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected: `[gMASK]<sop><|user|>
-Hello, how are you?/nothink<|assistant|>
-<think></think>
-`,
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if tt.skip != "" {
-				t.Skip(tt.skip)
-			}
-			renderer := &GLM46Renderer{}
-			rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
-			if err != nil {
-				t.Fatal(err)
-			}
-			if diff := cmp.Diff(rendered, tt.expected); diff != "" {
-				t.Errorf("mismatch (-got +want):\n%s", diff)
-				t.Logf("Got:\n%s", rendered)
-				t.Logf("Expected:\n%s", tt.expected)
-			}
-		})
-	}
-}
--- a/model/renderers/glm47.go
+++ b/model/renderers/glm47.go
@@ -1,170 +0,0 @@
-package renderers
-
-import (
-	"encoding/json"
-	"fmt"
-	"strings"
-
-	"github.com/ollama/ollama/api"
-)
-
-// GLM47Renderer renders messages for GLM-4.7 models.
-//
-// GLM-4.7 Thinking Modes (ref: https://docs.z.ai/guides/capabilities/thinking-mode):
-//
-//  1. INTERLEAVED THINKING
-//     The model thinks between tool calls and after receiving tool results.
-//     This enables complex step-by-step reasoning: interpreting each tool output
-//     before deciding what to do next. Thinking blocks are preserved and returned
-//     with tool results to maintain reasoning continuity.
-//
-//  2. PRESERVED THINKING
-//     The model retains reasoning content from previous assistant turns in context.
-//     This preserves reasoning continuity across multi-turn conversations. The
-//     upstream API has a "clear_thinking" parameter to control this:
-//     - clear_thinking=true:  clears reasoning from previous turns (outputs </think>)
-//     - clear_thinking=false: preserves <think>...</think> blocks from previous turns
-//
-//  3. TURN-LEVEL THINKING
-//     Controls whether the model should reason on each turn. The upstream API
-//     uses "enable_thinking" parameter:
-//     - enable_thinking=true:  outputs <think> to start reasoning
-//     - enable_thinking=false: outputs </think> to skip reasoning
-//
-// OLLAMA DEFAULTS:
-//   - Thinking is ENABLED by default (thinkValue=nil or true outputs <think>)
-//   - Thinking is PRESERVED by default (reasoning content from previous turns is always
-//     included in <think>...</think> blocks, equivalent to clear_thinking=false)
-//   - Users can disable thinking per-turn via thinkValue=false
-type GLM47Renderer struct{}
-
-func (r *GLM47Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
-	var sb strings.Builder
-
-	sb.WriteString("[gMASK]<sop>")
-
-	if len(tools) > 0 {
-		sb.WriteString("<|system|>\n")
-		sb.WriteString("# Tools\n\n")
-		sb.WriteString("You may call one or more functions to assist with the user query.\n\n")
-		sb.WriteString("You are provided with function signatures within <tools></tools> XML tags:\n")
-		sb.WriteString("<tools>\n")
-		for _, tool := range tools {
-			d, _ := json.Marshal(tool)
-			sb.WriteString(formatGLM47ToolJSON(d))
-			sb.WriteString("\n")
-		}
-		sb.WriteString("</tools>\n\n")
-		sb.WriteString("For each function call, output the function name and arguments within the following XML format:\n")
-		sb.WriteString("<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>")
-	}
-
-	think := true
-	if thinkValue != nil && !thinkValue.Bool() {
-		think = false
-	}
-
-	for i, message := range messages {
-		switch message.Role {
-		case "user":
-			sb.WriteString("<|user|>")
-			sb.WriteString(message.Content)
-		case "assistant":
-			sb.WriteString("<|assistant|>")
-			if message.Thinking != "" {
-				sb.WriteString("<think>" + message.Thinking + "</think>")
-			} else {
-				sb.WriteString("</think>")
-			}
-			if message.Content != "" {
-				sb.WriteString(message.Content)
-			}
-			if len(message.ToolCalls) > 0 {
-				for _, toolCall := range message.ToolCalls {
-					sb.WriteString("<tool_call>" + toolCall.Function.Name)
-					sb.WriteString(renderGLM47ToolArguments(toolCall.Function.Arguments))
-					sb.WriteString("</tool_call>")
-				}
-			}
-		case "tool":
-			if i == 0 || messages[i-1].Role != "tool" {
-				sb.WriteString("<|observation|>")
-			}
-			sb.WriteString("<tool_response>")
-			sb.WriteString(message.Content)
-			sb.WriteString("</tool_response>")
-		case "system":
-			sb.WriteString("<|system|>")
-			sb.WriteString(message.Content)
-		}
-	}
-
-	sb.WriteString("<|assistant|>")
-	if think {
-		sb.WriteString("<think>")
-	} else {
-		sb.WriteString("</think>")
-	}
-
-	return sb.String(), nil
-}
-
-func renderGLM47ToolArguments(args api.ToolCallFunctionArguments) string {
-	var sb strings.Builder
-	for key, value := range args.All() {
-		sb.WriteString("<arg_key>" + key + "</arg_key>")
-		var valueStr string
-		if str, ok := value.(string); ok {
-			valueStr = str
-		} else {
-			jsonBytes, err := json.Marshal(value)
-			if err != nil {
-				valueStr = fmt.Sprintf("%v", value)
-			} else {
-				valueStr = string(jsonBytes)
-			}
-		}
-
-		sb.WriteString("<arg_value>" + valueStr + "</arg_value>")
-	}
-
-	return sb.String()
-}
-
-func formatGLM47ToolJSON(raw []byte) string {
-	var sb strings.Builder
-	sb.Grow(len(raw) + len(raw)/10)
-
-	inString := false
-	escaped := false
-	for i := range raw {
-		ch := raw[i]
-		sb.WriteByte(ch)
-
-		if inString {
-			if escaped {
-				escaped = false
-				continue
-			}
-			if ch == '\\' {
-				escaped = true
-				continue
-			}
-			if ch == '"' {
-				inString = false
-			}
-			continue
-		}
-
-		if ch == '"' {
-			inString = true
-			continue
-		}
-
-		if ch == ':' || ch == ',' {
-			sb.WriteByte(' ')
-		}
-	}
-
-	return sb.String()
-}
--- a/model/renderers/glm47_test.go
+++ b/model/renderers/glm47_test.go
@@ -1,191 +0,0 @@
-package renderers
-
-import (
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/ollama/ollama/api"
-)
-
-func TestGLM47Renderer(t *testing.T) {
-	tests := []struct {
-		name       string
-		messages   []api.Message
-		tools      []api.Tool
-		thinkValue *api.ThinkValue
-		expected   string
-	}{
-		{
-			name: "basic user message",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello"},
-			},
-			expected: "[gMASK]<sop><|user|>Hello<|assistant|><think>",
-		},
-		{
-			name: "thinking disabled",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "[gMASK]<sop><|user|>Hello<|assistant|></think>",
-		},
-		{
-			name: "system and user",
-			messages: []api.Message{
-				{Role: "system", Content: "You are helpful."},
-				{Role: "user", Content: "Hello"},
-			},
-			expected: "[gMASK]<sop><|system|>You are helpful.<|user|>Hello<|assistant|><think>",
-		},
-		{
-			name: "multi-turn conversation",
-			messages: []api.Message{
-				{Role: "user", Content: "Hi"},
-				{Role: "assistant", Content: "Hello there"},
-				{Role: "user", Content: "How are you?"},
-			},
-			expected: "[gMASK]<sop><|user|>Hi<|assistant|></think>Hello there<|user|>How are you?<|assistant|><think>",
-		},
-		{
-			name: "assistant with reasoning_content",
-			messages: []api.Message{
-				{Role: "user", Content: "Answer with reasoning."},
-				{Role: "assistant", Thinking: "Plan.", Content: "Done."},
-			},
-			expected: "[gMASK]<sop><|user|>Answer with reasoning.<|assistant|><think>Plan.</think>Done.<|assistant|><think>",
-		},
-		{
-			name: "tool call with empty content",
-			messages: []api.Message{
-				{Role: "user", Content: "Weather?"},
-				{
-					Role: "assistant",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name:      "get_weather",
-								Arguments: args(`{"location": "Tokyo", "unit": "celsius"}`),
-							},
-						},
-					},
-				},
-				{Role: "tool", Content: `{"temperature":22}`},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get weather",
-						Parameters: api.ToolFunctionParameters{
-							Type:       "object",
-							Required:   []string{"location"},
-							Properties: propsMap(`{"location": {"type": "string"}}`),
-						},
-					},
-				},
-			},
-			expected: "[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get weather\", \"parameters\": {\"type\": \"object\", \"required\": [\"location\"], \"properties\": {\"location\": {\"type\": \"string\"}}}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call><|user|>Weather?<|assistant|></think><tool_call>get_weather<arg_key>location</arg_key><arg_value>Tokyo</arg_value><arg_key>unit</arg_key><arg_value>celsius</arg_value></tool_call><|observation|><tool_response>{\"temperature\":22}</tool_response><|assistant|><think>",
-		},
-		{
-			name: "tool call with content",
-			messages: []api.Message{
-				{Role: "user", Content: "Weather?"},
-				{
-					Role:    "assistant",
-					Content: "Let me check",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name:      "get_weather",
-								Arguments: args(`{"location": "Tokyo"}`),
-							},
-						},
-					},
-				},
-				{Role: "tool", Content: `{"temperature":22}`},
-				{Role: "assistant", Content: "It is 22C."},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get weather",
-						Parameters: api.ToolFunctionParameters{
-							Type:       "object",
-							Required:   []string{"location"},
-							Properties: propsMap(`{"location": {"type": "string"}}`),
-						},
-					},
-				},
-			},
-			expected: "[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get weather\", \"parameters\": {\"type\": \"object\", \"required\": [\"location\"], \"properties\": {\"location\": {\"type\": \"string\"}}}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call><|user|>Weather?<|assistant|></think>Let me check<tool_call>get_weather<arg_key>location</arg_key><arg_value>Tokyo</arg_value></tool_call><|observation|><tool_response>{\"temperature\":22}</tool_response><|assistant|></think>It is 22C.<|assistant|><think>",
-		},
-		{
-			name: "multiple tool calls and responses",
-			messages: []api.Message{
-				{Role: "user", Content: "Compare weather"},
-				{
-					Role: "assistant",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name:      "get_weather",
-								Arguments: args(`{"location": "Tokyo"}`),
-							},
-						},
-						{
-							Function: api.ToolCallFunction{
-								Name:      "get_weather",
-								Arguments: args(`{"location": "Paris"}`),
-							},
-						},
-					},
-				},
-				{Role: "tool", Content: `{"temperature":22}`},
-				{Role: "tool", Content: `{"temperature":18}`},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get weather",
-						Parameters: api.ToolFunctionParameters{
-							Type:       "object",
-							Required:   []string{"location"},
-							Properties: propsMap(`{"location": {"type": "string"}}`),
-						},
-					},
-				},
-			},
-			expected: "[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get weather\", \"parameters\": {\"type\": \"object\", \"required\": [\"location\"], \"properties\": {\"location\": {\"type\": \"string\"}}}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call><|user|>Compare weather<|assistant|></think><tool_call>get_weather<arg_key>location</arg_key><arg_value>Tokyo</arg_value></tool_call><tool_call>get_weather<arg_key>location</arg_key><arg_value>Paris</arg_value></tool_call><|observation|><tool_response>{\"temperature\":22}</tool_response><tool_response>{\"temperature\":18}</tool_response><|assistant|><think>",
-		},
-		{
-			name: "preserved thinking in multi-turn",
-			messages: []api.Message{
-				{Role: "user", Content: "Think step by step"},
-				{Role: "assistant", Thinking: "Let me think...", Content: "Here's my answer."},
-				{Role: "user", Content: "Continue"},
-			},
-			expected: "[gMASK]<sop><|user|>Think step by step<|assistant|><think>Let me think...</think>Here's my answer.<|user|>Continue<|assistant|><think>",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			renderer := &GLM47Renderer{}
-			rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
-			if err != nil {
-				t.Fatal(err)
-			}
-			if diff := cmp.Diff(rendered, tt.expected); diff != "" {
-				t.Errorf("mismatch (-got +want):\n%s", diff)
-				t.Logf("Got:\n%s", rendered)
-				t.Logf("Expected:\n%s", tt.expected)
-			}
-		})
-	}
-}
--- a/model/renderers/lfm2.go
+++ b/model/renderers/lfm2.go
@@ -1,144 +0,0 @@
-package renderers
-
-import (
-	"encoding/json"
-	"strings"
-
-	"github.com/ollama/ollama/api"
-)
-
-type LFM2Renderer struct {
-	IsThinking bool
-}
-
-func (r *LFM2Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
-	var sb strings.Builder
-
-	// Note: BOS token is added by the tokenizer (add_bos_token: true), not the renderer
-
-	// Extract first system message if present (to combine with tools)
-	var firstSystemContent string
-	startIdx := 0
-	if len(messages) > 0 && messages[0].Role == "system" {
-		firstSystemContent = messages[0].Content
-		startIdx = 1
-	}
-
-	// Append tools to first system content
-	if len(tools) > 0 {
-		if firstSystemContent != "" {
-			firstSystemContent += "\n"
-		}
-		firstSystemContent += "List of tools: ["
-		for i, tool := range tools {
-			toolJSON, err := json.Marshal(tool)
-			if err != nil {
-				return "", err
-			}
-			firstSystemContent += string(toolJSON)
-			if i < len(tools)-1 {
-				firstSystemContent += ", "
-			}
-		}
-		firstSystemContent += "]"
-	}
-
-	// Output first system block if it has content
-	if firstSystemContent != "" {
-		sb.WriteString("<|im_start|>system\n")
-		sb.WriteString(firstSystemContent)
-		sb.WriteString("<|im_end|>\n")
-	}
-
-	// Find the index of the last assistant message for thinking stripping
-	lastAssistantIndex := -1
-	for i := len(messages) - 1; i >= startIdx; i-- {
-		if messages[i].Role == "assistant" {
-			lastAssistantIndex = i
-			break
-		}
-	}
-
-	// Track whether we need to add generation prompt
-	needsGenerationPrompt := len(messages) > 0
-
-	for i := startIdx; i < len(messages); i++ {
-		message := messages[i]
-		switch message.Role {
-		case "system":
-			// Additional system messages (after the first) are rendered normally
-			sb.WriteString("<|im_start|>system\n")
-			sb.WriteString(message.Content)
-			sb.WriteString("<|im_end|>\n")
-
-		case "user":
-			sb.WriteString("<|im_start|>user\n")
-			sb.WriteString(message.Content)
-			sb.WriteString("<|im_end|>\n")
-			needsGenerationPrompt = true
-
-		case "assistant":
-			sb.WriteString("<|im_start|>assistant\n")
-
-			// Check if this is the last assistant message
-			isLastAssistant := i == lastAssistantIndex
-
-			// Process content (may need thinking stripped)
-			content := message.Content
-
-			// Handle thinking tags in assistant content
-			keepPastThinking := r.IsThinking && (thinkValue != nil && thinkValue.Bool())
-			if strings.Contains(content, "</think>") {
-				parts := strings.SplitN(content, "</think>", 2)
-				if len(parts) > 1 {
-					if !isLastAssistant && !keepPastThinking {
-						// Strip thinking entirely for past assistant messages
-						content = strings.TrimSpace(parts[1])
-					} else {
-						// Preserve thinking but trim whitespace after </think>
-						content = parts[0] + "</think>" + strings.TrimLeft(parts[1], " \t\n\r")
-					}
-				}
-			}
-
-			if len(message.ToolCalls) > 0 {
-				// Assistant with tool calls - write content first (if any after stripping)
-				if content != "" {
-					sb.WriteString(content)
-				}
-
-				for _, toolCall := range message.ToolCalls {
-					sb.WriteString("<|tool_call_start|>")
-					toolCallJSON := map[string]any{
-						"name":      toolCall.Function.Name,
-						"arguments": toolCall.Function.Arguments,
-					}
-					callJSON, _ := json.Marshal(toolCallJSON)
-					sb.WriteString(string(callJSON))
-					sb.WriteString("<|tool_call_end|>")
-				}
-			} else {
-				sb.WriteString(content)
-			}
-
-			sb.WriteString("<|im_end|>\n")
-			needsGenerationPrompt = true // Always add gen prompt after assistant when add_generation_prompt=true
-
-		case "tool":
-			// Tool responses are rendered as plain messages per the chat template
-			sb.WriteString("<|im_start|>tool\n")
-			sb.WriteString(message.Content)
-			sb.WriteString("<|im_end|>\n")
-			needsGenerationPrompt = true
-		}
-	}
-
-	// Add generation prompt
-	if needsGenerationPrompt {
-		sb.WriteString("<|im_start|>assistant\n")
-		// Note: Model is a "thinking-only" model - it will output <think> itself
-		// We don't add <think> tag to the prompt
-	}
-
-	return sb.String(), nil
-}
--- a/model/renderers/lfm2_test.go
+++ b/model/renderers/lfm2_test.go
@@ -1,427 +0,0 @@
-package renderers
-
-import (
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestLFM2Renderer(t *testing.T) {
-	tests := []struct {
-		name       string
-		messages   []api.Message
-		tools      []api.Tool
-		thinkValue *api.ThinkValue
-		expected   string
-	}{
-		{
-			name: "basic user message",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello!"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "basic with system message",
-			messages: []api.Message{
-				{Role: "system", Content: "You are a helpful assistant."},
-				{Role: "user", Content: "Hello!"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "multiple system messages rendered separately",
-			messages: []api.Message{
-				{Role: "system", Content: "First instruction."},
-				{Role: "system", Content: "Second instruction."},
-				{Role: "user", Content: "Hello!"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>system\nFirst instruction.<|im_end|>\n<|im_start|>system\nSecond instruction.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "multi-turn conversation",
-			messages: []api.Message{
-				{Role: "user", Content: "What is 2+2?"},
-				{Role: "assistant", Content: "The answer is 4."},
-				{Role: "user", Content: "Thanks!"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\nThe answer is 4.<|im_end|>\n<|im_start|>user\nThanks!<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "only system message",
-			messages: []api.Message{
-				{Role: "system", Content: "You are helpful."},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>system\nYou are helpful.<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// When assistant is the LAST assistant, thinking is preserved (even with keep_past_thinking=false)
-			name: "user-assistant-user: last assistant preserves thinking",
-			messages: []api.Message{
-				{Role: "user", Content: "Q1"},
-				{Role: "assistant", Content: "<think>reasoning</think>A1"},
-				{Role: "user", Content: "Q2"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\n<think>reasoning</think>A1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// With two assistants, first is stripped (not last), second preserved (is last)
-			name: "multi-turn thinking: first stripped, second preserved",
-			messages: []api.Message{
-				{Role: "user", Content: "Q1"},
-				{Role: "assistant", Content: "<think>reason1</think>A1"},
-				{Role: "user", Content: "Q2"},
-				{Role: "assistant", Content: "<think>reason2</think>A2"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\nA1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n<think>reason2</think>A2<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// With thinking enabled (keep_past_thinking=true), both preserved
-			name: "multi-turn thinking: both preserved when thinking enabled",
-			messages: []api.Message{
-				{Role: "user", Content: "Q1"},
-				{Role: "assistant", Content: "<think>reason1</think>A1"},
-				{Role: "user", Content: "Q2"},
-				{Role: "assistant", Content: "<think>reason2</think>A2"},
-			},
-			thinkValue: &api.ThinkValue{Value: true},
-			expected:   "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\n<think>reason1</think>A1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n<think>reason2</think>A2<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "assistant with tool calls",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-				{
-					Role: "assistant",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
-		},
-		{
-			name: "assistant with content and tool calls",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather in Paris?"},
-				{
-					Role:    "assistant",
-					Content: "Let me check.",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   `<|im_start|>user` + "\n" + `What's the weather in Paris?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `Let me check.<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
-		},
-		{
-			name: "tool response",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-				{Role: "assistant", Content: "Let me check."},
-				{Role: "tool", Content: "22C, Sunny"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\nLet me check.<|im_end|>\n<|im_start|>tool\n22C, Sunny<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "multiple tool calls",
-			messages: []api.Message{
-				{Role: "user", Content: "Get weather for Paris and London"},
-				{
-					Role: "assistant",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "London",
-								}),
-							},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   `<|im_start|>user` + "\n" + `Get weather for Paris and London<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|tool_call_start|>{"arguments":{"location":"London"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
-		},
-		{
-			name: "tools definitions with system message",
-			messages: []api.Message{
-				{Role: "system", Content: "You are helpful."},
-				{Role: "user", Content: "What's the weather?"},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get current weather",
-						Parameters: api.ToolFunctionParameters{
-							Type: "object",
-							Properties: testPropsMap(map[string]api.ToolProperty{
-								"location": {
-									Type:        api.PropertyType{"string"},
-									Description: "City name",
-								},
-							}),
-							Required: []string{"location"},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   `<|im_start|>system` + "\n" + `You are helpful.` + "\n" + `List of tools: [{"type":"function","function":{"name":"get_weather","description":"Get current weather","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"City name"}}}}}]<|im_end|>` + "\n" + `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
-		},
-		{
-			name: "tools definitions without system message",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get current weather",
-						Parameters: api.ToolFunctionParameters{
-							Type: "object",
-							Properties: testPropsMap(map[string]api.ToolProperty{
-								"location": {
-									Type:        api.PropertyType{"string"},
-									Description: "City name",
-								},
-							}),
-							Required: []string{"location"},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   `<|im_start|>system` + "\n" + `List of tools: [{"type":"function","function":{"name":"get_weather","description":"Get current weather","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"City name"}}}}}]<|im_end|>` + "\n" + `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
-		},
-		{
-			name: "multiple tools without system message",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello"},
-			},
-			tools: []api.Tool{
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_weather",
-						Description: "Get weather",
-					},
-				},
-				{
-					Type: "function",
-					Function: api.ToolFunction{
-						Name:        "get_time",
-						Description: "Get time",
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>system\nList of tools: [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather\",\"parameters\":{\"type\":\"\",\"properties\":null}}}, {\"type\":\"function\",\"function\":{\"name\":\"get_time\",\"description\":\"Get time\",\"parameters\":{\"type\":\"\",\"properties\":null}}}]<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "user-tool sequence",
-			messages: []api.Message{
-				{Role: "user", Content: "Check weather"},
-				{Role: "tool", Content: "22C"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nCheck weather<|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "full tool call cycle",
-			messages: []api.Message{
-				{Role: "user", Content: "Check weather"},
-				{Role: "assistant", Content: "Let me check"},
-				{Role: "tool", Content: "22C"},
-				{Role: "assistant", Content: "It's 22C"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nCheck weather<|im_end|>\n<|im_start|>assistant\nLet me check<|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\nIt's 22C<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "unicode content",
-			messages: []api.Message{
-				{Role: "user", Content: "你好世界! مرحبا 🌍"},
-				{Role: "assistant", Content: "Hello! 👋"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\n你好世界! مرحبا 🌍<|im_end|>\n<|im_start|>assistant\nHello! 👋<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "newlines in content",
-			messages: []api.Message{
-				{Role: "user", Content: "Line 1\nLine 2\n\nLine 4"},
-				{Role: "assistant", Content: "Response with\nmultiple\nlines"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nLine 1\nLine 2\n\nLine 4<|im_end|>\n<|im_start|>assistant\nResponse with\nmultiple\nlines<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			name: "empty assistant content",
-			messages: []api.Message{
-				{Role: "user", Content: "Hello"},
-				{Role: "assistant", Content: ""},
-				{Role: "user", Content: "OK"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n<|im_end|>\n<|im_start|>user\nOK<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// Generation prompt does NOT include <think> - model outputs it
-			name: "generation prompt has no think tag",
-			messages: []api.Message{
-				{Role: "user", Content: "Think hard"},
-			},
-			thinkValue: &api.ThinkValue{Value: true},
-			expected:   "<|im_start|>user\nThink hard<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// Interleaved: thinking before tool call - last assistant preserves thinking
-			name: "thinking before tool call (last assistant)",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-				{
-					Role:    "assistant",
-					Content: "<think>I need to check the weather</think>",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<think>I need to check the weather</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// Two assistants with tool calls - first has thinking stripped
-			name: "two assistants with tools: first thinking stripped",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-				{
-					Role:    "assistant",
-					Content: "<think>checking</think>",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-					},
-				},
-				{Role: "tool", Content: "22C"},
-				{Role: "assistant", Content: "<think>got result</think>It's 22C!"},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n<think>got result</think>It's 22C!<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// Two assistants with tools - both preserved when thinking enabled
-			name: "two assistants with tools: both preserved when thinking enabled",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-				{
-					Role:    "assistant",
-					Content: "<think>checking</think>",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-					},
-				},
-				{Role: "tool", Content: "22C"},
-				{Role: "assistant", Content: "<think>got result</think>It's 22C!"},
-			},
-			thinkValue: &api.ThinkValue{Value: true},
-			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<think>checking</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n<think>got result</think>It's 22C!<|im_end|>\n<|im_start|>assistant\n",
-		},
-		{
-			// Content before thinking before tool call
-			name: "content then thinking then tool call",
-			messages: []api.Message{
-				{Role: "user", Content: "What's the weather?"},
-				{
-					Role:    "assistant",
-					Content: "Let me check.<think>Using weather API</think>",
-					ToolCalls: []api.ToolCall{
-						{
-							Function: api.ToolCallFunction{
-								Name: "get_weather",
-								Arguments: testArgs(map[string]any{
-									"location": "Paris",
-								}),
-							},
-						},
-					},
-				},
-			},
-			thinkValue: &api.ThinkValue{Value: false},
-			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\nLet me check.<think>Using weather API</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>assistant\n",
-		},
-	}
-
-	renderer := &LFM2Renderer{IsThinking: true}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
-			if err != nil {
-				t.Fatalf("Render() error = %v", err)
-			}
-			if diff := cmp.Diff(tt.expected, rendered); diff != "" {
-				t.Errorf("Render() mismatch (-want +got):\n%s", diff)
-			}
-		})
-	}
-}
--- a/model/renderers/renderer.go
+++ b/model/renderers/renderer.go
@@ -80,12 +80,6 @@ func rendererForName(name string) Renderer {
 		return &Nemotron3NanoRenderer{}
 	case "functiongemma":
 		return &FunctionGemmaRenderer{}
-	case "glm-4.7":
-		return &GLM47Renderer{}
-	case "lfm2":
-		return &LFM2Renderer{IsThinking: false}
-	case "lfm2-thinking":
-		return &LFM2Renderer{IsThinking: true}
 	default:
 		return nil
 	}
--- a/model/renderers/testhelpers_test.go
+++ b/model/renderers/testhelpers_test.go
@@ -1,26 +1,6 @@
 package renderers

-import (
-	"encoding/json"
-
-	"github.com/ollama/ollama/api"
-)
-
-func args(s string) api.ToolCallFunctionArguments {
-	var result api.ToolCallFunctionArguments
-	if err := json.Unmarshal([]byte(s), &result); err != nil {
-		panic("invalid JSON in args(): " + err.Error())
-	}
-	return result
-}
-
-func propsMap(s string) *api.ToolPropertiesMap {
-	var result api.ToolPropertiesMap
-	if err := json.Unmarshal([]byte(s), &result); err != nil {
-		panic("invalid JSON in propsMap(): " + err.Error())
-	}
-	return &result
-}
+import "github.com/ollama/ollama/api"

 // testPropsMap creates a ToolPropertiesMap from a map (convenience function for tests, order not preserved)
 func testPropsMap(m map[string]api.ToolProperty) *api.ToolPropertiesMap {
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -794,47 +794,3 @@ func ToImageGenerationResponse(resp api.GenerateResponse) ImageGenerationRespons
 		Data:    data,
 	}
 }
-
-// ImageEditRequest is an OpenAI-compatible image edit request.
-type ImageEditRequest struct {
-	Model  string `json:"model"`
-	Prompt string `json:"prompt"`
-	Image  string `json:"image"`          // Base64-encoded image data
-	Size   string `json:"size,omitempty"` // e.g., "1024x1024"
-	Seed   *int64 `json:"seed,omitempty"`
-}
-
-// FromImageEditRequest converts an OpenAI image edit request to an Ollama GenerateRequest.
-func FromImageEditRequest(r ImageEditRequest) (api.GenerateRequest, error) {
-	req := api.GenerateRequest{
-		Model:  r.Model,
-		Prompt: r.Prompt,
-	}
-
-	// Decode the input image
-	if r.Image != "" {
-		imgData, err := decodeImageURL(r.Image)
-		if err != nil {
-			return api.GenerateRequest{}, fmt.Errorf("invalid image: %w", err)
-		}
-		req.Images = append(req.Images, imgData)
-	}
-
-	// Parse size if provided (e.g., "1024x768")
-	if r.Size != "" {
-		var w, h int32
-		if _, err := fmt.Sscanf(r.Size, "%dx%d", &w, &h); err == nil {
-			req.Width = w
-			req.Height = h
-		}
-	}
-
-	if r.Seed != nil {
-		if req.Options == nil {
-			req.Options = map[string]any{}
-		}
-		req.Options["seed"] = *r.Seed
-	}
-
-	return req, nil
-}
--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@@ -448,86 +448,3 @@ func TestFromChatRequest_TopLogprobsRange(t *testing.T) {
 		})
 	}
 }
-
-func TestFromImageEditRequest_Basic(t *testing.T) {
-	req := ImageEditRequest{
-		Model:  "test-model",
-		Prompt: "make it blue",
-		Image:  prefix + image,
-	}
-
-	result, err := FromImageEditRequest(req)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-
-	if result.Model != "test-model" {
-		t.Errorf("expected model 'test-model', got %q", result.Model)
-	}
-
-	if result.Prompt != "make it blue" {
-		t.Errorf("expected prompt 'make it blue', got %q", result.Prompt)
-	}
-
-	if len(result.Images) != 1 {
-		t.Fatalf("expected 1 image, got %d", len(result.Images))
-	}
-}
-
-func TestFromImageEditRequest_WithSize(t *testing.T) {
-	req := ImageEditRequest{
-		Model:  "test-model",
-		Prompt: "make it blue",
-		Image:  prefix + image,
-		Size:   "512x768",
-	}
-
-	result, err := FromImageEditRequest(req)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-
-	if result.Width != 512 {
-		t.Errorf("expected width 512, got %d", result.Width)
-	}
-
-	if result.Height != 768 {
-		t.Errorf("expected height 768, got %d", result.Height)
-	}
-}
-
-func TestFromImageEditRequest_WithSeed(t *testing.T) {
-	seed := int64(12345)
-	req := ImageEditRequest{
-		Model:  "test-model",
-		Prompt: "make it blue",
-		Image:  prefix + image,
-		Seed:   &seed,
-	}
-
-	result, err := FromImageEditRequest(req)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-
-	if result.Options == nil {
-		t.Fatal("expected options to be set")
-	}
-
-	if result.Options["seed"] != seed {
-		t.Errorf("expected seed %d, got %v", seed, result.Options["seed"])
-	}
-}
-
-func TestFromImageEditRequest_InvalidImage(t *testing.T) {
-	req := ImageEditRequest{
-		Model:  "test-model",
-		Prompt: "make it blue",
-		Image:  "not-valid-base64",
-	}
-
-	_, err := FromImageEditRequest(req)
-	if err == nil {
-		t.Error("expected error for invalid image")
-	}
-}
--- a/readline/readline.go
+++ b/readline/readline.go
@@ -95,21 +95,7 @@ func (i *Instance) Readline() (string, error) {

 	var currentLineBuf []rune

-	// draining tracks if we're processing buffered input from cooked mode.
-	// In cooked mode Enter sends \n, but in raw mode Ctrl+J sends \n.
-	// We treat \n from cooked mode as submit, not multiline.
-	// We check Buffered() after the first read since the bufio buffer is
-	// empty until then. This is compatible with """ multiline mode in
-	// interactive.go since each Readline() call is independent.
-	var draining, stopDraining bool
-
 	for {
-		// Apply deferred state change from previous iteration
-		if stopDraining {
-			draining = false
-			stopDraining = false
-		}
-
 		// don't show placeholder when pasting unless we're in multiline mode
 		showPlaceholder := !i.Pasting || i.Prompt.UseAlt
 		if buf.IsEmpty() && showPlaceholder {
@@ -119,15 +105,6 @@ func (i *Instance) Readline() (string, error) {

 		r, err := i.Terminal.Read()

-		// After reading, check if there's more buffered data. If so, we're
-		// processing cooked-mode input. Once buffer empties, the current
-		// char is the last buffered one (still drain it), then stop next iteration.
-		if i.Terminal.reader.Buffered() > 0 {
-			draining = true
-		} else if draining {
-			stopDraining = true
-		}
-
 		if buf.IsEmpty() {
 			fmt.Print(ClearToEOL)
 		}
@@ -255,20 +232,15 @@ func (i *Instance) Readline() (string, error) {
 			fd := os.Stdin.Fd()
 			return handleCharCtrlZ(fd, i.Terminal.termios)
 		case CharCtrlJ:
-			// If not draining cooked-mode input, treat as multiline
-			if !draining {
-				i.pastedLines = append(i.pastedLines, buf.String())
-				buf.Buf.Clear()
-				buf.Pos = 0
-				buf.DisplayPos = 0
-				buf.LineHasSpace.Clear()
-				fmt.Println()
-				fmt.Print(i.Prompt.AltPrompt)
-				i.Prompt.UseAlt = true
-				continue
-			}
-			// Draining cooked-mode input: treat \n as submit
-			fallthrough
+			i.pastedLines = append(i.pastedLines, buf.String())
+			buf.Buf.Clear()
+			buf.Pos = 0
+			buf.DisplayPos = 0
+			buf.LineHasSpace.Clear()
+			fmt.Println()
+			fmt.Print(i.Prompt.AltPrompt)
+			i.Prompt.UseAlt = true
+			continue
 		case CharEnter:
 			output := buf.String()
 			if len(i.pastedLines) > 0 {
--- a/server/create.go
+++ b/server/create.go
@@ -28,7 +28,6 @@ import (
 	"github.com/ollama/ollama/format"
 	ofs "github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/fs/ggml"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
@@ -91,7 +90,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
 			ch <- resp
 		}

-		oldManifest, _ := manifest.ParseNamedManifest(name)
+		oldManifest, _ := ParseNamedManifest(name)

 		var baseLayers []*layerGGML
 		var err error
@@ -124,9 +123,9 @@ func (s *Server) CreateHandler(c *gin.Context) {
 				}

 				if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "") {
-					mf, mErr := manifest.ParseNamedManifest(fromName)
-					if mErr == nil && mf.Config.Digest != "" {
-						configPath, pErr := manifest.BlobsPath(mf.Config.Digest)
+					manifest, mErr := ParseNamedManifest(fromName)
+					if mErr == nil && manifest.Config.Digest != "" {
+						configPath, pErr := GetBlobsPath(manifest.Config.Digest)
 						if pErr == nil {
 							if cfgFile, fErr := os.Open(configPath); fErr == nil {
 								var baseConfig model.ConfigV2
@@ -343,7 +342,7 @@ func detectModelTypeFromFiles(files map[string]string) string {
 			return "gguf"
 		} else {
 			// try to see if we can find a gguf file even without the file extension
-			blobPath, err := manifest.BlobsPath(files[fn])
+			blobPath, err := GetBlobsPath(files[fn])
 			if err != nil {
 				slog.Error("error getting blobs path", "file", fn)
 				return ""
@@ -395,7 +394,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
 			return nil, fmt.Errorf("%w: %s: %s", errFilePath, err, fp)
 		}

-		blobPath, err := manifest.BlobsPath(digest)
+		blobPath, err := GetBlobsPath(digest)
 		if err != nil {
 			return nil, err
 		}
@@ -433,7 +432,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
 		return nil, err
 	}

-	layer, err := manifest.NewLayer(t, mediaType)
+	layer, err := NewLayer(t, mediaType)
 	if err != nil {
 		return nil, err
 	}
@@ -466,7 +465,7 @@ func kvFromLayers(baseLayers []*layerGGML) (ofs.Config, error) {
 }

 func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, config *model.ConfigV2, fn func(resp api.ProgressResponse)) (err error) {
-	var layers []manifest.Layer
+	var layers []Layer
 	for _, layer := range baseLayers {
 		if layer.GGML != nil {
 			quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization))
@@ -551,13 +550,13 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
 	}

 	for _, layer := range layers {
-		if layer.Status != "" {
-			fn(api.ProgressResponse{Status: layer.Status})
+		if layer.status != "" {
+			fn(api.ProgressResponse{Status: layer.status})
 		}
 	}

 	fn(api.ProgressResponse{Status: "writing manifest"})
-	if err := manifest.WriteManifest(name, *configLayer, layers); err != nil {
+	if err := WriteManifest(name, *configLayer, layers); err != nil {
 		return err
 	}

@@ -578,7 +577,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
 		return nil, err
 	}

-	blob, err := manifest.BlobsPath(layer.Digest)
+	blob, err := GetBlobsPath(layer.Digest)
 	if err != nil {
 		return nil, err
 	}
@@ -600,7 +599,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
 	}
 	temp.Seek(0, io.SeekStart)
 	fn(api.ProgressResponse{Status: "verifying conversion"})
-	newLayer, err := manifest.NewLayer(temp, layer.MediaType)
+	newLayer, err := NewLayer(temp, layer.MediaType)
 	if err != nil {
 		return nil, err
 	}
@@ -620,7 +619,7 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 	var layers []*layerGGML

 	fn(api.ProgressResponse{Status: "parsing GGUF"})
-	blobPath, err := manifest.BlobsPath(digest)
+	blobPath, err := GetBlobsPath(digest)
 	if err != nil {
 		return nil, err
 	}
@@ -655,7 +654,7 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 		mediatype = "application/vnd.ollama.image.projector"
 	}

-	layer, err := manifest.NewLayerFromLayer(digest, mediatype, blob.Name())
+	layer, err := NewLayerFromLayer(digest, mediatype, blob.Name())
 	if err != nil {
 		slog.Debug("could not create new layer from layer", "error", err)
 		return nil, err
@@ -666,8 +665,8 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 	return detectChatTemplate(layers)
 }

-func removeLayer(layers []manifest.Layer, mediatype string) []manifest.Layer {
-	return slices.DeleteFunc(layers, func(layer manifest.Layer) bool {
+func removeLayer(layers []Layer, mediatype string) []Layer {
+	return slices.DeleteFunc(layers, func(layer Layer) bool {
 		if layer.MediaType != mediatype {
 			return false
 		}
@@ -681,7 +680,7 @@ func removeLayer(layers []manifest.Layer, mediatype string) []manifest.Layer {
 	})
 }

-func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) {
+func setTemplate(layers []Layer, t string) ([]Layer, error) {
 	layers = removeLayer(layers, "application/vnd.ollama.image.template")
 	if _, err := template.Parse(t); err != nil {
 		return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
@@ -691,7 +690,7 @@ func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) {
 	}

 	blob := strings.NewReader(t)
-	layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.template")
+	layer, err := NewLayer(blob, "application/vnd.ollama.image.template")
 	if err != nil {
 		return nil, err
 	}
@@ -700,11 +699,11 @@ func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) {
 	return layers, nil
 }

-func setSystem(layers []manifest.Layer, s string) ([]manifest.Layer, error) {
+func setSystem(layers []Layer, s string) ([]Layer, error) {
 	layers = removeLayer(layers, "application/vnd.ollama.image.system")
 	if s != "" {
 		blob := strings.NewReader(s)
-		layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.system")
+		layer, err := NewLayer(blob, "application/vnd.ollama.image.system")
 		if err != nil {
 			return nil, err
 		}
@@ -713,9 +712,9 @@ func setSystem(layers []manifest.Layer, s string) ([]manifest.Layer, error) {
 	return layers, nil
 }

-func setLicense(layers []manifest.Layer, l string) ([]manifest.Layer, error) {
+func setLicense(layers []Layer, l string) ([]Layer, error) {
 	blob := strings.NewReader(l)
-	layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.license")
+	layer, err := NewLayer(blob, "application/vnd.ollama.image.license")
 	if err != nil {
 		return nil, err
 	}
@@ -723,7 +722,7 @@ func setLicense(layers []manifest.Layer, l string) ([]manifest.Layer, error) {
 	return layers, nil
 }

-func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer, error) {
+func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
 	if p == nil {
 		p = make(map[string]any)
 	}
@@ -732,7 +731,7 @@ func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer,
 			continue
 		}

-		digestPath, err := manifest.BlobsPath(layer.Digest)
+		digestPath, err := GetBlobsPath(layer.Digest)
 		if err != nil {
 			return nil, err
 		}
@@ -766,7 +765,7 @@ func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer,
 	if err := json.NewEncoder(&b).Encode(p); err != nil {
 		return nil, err
 	}
-	layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.params")
+	layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
 	if err != nil {
 		return nil, err
 	}
@@ -774,7 +773,7 @@ func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer,
 	return layers, nil
 }

-func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, error) {
+func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
 	// this leaves the old messages intact if no new messages were specified
 	// which may not be the correct behaviour
 	if len(m) == 0 {
@@ -787,7 +786,7 @@ func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, er
 	if err := json.NewEncoder(&b).Encode(m); err != nil {
 		return nil, err
 	}
-	layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.messages")
+	layer, err := NewLayer(&b, "application/vnd.ollama.image.messages")
 	if err != nil {
 		return nil, err
 	}
@@ -795,7 +794,7 @@ func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, er
 	return layers, nil
 }

-func createConfigLayer(layers []manifest.Layer, config model.ConfigV2) (*manifest.Layer, error) {
+func createConfigLayer(layers []Layer, config model.ConfigV2) (*Layer, error) {
 	digests := make([]string, len(layers))
 	for i, layer := range layers {
 		digests[i] = layer.Digest
@@ -806,7 +805,7 @@ func createConfigLayer(layers []manifest.Layer, config model.ConfigV2) (*manifes
 	if err := json.NewEncoder(&b).Encode(config); err != nil {
 		return nil, err
 	}
-	layer, err := manifest.NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
 	if err != nil {
 		return nil, err
 	}
--- a/server/create_test.go
+++ b/server/create_test.go
@@ -10,7 +10,6 @@ import (
 	"testing"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/manifest"
 )

 func TestConvertFromSafetensors(t *testing.T) {
@@ -18,7 +17,7 @@ func TestConvertFromSafetensors(t *testing.T) {

 	// Helper function to create a new layer and return its digest
 	makeTemp := func(content string) string {
-		l, err := manifest.NewLayer(strings.NewReader(content), "application/octet-stream")
+		l, err := NewLayer(strings.NewReader(content), "application/octet-stream")
 		if err != nil {
 			t.Fatalf("Failed to create layer: %v", err)
 		}
--- a/server/download.go
+++ b/server/download.go
@@ -24,8 +24,6 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/manifest"
-	"github.com/ollama/ollama/types/model"
 )

 const maxRetries = 6
@@ -458,7 +456,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
 }

 type downloadOpts struct {
-	n       model.Name
+	mp      ModelPath
 	digest  string
 	regOpts *registryOptions
 	fn      func(api.ProgressResponse)
@@ -467,10 +465,10 @@ type downloadOpts struct {
 // downloadBlob downloads a blob from the registry and stores it in the blobs directory
 func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ error) {
 	if opts.digest == "" {
-		return false, fmt.Errorf(("%s: %s"), opts.n.DisplayNamespaceModel(), "digest is empty")
+		return false, fmt.Errorf(("%s: %s"), opts.mp.GetNamespaceRepository(), "digest is empty")
 	}

-	fp, err := manifest.BlobsPath(opts.digest)
+	fp, err := GetBlobsPath(opts.digest)
 	if err != nil {
 		return false, err
 	}
@@ -494,8 +492,8 @@ func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ erro
 	data, ok := blobDownloadManager.LoadOrStore(opts.digest, &blobDownload{Name: fp, Digest: opts.digest})
 	download := data.(*blobDownload)
 	if !ok {
-		requestURL := opts.n.BaseURL()
-		requestURL = requestURL.JoinPath("v2", opts.n.DisplayNamespaceModel(), "blobs", opts.digest)
+		requestURL := opts.mp.BaseURL()
+		requestURL = requestURL.JoinPath("v2", opts.mp.GetNamespaceRepository(), "blobs", opts.digest)
 		if err := download.Prepare(ctx, requestURL, opts.regOpts); err != nil {
 			blobDownloadManager.Delete(opts.digest)
 			return false, err
--- a/server/images.go
+++ b/server/images.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -23,7 +24,6 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/gguf"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
@@ -75,6 +75,12 @@ type Model struct {
 func (m *Model) Capabilities() []model.Capability {
 	capabilities := []model.Capability{}

+	// Check for image generation model via config capabilities
+	if slices.Contains(m.Config.Capabilities, "image") {
+		return []model.Capability{model.CapabilityImage}
+	}
+
+	// Check for completion capability
 	if m.ModelPath != "" {
 		f, err := gguf.Open(m.ModelPath)
 		if err == nil {
@@ -268,22 +274,44 @@ func (m *Model) String() string {
 	return modelfile.String()
 }

+func GetManifest(mp ModelPath) (*Manifest, string, error) {
+	fp, err := mp.GetManifestPath()
+	if err != nil {
+		return nil, "", err
+	}
+
+	f, err := os.Open(fp)
+	if err != nil {
+		return nil, "", err
+	}
+	defer f.Close()
+
+	sha256sum := sha256.New()
+
+	var manifest Manifest
+	if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&manifest); err != nil {
+		return nil, "", err
+	}
+
+	return &manifest, hex.EncodeToString(sha256sum.Sum(nil)), nil
+}
+
 func GetModel(name string) (*Model, error) {
-	n := model.ParseName(name)
-	mf, err := manifest.ParseNamedManifest(n)
+	mp := ParseModelPath(name)
+	manifest, digest, err := GetManifest(mp)
 	if err != nil {
 		return nil, err
 	}

-	m := &Model{
-		Name:      n.String(),
-		ShortName: n.DisplayShortest(),
-		Digest:    mf.Digest(),
+	model := &Model{
+		Name:      mp.GetFullTagname(),
+		ShortName: mp.GetShortTagname(),
+		Digest:    digest,
 		Template:  template.DefaultTemplate,
 	}

-	if mf.Config.Digest != "" {
-		filename, err := manifest.BlobsPath(mf.Config.Digest)
+	if manifest.Config.Digest != "" {
+		filename, err := GetBlobsPath(manifest.Config.Digest)
 		if err != nil {
 			return nil, err
 		}
@@ -294,29 +322,29 @@ func GetModel(name string) (*Model, error) {
 		}
 		defer configFile.Close()

-		if err := json.NewDecoder(configFile).Decode(&m.Config); err != nil {
+		if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
 			return nil, err
 		}
 	}

-	for _, layer := range mf.Layers {
-		filename, err := manifest.BlobsPath(layer.Digest)
+	for _, layer := range manifest.Layers {
+		filename, err := GetBlobsPath(layer.Digest)
 		if err != nil {
 			return nil, err
 		}

 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
-			m.ModelPath = filename
-			m.ParentModel = layer.From
+			model.ModelPath = filename
+			model.ParentModel = layer.From
 		case "application/vnd.ollama.image.embed":
 			// Deprecated in versions  > 0.1.2
 			// TODO: remove this warning in a future version
 			slog.Info("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
 		case "application/vnd.ollama.image.adapter":
-			m.AdapterPaths = append(m.AdapterPaths, filename)
+			model.AdapterPaths = append(model.AdapterPaths, filename)
 		case "application/vnd.ollama.image.projector":
-			m.ProjectorPaths = append(m.ProjectorPaths, filename)
+			model.ProjectorPaths = append(model.ProjectorPaths, filename)
 		case "application/vnd.ollama.image.prompt",
 			"application/vnd.ollama.image.template":
 			bts, err := os.ReadFile(filename)
@@ -324,7 +352,7 @@ func GetModel(name string) (*Model, error) {
 				return nil, err
 			}

-			m.Template, err = template.Parse(string(bts))
+			model.Template, err = template.Parse(string(bts))
 			if err != nil {
 				return nil, err
 			}
@@ -334,7 +362,7 @@ func GetModel(name string) (*Model, error) {
 				return nil, err
 			}

-			m.System = string(bts)
+			model.System = string(bts)
 		case "application/vnd.ollama.image.params":
 			params, err := os.Open(filename)
 			if err != nil {
@@ -343,7 +371,7 @@ func GetModel(name string) (*Model, error) {
 			defer params.Close()

 			// parse model options parameters into a map so that we can see which fields have been specified explicitly
-			if err = json.NewDecoder(params).Decode(&m.Options); err != nil {
+			if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
 				return nil, err
 			}
 		case "application/vnd.ollama.image.messages":
@@ -353,7 +381,7 @@ func GetModel(name string) (*Model, error) {
 			}
 			defer msgs.Close()

-			if err = json.NewDecoder(msgs).Decode(&m.Messages); err != nil {
+			if err = json.NewDecoder(msgs).Decode(&model.Messages); err != nil {
 				return nil, err
 			}
 		case "application/vnd.ollama.image.license":
@@ -361,11 +389,11 @@ func GetModel(name string) (*Model, error) {
 			if err != nil {
 				return nil, err
 			}
-			m.License = append(m.License, string(bts))
+			model.License = append(model.License, string(bts))
 		}
 	}

-	return m, nil
+	return model, nil
 }

 func CopyModel(src, dst model.Name) error {
@@ -380,7 +408,7 @@ func CopyModel(src, dst model.Name) error {
 		return nil
 	}

-	manifests, err := manifest.Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -409,7 +437,7 @@ func CopyModel(src, dst model.Name) error {

 func deleteUnusedLayers(deleteMap map[string]struct{}) error {
 	// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
-	manifests, err := manifest.Manifests(true)
+	manifests, err := Manifests(true)
 	if err != nil {
 		return err
 	}
@@ -424,7 +452,7 @@ func deleteUnusedLayers(deleteMap map[string]struct{}) error {

 	// only delete the files which are still in the deleteMap
 	for k := range deleteMap {
-		fp, err := manifest.BlobsPath(k)
+		fp, err := GetBlobsPath(k)
 		if err != nil {
 			slog.Info(fmt.Sprintf("couldn't get file path for '%s': %v", k, err))
 			continue
@@ -440,7 +468,7 @@ func deleteUnusedLayers(deleteMap map[string]struct{}) error {

 func PruneLayers() error {
 	deleteMap := make(map[string]struct{})
-	p, err := manifest.BlobsPath("")
+	p, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}
@@ -455,9 +483,9 @@ func PruneLayers() error {
 		name := blob.Name()
 		name = strings.ReplaceAll(name, "-", ":")

-		_, err := manifest.BlobsPath(name)
+		_, err := GetBlobsPath(name)
 		if err != nil {
-			if errors.Is(err, manifest.ErrInvalidDigestFormat) {
+			if errors.Is(err, ErrInvalidDigestFormat) {
 				// remove invalid blobs (e.g. partial downloads)
 				if err := os.Remove(filepath.Join(p, blob.Name())); err != nil {
 					slog.Error("couldn't remove blob", "blob", blob.Name(), "error", err)
@@ -482,30 +510,63 @@ func PruneLayers() error {
 	return nil
 }

+func PruneDirectory(path string) error {
+	info, err := os.Lstat(path)
+	if err != nil {
+		return err
+	}
+
+	if info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
+		entries, err := os.ReadDir(path)
+		if err != nil {
+			return err
+		}
+
+		for _, entry := range entries {
+			if err := PruneDirectory(filepath.Join(path, entry.Name())); err != nil {
+				return err
+			}
+		}
+
+		entries, err = os.ReadDir(path)
+		if err != nil {
+			return err
+		}
+
+		if len(entries) > 0 {
+			return nil
+		}
+
+		return os.Remove(path)
+	}
+
+	return nil
+}
+
 func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
-	n := model.ParseName(name)
+	mp := ParseModelPath(name)
 	fn(api.ProgressResponse{Status: "retrieving manifest"})

-	if n.ProtocolScheme == "http" && !regOpts.Insecure {
+	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
 		return errInsecureProtocol
 	}

-	mf, err := manifest.ParseNamedManifest(n)
+	manifest, _, err := GetManifest(mp)
 	if err != nil {
 		fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
 		return err
 	}

-	var layers []manifest.Layer
-	layers = append(layers, mf.Layers...)
-	if mf.Config.Digest != "" {
-		layers = append(layers, mf.Config)
+	var layers []Layer
+	layers = append(layers, manifest.Layers...)
+	if manifest.Config.Digest != "" {
+		layers = append(layers, manifest.Config)
 	}

 	// Use fast transfer for models with tensor layers (many small blobs)
 	if hasTensorLayers(layers) {
 		// Read raw manifest JSON to preserve tensor metadata fields
-		manifestPath, err := manifest.PathForName(n)
+		manifestPath, err := mp.GetManifestPath()
 		if err != nil {
 			return err
 		}
@@ -513,7 +574,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 		if err != nil {
 			return err
 		}
-		if err := pushWithTransfer(ctx, n, layers, manifestJSON, regOpts, fn); err != nil {
+		if err := pushWithTransfer(ctx, mp, layers, manifestJSON, regOpts, fn); err != nil {
 			return err
 		}
 		fn(api.ProgressResponse{Status: "success"})
@@ -521,17 +582,17 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}

 	for _, layer := range layers {
-		if err := uploadBlob(ctx, n, layer, regOpts, fn); err != nil {
+		if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
 			slog.Info(fmt.Sprintf("error uploading blob: %v", err))
 			return err
 		}
 	}

 	fn(api.ProgressResponse{Status: "pushing manifest"})
-	requestURL := n.BaseURL()
-	requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
+	requestURL := mp.BaseURL()
+	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)

-	manifestJSON, err := json.Marshal(mf)
+	manifestJSON, err := json.Marshal(manifest)
 	if err != nil {
 		return err
 	}
@@ -550,44 +611,44 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 }

 func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
-	n := model.ParseName(name)
+	mp := ParseModelPath(name)

 	// build deleteMap to prune unused layers
 	deleteMap := make(map[string]struct{})
-	existingMf, err := manifest.ParseNamedManifest(n)
+	manifest, _, err := GetManifest(mp)
 	if errors.Is(err, os.ErrNotExist) {
 		// noop
 	} else if err != nil {
 		slog.Warn("pulling model with bad existing manifest", "name", name, "error", err)
 	} else {
-		for _, l := range existingMf.Layers {
+		for _, l := range manifest.Layers {
 			deleteMap[l.Digest] = struct{}{}
 		}
-		if existingMf.Config.Digest != "" {
-			deleteMap[existingMf.Config.Digest] = struct{}{}
+		if manifest.Config.Digest != "" {
+			deleteMap[manifest.Config.Digest] = struct{}{}
 		}
 	}

-	if n.ProtocolScheme == "http" && !regOpts.Insecure {
+	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
 		return errInsecureProtocol
 	}

 	fn(api.ProgressResponse{Status: "pulling manifest"})

-	mf, err := pullModelManifest(ctx, n, regOpts)
+	manifest, err = pullModelManifest(ctx, mp, regOpts)
 	if err != nil {
 		return fmt.Errorf("pull model manifest: %s", err)
 	}

-	var layers []manifest.Layer
-	layers = append(layers, mf.Layers...)
-	if mf.Config.Digest != "" {
-		layers = append(layers, mf.Config)
+	var layers []Layer
+	layers = append(layers, manifest.Layers...)
+	if manifest.Config.Digest != "" {
+		layers = append(layers, manifest.Config)
 	}

 	// Use fast transfer for models with tensor layers (many small blobs)
 	if hasTensorLayers(layers) {
-		if err := pullWithTransfer(ctx, n, layers, mf, regOpts, fn); err != nil {
+		if err := pullWithTransfer(ctx, mp, layers, manifest, regOpts, fn); err != nil {
 			return err
 		}
 		fn(api.ProgressResponse{Status: "success"})
@@ -597,7 +658,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	skipVerify := make(map[string]bool)
 	for _, layer := range layers {
 		cacheHit, err := downloadBlob(ctx, downloadOpts{
-			n:       n,
+			mp:      mp,
 			digest:  layer.Digest,
 			regOpts: regOpts,
 			fn:      fn,
@@ -616,7 +677,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 		}
 		if err := verifyBlob(layer.Digest); err != nil {
 			if errors.Is(err, errDigestMismatch) {
-				fp, err := manifest.BlobsPath(layer.Digest)
+				fp, err := GetBlobsPath(layer.Digest)
 				if err != nil {
 					return err
 				}
@@ -631,16 +692,16 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	for _, layer := range layers {
 		delete(deleteMap, layer.Digest)
 	}
-	delete(deleteMap, mf.Config.Digest)
+	delete(deleteMap, manifest.Config.Digest)

 	fn(api.ProgressResponse{Status: "writing manifest"})

-	manifestJSON, err := json.Marshal(mf)
+	manifestJSON, err := json.Marshal(manifest)
 	if err != nil {
 		return err
 	}

-	fp, err := manifest.PathForName(n)
+	fp, err := mp.GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -667,9 +728,9 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 }

 // hasTensorLayers checks if any layer has tensor media type.
-func hasTensorLayers(layers []manifest.Layer) bool {
+func hasTensorLayers(layers []Layer) bool {
 	for _, layer := range layers {
-		if layer.MediaType == manifest.MediaTypeImageTensor {
+		if layer.MediaType == MediaTypeImageTensor {
 			return true
 		}
 	}
@@ -677,7 +738,7 @@ func hasTensorLayers(layers []manifest.Layer) bool {
 }

 // pullWithTransfer uses the simplified x/transfer package for downloading blobs.
-func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, mf *manifest.Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
+func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifest *Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
 	blobs := make([]transfer.Blob, len(layers))
 	for i, layer := range layers {
 		blobs[i] = transfer.Blob{
@@ -686,12 +747,12 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		}
 	}

-	destDir, err := manifest.BlobsPath("")
+	destDir, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}

-	base := n.BaseURL()
+	base := mp.BaseURL()
 	if base.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		base.Scheme = "http"
 	}
@@ -723,7 +784,7 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		Blobs:      blobs,
 		BaseURL:    baseURL,
 		DestDir:    destDir,
-		Repository: n.DisplayNamespaceModel(),
+		Repository: mp.GetNamespaceRepository(),
 		Progress:   progress,
 		Token:      regOpts.Token,
 		GetToken:   getToken,
@@ -734,12 +795,12 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer

 	// Write manifest
 	fn(api.ProgressResponse{Status: "writing manifest"})
-	manifestJSON, err := json.Marshal(mf)
+	manifestJSON, err := json.Marshal(manifest)
 	if err != nil {
 		return err
 	}

-	fp, err := manifest.PathForName(n)
+	fp, err := mp.GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -751,7 +812,7 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 }

 // pushWithTransfer uses the simplified x/transfer package for uploading blobs and manifest.
-func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestJSON []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
+func pushWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifestJSON []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
 	blobs := make([]transfer.Blob, len(layers))
 	for i, layer := range layers {
 		blobs[i] = transfer.Blob{
@@ -761,12 +822,12 @@ func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		}
 	}

-	srcDir, err := manifest.BlobsPath("")
+	srcDir, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}

-	base := n.BaseURL()
+	base := mp.BaseURL()
 	if base.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		base.Scheme = "http"
 	}
@@ -803,13 +864,13 @@ func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		GetToken:    getToken,
 		Logger:      slog.Default(),
 		Manifest:    manifestJSON,
-		ManifestRef: n.Tag,
-		Repository:  n.DisplayNamespaceModel(),
+		ManifestRef: mp.Tag,
+		Repository:  mp.GetNamespaceRepository(),
 	})
 }

-func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptions) (*manifest.Manifest, error) {
-	requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
+func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*Manifest, error) {
+	requestURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)

 	headers := make(http.Header)
 	headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
@@ -819,7 +880,7 @@ func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptio
 	}
 	defer resp.Body.Close()

-	var m manifest.Manifest
+	var m Manifest
 	if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
 		return nil, err
 	}
@@ -981,7 +1042,7 @@ func parseRegistryChallenge(authStr string) registryChallenge {
 var errDigestMismatch = errors.New("digest mismatch, file must be downloaded again")

 func verifyBlob(digest string) error {
-	fp, err := manifest.BlobsPath(digest)
+	fp, err := GetBlobsPath(digest)
 	if err != nil {
 		return err
 	}
--- a/server/images_test.go
+++ b/server/images_test.go
@@ -56,15 +56,6 @@ func TestModelCapabilities(t *testing.T) {
 			},
 			expectedCaps: []model.Capability{model.CapabilityImage},
 		},
-		{
-			name: "model with image and vision capability (image editing)",
-			model: Model{
-				Config: model.ConfigV2{
-					Capabilities: []string{"image", "vision"},
-				},
-			},
-			expectedCaps: []model.Capability{model.CapabilityImage, model.CapabilityVision},
-		},
 		{
 			name: "model with completion capability",
 			model: Model{
--- a/manifest/layer.go
+++ b/manifest/layer.go
@@ -1,4 +1,4 @@
-package manifest
+package server

 import (
 	"crypto/sha256"
@@ -14,7 +14,7 @@ type Layer struct {
 	Size      int64  `json:"size"`
 	From      string `json:"from,omitempty"`
 	Name      string `json:"name,omitempty"` // tensor name, e.g., "text_encoder/model.embed_tokens.weight"
-	Status    string `json:"-"`
+	status    string
 }

 const (
@@ -22,7 +22,7 @@ const (
 )

 func NewLayer(r io.Reader, mediatype string) (Layer, error) {
-	blobs, err := BlobsPath("")
+	blobs, err := GetBlobsPath("")
 	if err != nil {
 		return Layer{}, err
 	}
@@ -45,7 +45,7 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
 	}

 	digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
-	blob, err := BlobsPath(digest)
+	blob, err := GetBlobsPath(digest)
 	if err != nil {
 		return Layer{}, err
 	}
@@ -65,7 +65,7 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
 		MediaType: mediatype,
 		Digest:    digest,
 		Size:      n,
-		Status:    fmt.Sprintf("%s %s", status, digest),
+		status:    fmt.Sprintf("%s %s", status, digest),
 	}, nil
 }

@@ -74,7 +74,7 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
 		return Layer{}, errors.New("creating new layer from layer with empty digest")
 	}

-	blob, err := BlobsPath(digest)
+	blob, err := GetBlobsPath(digest)
 	if err != nil {
 		return Layer{}, err
 	}
@@ -89,7 +89,7 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
 		Digest:    digest,
 		Size:      fi.Size(),
 		From:      from,
-		Status:    fmt.Sprintf("using existing layer %s", digest),
+		status:    fmt.Sprintf("using existing layer %s", digest),
 	}, nil
 }

@@ -98,7 +98,7 @@ func (l *Layer) Open() (io.ReadSeekCloser, error) {
 		return nil, errors.New("opening layer with empty digest")
 	}

-	blob, err := BlobsPath(l.Digest)
+	blob, err := GetBlobsPath(l.Digest)
 	if err != nil {
 		return nil, err
 	}
@@ -126,7 +126,7 @@ func (l *Layer) Remove() error {
 		}
 	}

-	blob, err := BlobsPath(l.Digest)
+	blob, err := GetBlobsPath(l.Digest)
 	if err != nil {
 		return err
 	}
--- a/manifest/manifest.go
+++ b/manifest/manifest.go
@@ -1,9 +1,10 @@
-package manifest
+package server

 import (
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -32,38 +33,12 @@ func (m *Manifest) Size() (size int64) {
 	return
 }

-func (m *Manifest) Digest() string {
-	return m.digest
-}
-
-func (m *Manifest) FileInfo() os.FileInfo {
-	return m.fi
-}
-
-// ReadConfigJSON reads and unmarshals a config layer as JSON.
-func (m *Manifest) ReadConfigJSON(configPath string, v any) error {
-	for _, layer := range m.Layers {
-		if layer.MediaType == "application/vnd.ollama.image.json" && layer.Name == configPath {
-			blobPath, err := BlobsPath(layer.Digest)
-			if err != nil {
-				return err
-			}
-			data, err := os.ReadFile(blobPath)
-			if err != nil {
-				return err
-			}
-			return json.Unmarshal(data, v)
-		}
-	}
-	return fmt.Errorf("config %q not found in manifest", configPath)
-}
-
 func (m *Manifest) Remove() error {
 	if err := os.Remove(m.filepath); err != nil {
 		return err
 	}

-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -95,11 +70,11 @@ func (m *Manifest) RemoveLayers() error {
 		if _, used := inUse[layer.Digest]; used {
 			continue
 		}
-		blob, err := BlobsPath(layer.Digest)
+		blob, err := GetBlobsPath(layer.Digest)
 		if err != nil {
 			return err
 		}
-		if err := os.Remove(blob); os.IsNotExist(err) {
+		if err := os.Remove(blob); errors.Is(err, os.ErrNotExist) {
 			slog.Debug("layer does not exist", "digest", layer.Digest)
 		} else if err != nil {
 			return err
@@ -114,7 +89,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 		return nil, model.Unqualified(n)
 	}

-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return nil, err
 	}
@@ -146,7 +121,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 }

 func WriteManifest(name model.Name, config Layer, layers []Layer) error {
-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -173,7 +148,7 @@ func WriteManifest(name model.Name, config Layer, layers []Layer) error {
 }

 func Manifests(continueOnError bool) (map[model.Name]*Manifest, error) {
-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return nil, err
 	}
--- a/manifest/manifest_test.go
+++ b/manifest/manifest_test.go
@@ -1,4 +1,4 @@
-package manifest
+package server

 import (
 	"encoding/json"
--- a/server/model.go
+++ b/server/model.go
@@ -13,7 +13,6 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/fs/ggml"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/model"
 )
@@ -21,19 +20,19 @@ import (
 var intermediateBlobs map[string]string = make(map[string]string)

 type layerGGML struct {
-	manifest.Layer
+	Layer
 	*ggml.GGML
 }

 func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
-	m, err := manifest.ParseNamedManifest(name)
+	m, err := ParseNamedManifest(name)
 	switch {
 	case errors.Is(err, os.ErrNotExist):
 		if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
 			return nil, err
 		}

-		m, err = manifest.ParseNamedManifest(name)
+		m, err = ParseNamedManifest(name)
 		if err != nil {
 			return nil, err
 		}
@@ -42,7 +41,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 	}

 	for _, layer := range m.Layers {
-		layer, err := manifest.NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
+		layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
 		if err != nil {
 			return nil, err
 		}
@@ -51,7 +50,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 		case "application/vnd.ollama.image.model",
 			"application/vnd.ollama.image.projector",
 			"application/vnd.ollama.image.adapter":
-			blobpath, err := manifest.BlobsPath(layer.Digest)
+			blobpath, err := GetBlobsPath(layer.Digest)
 			if err != nil {
 				return nil, err
 			}
@@ -82,12 +81,12 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
 			if t, err := template.Named(s); err != nil {
 				slog.Debug("template detection", "error", err, "template", s)
 			} else {
-				layer, err := manifest.NewLayer(t.Reader(), "application/vnd.ollama.image.template")
+				layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
 				if err != nil {
 					return nil, err
 				}

-				layer.Status = fmt.Sprintf("using autodetected template %s", t.Name)
+				layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
 				layers = append(layers, &layerGGML{layer, nil})

 				if t.Parameters != nil {
@@ -96,7 +95,7 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
 						return nil, err
 					}

-					layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.params")
+					layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
 					if err != nil {
 						return nil, err
 					}
--- a/server/modelpath.go
+++ b/server/modelpath.go
@@ -0,0 +1,146 @@
+package server
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"net/url"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/types/model"
+)
+
+type ModelPath struct {
+	ProtocolScheme string
+	Registry       string
+	Namespace      string
+	Repository     string
+	Tag            string
+}
+
+const (
+	DefaultRegistry       = "registry.ollama.ai"
+	DefaultNamespace      = "library"
+	DefaultTag            = "latest"
+	DefaultProtocolScheme = "https"
+)
+
+var (
+	ErrInvalidImageFormat  = errors.New("invalid image format")
+	ErrInvalidDigestFormat = errors.New("invalid digest format")
+	ErrInvalidProtocol     = errors.New("invalid protocol scheme")
+	ErrInsecureProtocol    = errors.New("insecure protocol http")
+	ErrModelPathInvalid    = errors.New("invalid model path")
+)
+
+func ParseModelPath(name string) ModelPath {
+	mp := ModelPath{
+		ProtocolScheme: DefaultProtocolScheme,
+		Registry:       DefaultRegistry,
+		Namespace:      DefaultNamespace,
+		Repository:     "",
+		Tag:            DefaultTag,
+	}
+
+	before, after, found := strings.Cut(name, "://")
+	if found {
+		mp.ProtocolScheme = before
+		name = after
+	}
+
+	name = strings.ReplaceAll(name, string(os.PathSeparator), "/")
+	parts := strings.Split(name, "/")
+	switch len(parts) {
+	case 3:
+		mp.Registry = parts[0]
+		mp.Namespace = parts[1]
+		mp.Repository = parts[2]
+	case 2:
+		mp.Namespace = parts[0]
+		mp.Repository = parts[1]
+	case 1:
+		mp.Repository = parts[0]
+	}
+
+	if repo, tag, found := strings.Cut(mp.Repository, ":"); found {
+		mp.Repository = repo
+		mp.Tag = tag
+	}
+
+	return mp
+}
+
+func (mp ModelPath) GetNamespaceRepository() string {
+	return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
+}
+
+func (mp ModelPath) GetFullTagname() string {
+	return fmt.Sprintf("%s/%s/%s:%s", mp.Registry, mp.Namespace, mp.Repository, mp.Tag)
+}
+
+func (mp ModelPath) GetShortTagname() string {
+	if mp.Registry == DefaultRegistry {
+		if mp.Namespace == DefaultNamespace {
+			return fmt.Sprintf("%s:%s", mp.Repository, mp.Tag)
+		}
+		return fmt.Sprintf("%s/%s:%s", mp.Namespace, mp.Repository, mp.Tag)
+	}
+	return fmt.Sprintf("%s/%s/%s:%s", mp.Registry, mp.Namespace, mp.Repository, mp.Tag)
+}
+
+// GetManifestPath returns the path to the manifest file for the given model path. It is up to the caller to create the directory if it does not exist.
+func (mp ModelPath) GetManifestPath() (string, error) {
+	name := model.Name{
+		Host:      mp.Registry,
+		Namespace: mp.Namespace,
+		Model:     mp.Repository,
+		Tag:       mp.Tag,
+	}
+	if !name.IsValid() {
+		return "", fs.ErrNotExist
+	}
+	return filepath.Join(envconfig.Models(), "manifests", name.Filepath()), nil
+}
+
+func (mp ModelPath) BaseURL() *url.URL {
+	return &url.URL{
+		Scheme: mp.ProtocolScheme,
+		Host:   mp.Registry,
+	}
+}
+
+func GetManifestPath() (string, error) {
+	path := filepath.Join(envconfig.Models(), "manifests")
+	if err := os.MkdirAll(path, 0o755); err != nil {
+		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
+	}
+
+	return path, nil
+}
+
+func GetBlobsPath(digest string) (string, error) {
+	// only accept actual sha256 digests
+	pattern := "^sha256[:-][0-9a-fA-F]{64}$"
+	re := regexp.MustCompile(pattern)
+
+	if digest != "" && !re.MatchString(digest) {
+		return "", ErrInvalidDigestFormat
+	}
+
+	digest = strings.ReplaceAll(digest, ":", "-")
+	path := filepath.Join(envconfig.Models(), "blobs", digest)
+	dirPath := filepath.Dir(path)
+	if digest == "" {
+		dirPath = path
+	}
+
+	if err := os.MkdirAll(dirPath, 0o755); err != nil {
+		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
+	}
+
+	return path, nil
+}
--- a/server/modelpath_test.go
+++ b/server/modelpath_test.go
@@ -0,0 +1,153 @@
+package server
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestGetBlobsPath(t *testing.T) {
+	// GetBlobsPath expects an actual directory to exist
+	tempDir := t.TempDir()
+
+	tests := []struct {
+		name     string
+		digest   string
+		expected string
+		err      error
+	}{
+		{
+			"empty digest",
+			"",
+			filepath.Join(tempDir, "blobs"),
+			nil,
+		},
+		{
+			"valid with colon",
+			"sha256:456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
+			filepath.Join(tempDir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
+			nil,
+		},
+		{
+			"valid with dash",
+			"sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
+			filepath.Join(tempDir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
+			nil,
+		},
+		{
+			"digest too short",
+			"sha256-45640291",
+			"",
+			ErrInvalidDigestFormat,
+		},
+		{
+			"digest too long",
+			"sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9aaaaaaaaaa",
+			"",
+			ErrInvalidDigestFormat,
+		},
+		{
+			"digest invalid chars",
+			"../sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7a",
+			"",
+			ErrInvalidDigestFormat,
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Setenv("OLLAMA_MODELS", tempDir)
+
+			got, err := GetBlobsPath(tc.digest)
+
+			require.ErrorIs(t, tc.err, err, tc.name)
+			assert.Equal(t, tc.expected, got, tc.name)
+		})
+	}
+}
+
+func TestParseModelPath(t *testing.T) {
+	tests := []struct {
+		name string
+		arg  string
+		want ModelPath
+	}{
+		{
+			"full path https",
+			"https://example.com/ns/repo:tag",
+			ModelPath{
+				ProtocolScheme: "https",
+				Registry:       "example.com",
+				Namespace:      "ns",
+				Repository:     "repo",
+				Tag:            "tag",
+			},
+		},
+		{
+			"full path http",
+			"http://example.com/ns/repo:tag",
+			ModelPath{
+				ProtocolScheme: "http",
+				Registry:       "example.com",
+				Namespace:      "ns",
+				Repository:     "repo",
+				Tag:            "tag",
+			},
+		},
+		{
+			"no protocol",
+			"example.com/ns/repo:tag",
+			ModelPath{
+				ProtocolScheme: "https",
+				Registry:       "example.com",
+				Namespace:      "ns",
+				Repository:     "repo",
+				Tag:            "tag",
+			},
+		},
+		{
+			"no registry",
+			"ns/repo:tag",
+			ModelPath{
+				ProtocolScheme: "https",
+				Registry:       DefaultRegistry,
+				Namespace:      "ns",
+				Repository:     "repo",
+				Tag:            "tag",
+			},
+		},
+		{
+			"no namespace",
+			"repo:tag",
+			ModelPath{
+				ProtocolScheme: "https",
+				Registry:       DefaultRegistry,
+				Namespace:      DefaultNamespace,
+				Repository:     "repo",
+				Tag:            "tag",
+			},
+		},
+		{
+			"no tag",
+			"repo",
+			ModelPath{
+				ProtocolScheme: "https",
+				Registry:       DefaultRegistry,
+				Namespace:      DefaultNamespace,
+				Repository:     "repo",
+				Tag:            DefaultTag,
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got := ParseModelPath(tc.arg)
+
+			if got != tc.want {
+				t.Errorf("got: %q want: %q", got, tc.want)
+			}
+		})
+	}
+}
--- a/server/quantization.go
+++ b/server/quantization.go
@@ -198,8 +198,8 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
 	name := t.Name
 	quantize := strings.HasSuffix(name, "weight")

-	// don't quantize vision encoder tensors (named with "v." prefix)
-	quantize = quantize && !strings.HasPrefix(name, "v.")
+	// don't quantize vision tensors (names containing "v."), unless the name also contains "_v."
+	quantize = quantize && (!strings.Contains(name, "v.") || strings.Contains(name, "_v."))
 	quantize = quantize && !strings.Contains(name, "mm.")

 	// quantize only 2D and 3D tensors (experts)
@@ -219,9 +219,6 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
 	// NOTE: can't use LLM_TN here because the layer number is not known
 	quantize = quantize && !strings.Contains(name, "ssm_conv1d.weight")

-	// do not quantize LFM2's shortconv kernel weights
-	quantize = quantize && !strings.Contains(name, "shortconv.conv.weight")
-
 	// do not quantize RWKV's time_mix_first tensors
 	quantize = quantize && !strings.Contains(name, "time_mix_first.weight")
 	quantize = quantize && !strings.Contains(name, "time_mix_w1.weight")
--- a/server/routes.go
+++ b/server/routes.go
@@ -39,7 +39,6 @@ import (
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/logutil"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/middleware"
 	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/model/renderers"
@@ -221,6 +220,12 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		return
 	}

+	// Handle image generation models
+	if slices.Contains(m.Capabilities(), model.CapabilityImage) {
+		s.handleImageGenerate(c, req, name.String(), checkpointStart)
+		return
+	}
+
 	if req.TopLogprobs < 0 || req.TopLogprobs > 20 {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "top_logprobs must be between 0 and 20"})
 		return
@@ -316,7 +321,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		return
 	}

-	// expire the runner if unload is requested (empty prompt, keep alive is 0)
+	// expire the runner
 	if req.Prompt == "" && req.KeepAlive != nil && req.KeepAlive.Duration == 0 {
 		s.sched.expireRunner(m)

@@ -330,12 +335,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		return
 	}

-	// Handle image generation models
-	if slices.Contains(m.Capabilities(), model.CapabilityImage) {
-		s.handleImageGenerate(c, req, name.String(), checkpointStart)
-		return
-	}
-
 	if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
 		return
@@ -975,7 +974,7 @@ func (s *Server) PushHandler(c *gin.Context) {
 // is.
 func getExistingName(n model.Name) (model.Name, error) {
 	var zero model.Name
-	existing, err := manifest.Manifests(true)
+	existing, err := Manifests(true)
 	if err != nil {
 		return zero, err
 	}
@@ -1019,7 +1018,7 @@ func (s *Server) DeleteHandler(c *gin.Context) {
 		return
 	}

-	m, err := manifest.ParseNamedManifest(n)
+	m, err := ParseNamedManifest(n)
 	if err != nil {
 		switch {
 		case os.IsNotExist(err):
@@ -1081,7 +1080,7 @@ func (s *Server) ShowHandler(c *gin.Context) {
 func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	name := model.ParseName(req.Model)
 	if !name.IsValid() {
-		return nil, model.Unqualified(name)
+		return nil, ErrModelPathInvalid
 	}
 	name, err := getExistingName(name)
 	if err != nil {
@@ -1113,7 +1112,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {

 	// For safetensors LLM models (experimental), populate details from config.json
 	if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
-		if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil {
+		if info, err := xserver.GetSafetensorsLLMInfo(name.String()); err == nil {
 			if arch, ok := info["general.architecture"].(string); ok && arch != "" {
 				modelDetails.Family = arch
 			}
@@ -1122,7 +1121,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 			}
 		}
 		// Get torch_dtype directly from config.json for quantization level
-		if dtype, err := xserver.GetSafetensorsDtype(name); err == nil && dtype != "" {
+		if dtype, err := xserver.GetSafetensorsDtype(name.String()); err == nil && dtype != "" {
 			modelDetails.QuantizationLevel = dtype
 		}
 	}
@@ -1136,7 +1135,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
 	}

-	mf, err := manifest.ParseNamedManifest(name)
+	manifest, err := ParseNamedManifest(name)
 	if err != nil {
 		return nil, err
 	}
@@ -1148,11 +1147,8 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 		Details:      modelDetails,
 		Messages:     msgs,
 		Capabilities: m.Capabilities(),
-		ModifiedAt:   mf.FileInfo().ModTime(),
+		ModifiedAt:   manifest.fi.ModTime(),
 		Requires:     m.Config.Requires,
-		// Several integrations crash on a nil/omitempty+empty ModelInfo, so by
-		// default we return an empty map.
-		ModelInfo: make(map[string]any),
 	}

 	if m.Config.RemoteHost != "" {
@@ -1215,7 +1211,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	if slices.Contains(m.Capabilities(), model.CapabilityImage) {
 		// Populate tensor info if verbose
 		if req.Verbose {
-			if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
+			if tensors, err := xserver.GetSafetensorsTensorInfo(name.String()); err == nil {
 				resp.Tensors = tensors
 			}
 		}
@@ -1224,12 +1220,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {

 	// For safetensors LLM models (experimental), populate ModelInfo from config.json
 	if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
-		if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil {
+		if info, err := xserver.GetSafetensorsLLMInfo(name.String()); err == nil {
 			resp.ModelInfo = info
 		}
 		// Populate tensor info if verbose
 		if req.Verbose {
-			if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
+			if tensors, err := xserver.GetSafetensorsTensorInfo(name.String()); err == nil {
 				resp.Tensors = tensors
 			}
 		}
@@ -1286,7 +1282,7 @@ func getModelData(digest string, verbose bool) (ggml.KV, ggml.Tensors, error) {
 }

 func (s *Server) ListHandler(c *gin.Context) {
-	ms, err := manifest.Manifests(true)
+	ms, err := Manifests(true)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1317,8 +1313,8 @@ func (s *Server) ListHandler(c *gin.Context) {
 			RemoteModel: cf.RemoteModel,
 			RemoteHost:  cf.RemoteHost,
 			Size:        m.Size(),
-			Digest:      m.Digest(),
-			ModifiedAt:  m.FileInfo().ModTime(),
+			Digest:      m.digest,
+			ModifiedAt:  m.fi.ModTime(),
 			Details: api.ModelDetails{
 				Format:            cf.ModelFormat,
 				Family:            cf.ModelFamily,
@@ -1377,7 +1373,7 @@ func (s *Server) CopyHandler(c *gin.Context) {
 }

 func (s *Server) HeadBlobHandler(c *gin.Context) {
-	path, err := manifest.BlobsPath(c.Param("digest"))
+	path, err := GetBlobsPath(c.Param("digest"))
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
@@ -1393,7 +1389,7 @@ func (s *Server) HeadBlobHandler(c *gin.Context) {

 func (s *Server) CreateBlobHandler(c *gin.Context) {
 	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
-		p, err := manifest.BlobsPath(ib)
+		p, err := GetBlobsPath(ib)
 		if err != nil {
 			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 			return
@@ -1411,7 +1407,7 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 		}
 	}

-	path, err := manifest.BlobsPath(c.Param("digest"))
+	path, err := GetBlobsPath(c.Param("digest"))
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
@@ -1429,7 +1425,7 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 		return
 	}

-	layer, err := manifest.NewLayer(c.Request.Body, "")
+	layer, err := NewLayer(c.Request.Body, "")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1604,9 +1600,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 	r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
 	r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
 	r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
-	// OpenAI-compatible image generation endpoints
+	// OpenAI-compatible image generation endpoint
 	r.POST("/v1/images/generations", middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
-	r.POST("/v1/images/edits", middleware.ImageEditsMiddleware(), s.GenerateHandler)

 	// Inference (Anthropic compatibility)
 	r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
@@ -1630,7 +1625,7 @@ func Serve(ln net.Listener) error {
 	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
 	slog.Info("server config", "env", envconfig.Values())

-	blobsDir, err := manifest.BlobsPath("")
+	blobsDir, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}
@@ -1639,7 +1634,7 @@ func Serve(ln net.Listener) error {
 	}

 	if !envconfig.NoPrune() {
-		if _, err := manifest.Manifests(false); err != nil {
+		if _, err := Manifests(false); err != nil {
 			slog.Warn("corrupt manifests detected, skipping prune operation.  Re-pull or delete to clear", "error", err)
 		} else {
 			// clean up unused layers and manifests
@@ -1647,12 +1642,12 @@ func Serve(ln net.Listener) error {
 				return err
 			}

-			manifestsPath, err := manifest.Path()
+			manifestsPath, err := GetManifestPath()
 			if err != nil {
 				return err
 			}

-			if err := manifest.PruneDirectory(manifestsPath); err != nil {
+			if err := PruneDirectory(manifestsPath); err != nil {
 				return err
 			}
 		}
@@ -2524,11 +2519,6 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
 		}
 	}

-	var images []llm.ImageData
-	for i, imgData := range req.Images {
-		images = append(images, llm.ImageData{ID: i, Data: imgData})
-	}
-
 	var streamStarted bool
 	if err := runner.Completion(c.Request.Context(), llm.CompletionRequest{
 		Prompt: req.Prompt,
@@ -2536,7 +2526,6 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
 		Height: req.Height,
 		Steps:  req.Steps,
 		Seed:   seed,
-		Images: images,
 	}, func(cr llm.CompletionResponse) {
 		streamStarted = true
 		res := api.GenerateResponse{
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -25,7 +25,6 @@ import (
 	"github.com/ollama/ollama/convert"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/ggml"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/types/model"
 )

@@ -224,15 +223,15 @@ func TestCreateFromModelInheritsRendererParser(t *testing.T) {
 		t.Fatalf("expected status code 200, actual %d", w.Code)
 	}

-	mf, err := manifest.ParseNamedManifest(model.ParseName("child"))
+	manifest, err := ParseNamedManifest(model.ParseName("child"))
 	if err != nil {
 		t.Fatalf("parse manifest: %v", err)
 	}
-	if mf.Config.Digest == "" {
+	if manifest.Config.Digest == "" {
 		t.Fatalf("unexpected empty config digest for child manifest")
 	}

-	configPath, err := manifest.BlobsPath(mf.Config.Digest)
+	configPath, err := GetBlobsPath(manifest.Config.Digest)
 	if err != nil {
 		t.Fatalf("config blob path: %v", err)
 	}
--- a/server/routes_delete_test.go
+++ b/server/routes_delete_test.go
@@ -10,7 +10,6 @@ import (
 	"github.com/gin-gonic/gin"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/types/model"
 )

@@ -94,13 +93,13 @@ func TestDeleteDuplicateLayers(t *testing.T) {
 		t.Fatal(err)
 	}

-	config, err := manifest.NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	config, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
 	if err != nil {
 		t.Fatal(err)
 	}

 	// create a manifest with duplicate layers
-	if err := manifest.WriteManifest(n, config, []manifest.Layer{config}); err != nil {
+	if err := WriteManifest(n, config, []Layer{config}); err != nil {
 		t.Fatal(err)
 	}

--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -2101,249 +2101,3 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 		}
 	})
 }
-
-func TestGenerateUnload(t *testing.T) {
-	gin.SetMode(gin.TestMode)
-
-	var loadFnCalled bool
-
-	s := Server{
-		sched: &Scheduler{
-			pendingReqCh:    make(chan *LlmRequest, 1),
-			finishedReqCh:   make(chan *LlmRequest, 1),
-			expiredCh:       make(chan *runnerRef, 1),
-			unloadedCh:      make(chan any, 1),
-			loaded:          make(map[string]*runnerRef),
-			newServerFn:     newMockServer(&mockRunner{}),
-			getGpuFn:        getGpuFn,
-			getSystemInfoFn: getSystemInfoFn,
-			loadFn: func(req *LlmRequest, _ *ggml.GGML, _ ml.SystemInfo, _ []ml.DeviceInfo, _ bool) bool {
-				loadFnCalled = true
-				req.successCh <- &runnerRef{llama: &mockRunner{}}
-				return false
-			},
-		},
-	}
-
-	go s.sched.Run(t.Context())
-
-	_, digest := createBinFile(t, ggml.KV{
-		"general.architecture":          "llama",
-		"llama.block_count":             uint32(1),
-		"llama.context_length":          uint32(8192),
-		"llama.embedding_length":        uint32(4096),
-		"llama.attention.head_count":    uint32(32),
-		"llama.attention.head_count_kv": uint32(8),
-		"tokenizer.ggml.tokens":         []string{""},
-		"tokenizer.ggml.scores":         []float32{0},
-		"tokenizer.ggml.token_type":     []int32{0},
-	}, []*ggml.Tensor{
-		{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-	})
-
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
-		Model:  "test",
-		Files:  map[string]string{"file.gguf": digest},
-		Stream: &stream,
-	})
-
-	if w.Code != http.StatusOK {
-		t.Fatalf("expected status 200, got %d", w.Code)
-	}
-
-	t.Run("unload with empty prompt and keepalive 0", func(t *testing.T) {
-		loadFnCalled = false
-
-		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
-			Model:     "test",
-			Prompt:    "",
-			KeepAlive: &api.Duration{Duration: 0},
-			Stream:    &stream,
-		})
-
-		if w.Code != http.StatusOK {
-			t.Errorf("expected status 200, got %d", w.Code)
-		}
-
-		var resp api.GenerateResponse
-		if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
-			t.Fatalf("failed to unmarshal response: %v", err)
-		}
-
-		if resp.DoneReason != "unload" {
-			t.Errorf("expected done_reason 'unload', got %q", resp.DoneReason)
-		}
-
-		if !resp.Done {
-			t.Error("expected done to be true")
-		}
-
-		if loadFnCalled {
-			t.Error("expected model NOT to be loaded for unload request, but loadFn was called")
-		}
-	})
-}
-
-func TestGenerateWithImages(t *testing.T) {
-	gin.SetMode(gin.TestMode)
-
-	mock := mockRunner{
-		CompletionResponse: llm.CompletionResponse{
-			Done:               true,
-			DoneReason:         llm.DoneReasonStop,
-			PromptEvalCount:    1,
-			PromptEvalDuration: 1,
-			EvalCount:          1,
-			EvalDuration:       1,
-		},
-	}
-
-	s := Server{
-		sched: &Scheduler{
-			pendingReqCh:    make(chan *LlmRequest, 1),
-			finishedReqCh:   make(chan *LlmRequest, 1),
-			expiredCh:       make(chan *runnerRef, 1),
-			unloadedCh:      make(chan any, 1),
-			loaded:          make(map[string]*runnerRef),
-			newServerFn:     newMockServer(&mock),
-			getGpuFn:        getGpuFn,
-			getSystemInfoFn: getSystemInfoFn,
-			waitForRecovery: 250 * time.Millisecond,
-			loadFn: func(req *LlmRequest, _ *ggml.GGML, _ ml.SystemInfo, _ []ml.DeviceInfo, _ bool) bool {
-				time.Sleep(time.Millisecond)
-				req.successCh <- &runnerRef{
-					llama: &mock,
-				}
-				return false
-			},
-		},
-	}
-
-	go s.sched.Run(t.Context())
-
-	_, digest := createBinFile(t, ggml.KV{
-		"general.architecture":          "llama",
-		"llama.block_count":             uint32(1),
-		"llama.context_length":          uint32(8192),
-		"llama.embedding_length":        uint32(4096),
-		"llama.attention.head_count":    uint32(32),
-		"llama.attention.head_count_kv": uint32(8),
-		"tokenizer.ggml.tokens":         []string{""},
-		"tokenizer.ggml.scores":         []float32{0},
-		"tokenizer.ggml.token_type":     []int32{0},
-	}, []*ggml.Tensor{
-		{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-		{Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
-	})
-
-	w := createRequest(t, s.CreateHandler, api.CreateRequest{
-		Model:  "test",
-		Files:  map[string]string{"file.gguf": digest},
-		Stream: &stream,
-	})
-
-	if w.Code != http.StatusOK {
-		t.Fatalf("expected status 200, got %d", w.Code)
-	}
-
-	t.Run("images passed to completion request", func(t *testing.T) {
-		testImage := []byte("test-image-data")
-
-		mock.CompletionResponse.Content = "Image processed"
-		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
-			Model:  "test",
-			Prompt: "Describe this image",
-			Images: []api.ImageData{testImage},
-			Stream: &stream,
-		})
-
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
-		}
-
-		// Verify images were passed to the completion request
-		if len(mock.CompletionRequest.Images) != 1 {
-			t.Fatalf("expected 1 image in completion request, got %d", len(mock.CompletionRequest.Images))
-		}
-
-		if !bytes.Equal(mock.CompletionRequest.Images[0].Data, testImage) {
-			t.Errorf("image data mismatch in completion request")
-		}
-
-		if mock.CompletionRequest.Images[0].ID != 0 {
-			t.Errorf("expected image ID 0, got %d", mock.CompletionRequest.Images[0].ID)
-		}
-	})
-
-	t.Run("multiple images passed to completion request", func(t *testing.T) {
-		testImage1 := []byte("test-image-1")
-		testImage2 := []byte("test-image-2")
-
-		mock.CompletionResponse.Content = "Images processed"
-		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
-			Model:  "test",
-			Prompt: "Compare these images",
-			Images: []api.ImageData{testImage1, testImage2},
-			Stream: &stream,
-		})
-
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
-		}
-
-		// Verify both images were passed
-		if len(mock.CompletionRequest.Images) != 2 {
-			t.Fatalf("expected 2 images in completion request, got %d", len(mock.CompletionRequest.Images))
-		}
-
-		if !bytes.Equal(mock.CompletionRequest.Images[0].Data, testImage1) {
-			t.Errorf("first image data mismatch")
-		}
-
-		if !bytes.Equal(mock.CompletionRequest.Images[1].Data, testImage2) {
-			t.Errorf("second image data mismatch")
-		}
-
-		if mock.CompletionRequest.Images[0].ID != 0 || mock.CompletionRequest.Images[1].ID != 1 {
-			t.Errorf("expected image IDs 0 and 1, got %d and %d",
-				mock.CompletionRequest.Images[0].ID, mock.CompletionRequest.Images[1].ID)
-		}
-	})
-
-	t.Run("no images when none provided", func(t *testing.T) {
-		mock.CompletionResponse.Content = "No images"
-		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
-			Model:  "test",
-			Prompt: "Hello",
-			Stream: &stream,
-		})
-
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
-		}
-
-		// Verify no images in completion request
-		if len(mock.CompletionRequest.Images) != 0 {
-			t.Fatalf("expected 0 images in completion request, got %d", len(mock.CompletionRequest.Images))
-		}
-	})
-}
--- a/server/sched.go
+++ b/server/sched.go
@@ -571,7 +571,6 @@ func (s *Scheduler) loadImageGen(req *LlmRequest) bool {
 		model:           req.model,
 		modelPath:       req.model.ModelPath,
 		llama:           server,
-		Options:         &req.opts,
 		loading:         false,
 		sessionDuration: sessionDuration,
 		totalSize:       server.TotalSize(),
--- a/server/upload.go
+++ b/server/upload.go
@@ -21,14 +21,12 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/manifest"
-	"github.com/ollama/ollama/types/model"
 )

 var blobUploadManager sync.Map

 type blobUpload struct {
-	manifest.Layer
+	Layer

 	Total     int64
 	Completed atomic.Int64
@@ -53,7 +51,7 @@ const (
 )

 func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
-	p, err := manifest.BlobsPath(b.Digest)
+	p, err := GetBlobsPath(b.Digest)
 	if err != nil {
 		return err
 	}
@@ -61,7 +59,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *reg
 	if b.From != "" {
 		values := requestURL.Query()
 		values.Add("mount", b.Digest)
-		values.Add("from", model.ParseName(b.From).DisplayNamespaceModel())
+		values.Add("from", ParseModelPath(b.From).GetNamespaceRepository())
 		requestURL.RawQuery = values.Encode()
 	}

@@ -130,7 +128,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
 	defer blobUploadManager.Delete(b.Digest)
 	ctx, b.CancelFunc = context.WithCancel(ctx)

-	p, err := manifest.BlobsPath(b.Digest)
+	p, err := GetBlobsPath(b.Digest)
 	if err != nil {
 		b.err = err
 		return
@@ -366,9 +364,9 @@ func (p *progressWriter) Rollback() {
 	p.written = 0
 }

-func uploadBlob(ctx context.Context, n model.Name, layer manifest.Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
-	requestURL := n.BaseURL()
-	requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "blobs", layer.Digest)
+func uploadBlob(ctx context.Context, mp ModelPath, layer Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
+	requestURL := mp.BaseURL()
+	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs", layer.Digest)

 	resp, err := makeRequestWithRetry(ctx, http.MethodHead, requestURL, nil, nil, opts)
 	switch {
@@ -390,8 +388,8 @@ func uploadBlob(ctx context.Context, n model.Name, layer manifest.Layer, opts *r
 	data, ok := blobUploadManager.LoadOrStore(layer.Digest, &blobUpload{Layer: layer})
 	upload := data.(*blobUpload)
 	if !ok {
-		requestURL := n.BaseURL()
-		requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "blobs/uploads/")
+		requestURL := mp.BaseURL()
+		requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs/uploads/")
 		if err := upload.Prepare(ctx, requestURL, opts); err != nil {
 			blobUploadManager.Delete(layer.Digest)
 			return err
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -7,7 +7,6 @@ import (
 	"errors"
 	"fmt"
 	"log/slog"
-	"net/url"
 	"path/filepath"
 	"strings"
 )
@@ -36,25 +35,22 @@ func Unqualified(n Name) error {
 const MissingPart = "!MISSING!"

 const (
-	defaultHost           = "registry.ollama.ai"
-	defaultNamespace      = "library"
-	defaultTag            = "latest"
-	defaultProtocolScheme = "https"
+	defaultHost      = "registry.ollama.ai"
+	defaultNamespace = "library"
+	defaultTag       = "latest"
 )

 // DefaultName returns a name with the default values for the host, namespace,
-// tag, and protocol scheme parts. The model and digest parts are empty.
+// and tag parts. The model and digest parts are empty.
 //
 //   - The default host is ("registry.ollama.ai")
 //   - The default namespace is ("library")
 //   - The default tag is ("latest")
-//   - The default protocol scheme is ("https")
 func DefaultName() Name {
 	return Name{
-		Host:           defaultHost,
-		Namespace:      defaultNamespace,
-		Tag:            defaultTag,
-		ProtocolScheme: defaultProtocolScheme,
+		Host:      defaultHost,
+		Namespace: defaultNamespace,
+		Tag:       defaultTag,
 	}
 }

@@ -91,11 +87,10 @@ func (k partKind) String() string {
 // It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
 // is valid.
 type Name struct {
-	Host           string
-	Namespace      string
-	Model          string
-	Tag            string
-	ProtocolScheme string
+	Host      string
+	Namespace string
+	Model     string
+	Tag       string
 }

 // ParseName parses and assembles a Name from a name string. The
@@ -165,9 +160,7 @@ func ParseNameBare(s string) Name {
 	}

 	scheme, host, ok := strings.Cut(s, "://")
-	if ok {
-		n.ProtocolScheme = scheme
-	} else {
+	if !ok {
 		host = scheme
 	}
 	n.Host = host
@@ -196,13 +189,12 @@ func ParseNameFromFilepath(s string) (n Name) {
 	return n
 }

-// Merge merges the host, namespace, tag, and protocol scheme parts of the two names,
+// Merge merges the host, namespace, and tag parts of the two names,
 // preferring the non-empty parts of a.
 func Merge(a, b Name) Name {
 	a.Host = cmp.Or(a.Host, b.Host)
 	a.Namespace = cmp.Or(a.Namespace, b.Namespace)
 	a.Tag = cmp.Or(a.Tag, b.Tag)
-	a.ProtocolScheme = cmp.Or(a.ProtocolScheme, b.ProtocolScheme)
 	return a
 }

@@ -313,23 +305,6 @@ func (n Name) EqualFold(o Name) bool {
 		strings.EqualFold(n.Tag, o.Tag)
 }

-// BaseURL returns the base URL for the registry.
-func (n Name) BaseURL() *url.URL {
-	return &url.URL{
-		Scheme: n.ProtocolScheme,
-		Host:   n.Host,
-	}
-}
-
-// DisplayNamespaceModel returns the namespace and model joined by "/".
-func (n Name) DisplayNamespaceModel() string {
-	var b strings.Builder
-	b.WriteString(n.Namespace)
-	b.WriteByte('/')
-	b.WriteString(n.Model)
-	return b.String()
-}
-
 func isValidLen(kind partKind, s string) bool {
 	switch kind {
 	case kindHost:
--- a/types/model/name_test.go
+++ b/types/model/name_test.go
@@ -32,11 +32,10 @@ func TestParseNameParts(t *testing.T) {
 		{
 			in: "scheme://host:port/namespace/model:tag",
 			want: Name{
-				Host:           "host:port",
-				Namespace:      "namespace",
-				Model:          "model",
-				Tag:            "tag",
-				ProtocolScheme: "scheme",
+				Host:      "host:port",
+				Namespace: "namespace",
+				Model:     "model",
+				Tag:       "tag",
 			},
 			wantFilepath: filepath.Join("host:port", "namespace", "model", "tag"),
 		},
--- a/x/create/client/create.go
+++ b/x/create/client/create.go
@@ -12,8 +12,8 @@ import (
 	"fmt"
 	"io"

-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/progress"
+	"github.com/ollama/ollama/server"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/x/create"
 )
@@ -103,7 +103,7 @@ func CreateModel(opts CreateOptions, p *progress.Progress) error {
 // newLayerCreator returns a LayerCreator callback for creating config/JSON layers.
 func newLayerCreator() create.LayerCreator {
 	return func(r io.Reader, mediaType, name string) (create.LayerInfo, error) {
-		layer, err := manifest.NewLayer(r, mediaType)
+		layer, err := server.NewLayer(r, mediaType)
 		if err != nil {
 			return create.LayerInfo{}, err
 		}
@@ -141,13 +141,13 @@ func createQuantizedLayers(r io.Reader, name, dtype string, shape []int32, quant
 	}

 	// Create layer for quantized weight
-	weightLayer, err := manifest.NewLayer(bytes.NewReader(qweightData), manifest.MediaTypeImageTensor)
+	weightLayer, err := server.NewLayer(bytes.NewReader(qweightData), server.MediaTypeImageTensor)
 	if err != nil {
 		return nil, err
 	}

 	// Create layer for scales
-	scalesLayer, err := manifest.NewLayer(bytes.NewReader(scalesData), manifest.MediaTypeImageTensor)
+	scalesLayer, err := server.NewLayer(bytes.NewReader(scalesData), server.MediaTypeImageTensor)
 	if err != nil {
 		return nil, err
 	}
@@ -169,7 +169,7 @@ func createQuantizedLayers(r io.Reader, name, dtype string, shape []int32, quant

 	// Add qbiases layer if present (affine mode)
 	if qbiasData != nil {
-		qbiasLayer, err := manifest.NewLayer(bytes.NewReader(qbiasData), manifest.MediaTypeImageTensor)
+		qbiasLayer, err := server.NewLayer(bytes.NewReader(qbiasData), server.MediaTypeImageTensor)
 		if err != nil {
 			return nil, err
 		}
@@ -186,7 +186,7 @@ func createQuantizedLayers(r io.Reader, name, dtype string, shape []int32, quant

 // createUnquantizedLayer creates a single tensor layer without quantization.
 func createUnquantizedLayer(r io.Reader, name string) ([]create.LayerInfo, error) {
-	layer, err := manifest.NewLayer(r, manifest.MediaTypeImageTensor)
+	layer, err := server.NewLayer(r, server.MediaTypeImageTensor)
 	if err != nil {
 		return nil, err
 	}
@@ -221,15 +221,15 @@ func newManifestWriter(opts CreateOptions, capabilities []string) create.Manifes
 		}

 		// Create config layer blob
-		configLayer, err := manifest.NewLayer(bytes.NewReader(configJSON), "application/vnd.docker.container.image.v1+json")
+		configLayer, err := server.NewLayer(bytes.NewReader(configJSON), "application/vnd.docker.container.image.v1+json")
 		if err != nil {
 			return fmt.Errorf("failed to create config layer: %w", err)
 		}

-		// Convert LayerInfo to manifest.Layer
-		manifestLayers := make([]manifest.Layer, 0, len(layers))
+		// Convert LayerInfo to server.Layer
+		serverLayers := make([]server.Layer, 0, len(layers))
 		for _, l := range layers {
-			manifestLayers = append(manifestLayers, manifest.Layer{
+			serverLayers = append(serverLayers, server.Layer{
 				MediaType: l.MediaType,
 				Digest:    l.Digest,
 				Size:      l.Size,
@@ -243,19 +243,19 @@ func newManifestWriter(opts CreateOptions, capabilities []string) create.Manifes
 			if err != nil {
 				return err
 			}
-			manifestLayers = append(manifestLayers, modelfileLayers...)
+			serverLayers = append(serverLayers, modelfileLayers...)
 		}

-		return manifest.WriteManifest(name, configLayer, manifestLayers)
+		return server.WriteManifest(name, configLayer, serverLayers)
 	}
 }

 // createModelfileLayers creates layers for template, system, and license from Modelfile config.
-func createModelfileLayers(mf *ModelfileConfig) ([]manifest.Layer, error) {
-	var layers []manifest.Layer
+func createModelfileLayers(mf *ModelfileConfig) ([]server.Layer, error) {
+	var layers []server.Layer

 	if mf.Template != "" {
-		layer, err := manifest.NewLayer(bytes.NewReader([]byte(mf.Template)), "application/vnd.ollama.image.template")
+		layer, err := server.NewLayer(bytes.NewReader([]byte(mf.Template)), "application/vnd.ollama.image.template")
 		if err != nil {
 			return nil, fmt.Errorf("failed to create template layer: %w", err)
 		}
@@ -263,7 +263,7 @@ func createModelfileLayers(mf *ModelfileConfig) ([]manifest.Layer, error) {
 	}

 	if mf.System != "" {
-		layer, err := manifest.NewLayer(bytes.NewReader([]byte(mf.System)), "application/vnd.ollama.image.system")
+		layer, err := server.NewLayer(bytes.NewReader([]byte(mf.System)), "application/vnd.ollama.image.system")
 		if err != nil {
 			return nil, fmt.Errorf("failed to create system layer: %w", err)
 		}
@@ -271,7 +271,7 @@ func createModelfileLayers(mf *ModelfileConfig) ([]manifest.Layer, error) {
 	}

 	if mf.License != "" {
-		layer, err := manifest.NewLayer(bytes.NewReader([]byte(mf.License)), "application/vnd.ollama.image.license")
+		layer, err := server.NewLayer(bytes.NewReader([]byte(mf.License)), "application/vnd.ollama.image.license")
 		if err != nil {
 			return nil, fmt.Errorf("failed to create license layer: %w", err)
 		}
--- a/x/create/client/quantize.go
+++ b/x/create/client/quantize.go
@@ -16,6 +16,11 @@ import (
 // Supported quantization types: "fp8" (affine 8-bit)
 // Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
 func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
+	// Lazy init MLX when needed for quantization
+	if err := mlx.InitMLX(); err != nil {
+		return nil, nil, nil, nil, nil, nil, fmt.Errorf("MLX initialization failed: %w", err)
+	}
+
 	tmpDir := ensureTempDir()

 	// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)
@@ -54,9 +59,6 @@ func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize str
 	// Quantize based on quantization type
 	var qweight, scales, qbiases *mlx.Array
 	switch quantize {
-	case "fp4":
-		// affine mode: group_size=32, bits=4
-		qweight, scales, qbiases = mlx.Quantize(arr, 32, 4, "affine")
 	case "fp8":
 		// affine mode: group_size=32, bits=8
 		qweight, scales, qbiases = mlx.Quantize(arr, 32, 8, "affine")
--- a/x/create/imagegen.go
+++ b/x/create/imagegen.go
@@ -20,10 +20,10 @@ import (
 func CreateImageGenModel(modelName, modelDir, quantize string, createLayer LayerCreator, createTensorLayer QuantizingTensorLayerCreator, writeManifest ManifestWriter, fn func(status string)) error {
 	// Validate quantization type
 	switch quantize {
-	case "", "fp4", "fp8":
+	case "", "fp8":
 		// valid
 	default:
-		return fmt.Errorf("unsupported quantization type %q: supported types are fp4, fp8", quantize)
+		return fmt.Errorf("unsupported quantization type %q: supported types are fp8", quantize)
 	}

 	var layers []LayerInfo
--- a/x/imagegen/cache/step.go
+++ b/x/imagegen/cache/step.go
@@ -9,7 +9,7 @@ import "github.com/ollama/ollama/x/imagegen/mlx"
 // shallow layers change little between consecutive steps, so we can
 // cache their outputs and skip recomputation on non-refresh steps.
 //
-// Supports both single-stream and dual-stream architectures:
+// Supports both single-stream (Z-Image) and dual-stream (Qwen-Image) architectures:
 //   - Single-stream: use Get/Set for the single output per layer
 //   - Dual-stream: use Get/Set for stream 1 (imgH), Get2/Set2 for stream 2 (txtH)
 //
@@ -87,7 +87,7 @@ func (c *StepCache) Set(layer int, arr *mlx.Array) {
 }

 // Get2 returns the cached output for a layer (stream 2), or nil if not cached.
-// Used for dual-stream architectures.
+// Used for dual-stream architectures like Qwen-Image.
 func (c *StepCache) Get2(layer int) *mlx.Array {
 	if layer < len(c.layers2) {
 		return c.layers2[layer]
@@ -96,7 +96,7 @@ func (c *StepCache) Get2(layer int) *mlx.Array {
 }

 // Set2 stores a layer output (stream 2), freeing any previous value.
-// Used for dual-stream architectures.
+// Used for dual-stream architectures like Qwen-Image.
 func (c *StepCache) Set2(layer int, arr *mlx.Array) {
 	if layer < len(c.layers2) {
 		if c.layers2[layer] != nil {
--- a/x/imagegen/cli.go
+++ b/x/imagegen/cli.go
@@ -10,10 +10,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"net/http"
 	"os"
-	"regexp"
-	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -78,7 +75,6 @@ Image Generation Flags (experimental):
 // RunCLI handles the CLI for image generation models.
 // Returns true if it handled the request, false if the caller should continue with normal flow.
 // Supports flags: --width, --height, --steps, --seed, --negative
-// Image paths can be included in the prompt and will be extracted automatically.
 func RunCLI(cmd *cobra.Command, name string, prompt string, interactive bool, keepAlive *api.Duration) error {
 	// Get options from flags (with env var defaults)
 	opts := DefaultOptions()
@@ -115,16 +111,9 @@ func generateImageWithOptions(cmd *cobra.Command, modelName, prompt string, keep
 		return err
 	}

-	// Extract any image paths from the prompt
-	prompt, images, err := extractFileData(prompt)
-	if err != nil {
-		return err
-	}
-
 	req := &api.GenerateRequest{
 		Model:  modelName,
 		Prompt: prompt,
-		Images: images,
 		Width:  int32(opts.Width),
 		Height: int32(opts.Height),
 		Steps:  int32(opts.Steps),
@@ -265,33 +254,14 @@ func runInteractive(cmd *cobra.Command, modelName string, keepAlive *api.Duratio
 			printCurrentSettings(opts)
 			continue
 		case strings.HasPrefix(line, "/"):
-			// Check if it's a file path, not a command
-			args := strings.Fields(line)
-			isFile := false
-			for _, f := range extractFileNames(line) {
-				if strings.HasPrefix(f, args[0]) {
-					isFile = true
-					break
-				}
-			}
-			if !isFile {
-				fmt.Fprintf(os.Stderr, "Unknown command: %s (try /help)\n", args[0])
-				continue
-			}
-		}
-
-		// Extract any image paths from the input
-		prompt, images, err := extractFileData(line)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+			fmt.Fprintf(os.Stderr, "Unknown command: %s (try /help)\n", line)
 			continue
 		}

 		// Generate image with current options
 		req := &api.GenerateRequest{
 			Model:  modelName,
-			Prompt: prompt,
-			Images: images,
+			Prompt: line,
 			Width:  int32(opts.Width),
 			Height: int32(opts.Height),
 			Steps:  int32(opts.Steps),
@@ -516,59 +486,3 @@ func displayImageInTerminal(imagePath string) bool {
 		return false
 	}
 }
-
-// extractFileNames finds image file paths in the input string.
-func extractFileNames(input string) []string {
-	// Regex to match file paths with image extensions
-	regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|webp)\b`
-	re := regexp.MustCompile(regexPattern)
-	return re.FindAllString(input, -1)
-}
-
-// extractFileData extracts image data from file paths found in the input.
-// Returns the cleaned prompt (with file paths removed) and the image data.
-func extractFileData(input string) (string, []api.ImageData, error) {
-	filePaths := extractFileNames(input)
-	var imgs []api.ImageData
-
-	for _, fp := range filePaths {
-		// Normalize escaped spaces
-		nfp := strings.ReplaceAll(fp, "\\ ", " ")
-		nfp = strings.ReplaceAll(nfp, "%20", " ")
-
-		data, err := getImageData(nfp)
-		if errors.Is(err, os.ErrNotExist) {
-			continue
-		} else if err != nil {
-			return "", nil, err
-		}
-		fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
-		input = strings.ReplaceAll(input, fp, "")
-		imgs = append(imgs, data)
-	}
-	return strings.TrimSpace(input), imgs, nil
-}
-
-// getImageData reads and validates image data from a file.
-func getImageData(filePath string) ([]byte, error) {
-	file, err := os.Open(filePath)
-	if err != nil {
-		return nil, err
-	}
-	defer file.Close()
-
-	buf := make([]byte, 512)
-	_, err = file.Read(buf)
-	if err != nil {
-		return nil, err
-	}
-
-	contentType := http.DetectContentType(buf)
-	allowedTypes := []string{"image/jpeg", "image/jpg", "image/png", "image/webp"}
-	if !slices.Contains(allowedTypes, contentType) {
-		return nil, fmt.Errorf("invalid image type: %s", contentType)
-	}
-
-	// Re-read the full file
-	return os.ReadFile(filePath)
-}
--- a/x/imagegen/cmd/engine/main.go
+++ b/x/imagegen/cmd/engine/main.go
@@ -7,20 +7,17 @@ import (
 	"encoding/json"
 	"flag"
 	"fmt"
-	"image"
-	_ "image/jpeg"
-	_ "image/png"
 	"log"
 	"os"
 	"path/filepath"
 	"runtime/pprof"

-	"github.com/ollama/ollama/x/imagegen"
 	"github.com/ollama/ollama/x/imagegen/mlx"
-	"github.com/ollama/ollama/x/imagegen/models/flux2"
 	"github.com/ollama/ollama/x/imagegen/models/gemma3"
 	"github.com/ollama/ollama/x/imagegen/models/gpt_oss"
 	"github.com/ollama/ollama/x/imagegen/models/llama"
+	"github.com/ollama/ollama/x/imagegen/models/qwen_image"
+	"github.com/ollama/ollama/x/imagegen/models/qwen_image_edit"
 	"github.com/ollama/ollama/x/imagegen/models/zimage"
 	"github.com/ollama/ollama/x/imagegen/safetensors"
 )
@@ -49,8 +46,8 @@ func main() {
 	imagePath := flag.String("image", "", "Image path for multimodal models")

 	// Image generation params
-	width := flag.Int("width", 0, "Image width (0 = auto from input or 1024)")
-	height := flag.Int("height", 0, "Image height (0 = auto from input or 1024)")
+	width := flag.Int("width", 1024, "Image width")
+	height := flag.Int("height", 1024, "Image height")
 	steps := flag.Int("steps", 0, "Denoising steps (0 = model default)")
 	seed := flag.Int64("seed", 42, "Random seed")
 	out := flag.String("output", "output.png", "Output path")
@@ -59,11 +56,13 @@ func main() {
 	listTensors := flag.Bool("list", false, "List tensors only")
 	cpuProfile := flag.String("cpuprofile", "", "Write CPU profile to file")
 	gpuCapture := flag.String("gpu-capture", "", "Capture GPU trace to .gputrace file (run with MTL_CAPTURE_ENABLED=1)")
+	layerCache := flag.Bool("layer-cache", false, "Enable layer caching for faster diffusion (Z-Image, Qwen-Image). Not compatible with CFG/negative prompts.")
 	wiredLimitGB := flag.Int("wired-limit", 32, "Metal wired memory limit in GB")

 	// Legacy mode flags
 	zimageFlag := flag.Bool("zimage", false, "Z-Image generation")
-	flux2Flag := flag.Bool("flux2", false, "FLUX.2 Klein generation")
+	qwenImage := flag.Bool("qwen-image", false, "Qwen-Image text-to-image generation")
+	qwenImageEdit := flag.Bool("qwen-image-edit", false, "Qwen-Image-Edit image editing")
 	var inputImages stringSlice
 	flag.Var(&inputImages, "input-image", "Input image for image editing (can be specified multiple times)")
 	negativePrompt := flag.String("negative-prompt", "", "Negative prompt for CFG (empty = no CFG, matching Python)")
@@ -123,44 +122,60 @@ func main() {
 		if err == nil {
 			err = saveImageArray(img, *out)
 		}
-	case *flux2Flag:
-		m := &flux2.Model{}
-		if loadErr := m.Load(*modelPath); loadErr != nil {
+	case *qwenImage:
+		m, loadErr := qwen_image.LoadPersistent(*modelPath)
+		if loadErr != nil {
 			log.Fatal(loadErr)
 		}
-		// Load input images with EXIF orientation correction
-		var loadedImages []image.Image
-		for _, path := range inputImages {
-			img, loadErr := loadImageWithEXIF(path)
-			if loadErr != nil {
-				log.Fatalf("Failed to load image %s: %v", path, loadErr)
-			}
-			loadedImages = append(loadedImages, img)
-		}
-		// When input images provided and user didn't override dimensions, use 0 to match input
-		fluxWidth := int32(*width)
-		fluxHeight := int32(*height)
-		if len(loadedImages) > 0 && *width == 0 && *height == 0 {
-			// Both unset, will auto-detect from input
-		} else if len(loadedImages) > 0 && *width == 0 {
-			fluxWidth = 0 // Compute from height + aspect ratio
-		} else if len(loadedImages) > 0 && *height == 0 {
-			fluxHeight = 0 // Compute from width + aspect ratio
-		}
 		var img *mlx.Array
-		img, err = m.GenerateFromConfig(context.Background(), &flux2.GenerateConfig{
-			Prompt:        *prompt,
-			Width:         fluxWidth,
-			Height:        fluxHeight,
-			Steps:         *steps,
-			GuidanceScale: float32(*cfgScale),
-			Seed:          *seed,
-			CapturePath:   *gpuCapture,
-			InputImages:   loadedImages,
+		img, err = m.GenerateFromConfig(&qwen_image.GenerateConfig{
+			Prompt:         *prompt,
+			NegativePrompt: *negativePrompt,
+			CFGScale:       float32(*cfgScale),
+			Width:          int32(*width),
+			Height:         int32(*height),
+			Steps:          *steps,
+			Seed:           *seed,
+			LayerCache:     *layerCache,
 		})
 		if err == nil {
 			err = saveImageArray(img, *out)
 		}
+	case *qwenImageEdit:
+		if len(inputImages) == 0 {
+			log.Fatal("qwen-image-edit requires at least one -input-image")
+		}
+
+		m, loadErr := qwen_image_edit.LoadPersistent(*modelPath)
+		if loadErr != nil {
+			log.Fatal(loadErr)
+		}
+		// For image editing, pass 0 for a dimension so it is auto-detected from the input image,
+		// unless the flag differs from its 1024 default (an explicit 1024 is also treated as unset)
+		editWidth := int32(0)
+		editHeight := int32(0)
+		if *width != 1024 {
+			editWidth = int32(*width)
+		}
+		if *height != 1024 {
+			editHeight = int32(*height)
+		}
+
+		cfg := &qwen_image_edit.GenerateConfig{
+			Prompt:         *prompt,
+			NegativePrompt: *negativePrompt,
+			CFGScale:       float32(*cfgScale),
+			Width:          editWidth,
+			Height:         editHeight,
+			Steps:          *steps,
+			Seed:           *seed,
+		}
+
+		var img *mlx.Array
+		img, err = m.EditFromConfig(inputImages, cfg)
+		if err == nil {
+			err = saveImageArray(img, *out)
+		}
 	case *listTensors:
 		err = listModelTensors(*modelPath)
 	default:
@@ -261,8 +276,6 @@ func detectModelKind(modelPath string) (string, error) {
 			switch index.ClassName {
 			case "FluxPipeline", "ZImagePipeline":
 				return "zimage", nil
-			case "Flux2KleinPipeline":
-				return "flux2", nil
 			}
 		}
 		return "zimage", nil
@@ -283,12 +296,3 @@ func detectModelKind(modelPath string) (string, error) {

 	return cfg.ModelType, nil
 }
-
-// loadImageWithEXIF loads an image from a file path with EXIF orientation correction.
-func loadImageWithEXIF(path string) (image.Image, error) {
-	data, err := os.ReadFile(path)
-	if err != nil {
-		return nil, fmt.Errorf("read file: %w", err)
-	}
-	return imagegen.DecodeImage(data)
-}
--- a/x/imagegen/image.go
+++ b/x/imagegen/image.go
@@ -7,7 +7,6 @@ import (
 	"encoding/base64"
 	"fmt"
 	"image"
-	_ "image/jpeg"
 	"image/png"
 	"os"
 	"path/filepath"
@@ -109,160 +108,3 @@ func clampF(v, min, max float32) float32 {
 	}
 	return v
 }
-
-// DecodeImage decodes image bytes with EXIF orientation applied.
-func DecodeImage(data []byte) (image.Image, error) {
-	orientation := readJPEGOrientation(data)
-
-	img, _, err := image.Decode(bytes.NewReader(data))
-	if err != nil {
-		return nil, err
-	}
-
-	return applyOrientation(img, orientation), nil
-}
-
-// readJPEGOrientation extracts EXIF orientation from JPEG bytes.
-// Returns 1 (normal) for non-JPEG or if orientation not found.
-func readJPEGOrientation(data []byte) int {
-	if len(data) < 2 || data[0] != 0xFF || data[1] != 0xD8 {
-		return 1 // Not JPEG
-	}
-
-	r := bytes.NewReader(data[2:])
-	for {
-		var marker [2]byte
-		if _, err := r.Read(marker[:]); err != nil || marker[0] != 0xFF {
-			return 1
-		}
-
-		if marker[1] == 0xE1 { // APP1 (EXIF)
-			var lenBytes [2]byte
-			if _, err := r.Read(lenBytes[:]); err != nil {
-				return 1
-			}
-			segLen := int(uint16(lenBytes[0])<<8|uint16(lenBytes[1])) - 2
-			if segLen < 14 {
-				r.Seek(int64(segLen), 1)
-				continue
-			}
-			seg := make([]byte, segLen)
-			if _, err := r.Read(seg); err != nil {
-				return 1
-			}
-			if string(seg[:4]) == "Exif" && seg[4] == 0 && seg[5] == 0 {
-				return parseTIFFOrientation(seg[6:])
-			}
-			continue
-		}
-
-		if marker[1] == 0xD9 || marker[1] == 0xDA {
-			return 1 // EOI or SOS
-		}
-		if marker[1] >= 0xD0 && marker[1] <= 0xD7 {
-			continue // RST markers
-		}
-
-		var lenBytes [2]byte
-		if _, err := r.Read(lenBytes[:]); err != nil {
-			return 1
-		}
-		segLen := int(uint16(lenBytes[0])<<8|uint16(lenBytes[1])) - 2
-		if segLen > 0 {
-			r.Seek(int64(segLen), 1)
-		}
-	}
-}
-
-func parseTIFFOrientation(tiff []byte) int {
-	if len(tiff) < 8 {
-		return 1
-	}
-
-	var big bool
-	switch string(tiff[:2]) {
-	case "MM":
-		big = true
-	case "II":
-		big = false
-	default:
-		return 1
-	}
-
-	u16 := func(b []byte) uint16 {
-		if big {
-			return uint16(b[0])<<8 | uint16(b[1])
-		}
-		return uint16(b[1])<<8 | uint16(b[0])
-	}
-	u32 := func(b []byte) uint32 {
-		if big {
-			return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
-		}
-		return uint32(b[3])<<24 | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[0])
-	}
-
-	if u16(tiff[2:4]) != 42 {
-		return 1
-	}
-
-	ifdOffset := u32(tiff[4:8])
-	if int(ifdOffset)+2 > len(tiff) {
-		return 1
-	}
-
-	numEntries := u16(tiff[ifdOffset : ifdOffset+2])
-	for i := range int(numEntries) {
-		offset := ifdOffset + 2 + uint32(i)*12
-		if int(offset)+12 > len(tiff) {
-			break
-		}
-		if u16(tiff[offset:offset+2]) == 0x0112 { // Orientation tag
-			o := int(u16(tiff[offset+8 : offset+10]))
-			if o >= 1 && o <= 8 {
-				return o
-			}
-			return 1
-		}
-	}
-	return 1
-}
-
-func applyOrientation(img image.Image, orientation int) image.Image {
-	if orientation <= 1 || orientation > 8 {
-		return img
-	}
-
-	bounds := img.Bounds()
-	w, h := bounds.Dx(), bounds.Dy()
-
-	outW, outH := w, h
-	if orientation >= 5 {
-		outW, outH = h, w
-	}
-
-	out := image.NewRGBA(image.Rect(0, 0, outW, outH))
-	for y := range h {
-		for x := range w {
-			var dx, dy int
-			switch orientation {
-			case 2:
-				dx, dy = w-1-x, y
-			case 3:
-				dx, dy = w-1-x, h-1-y
-			case 4:
-				dx, dy = x, h-1-y
-			case 5:
-				dx, dy = y, x
-			case 6:
-				dx, dy = h-1-y, x
-			case 7:
-				dx, dy = h-1-y, w-1-x
-			case 8:
-				dx, dy = y, w-1-x
-			}
-			out.Set(dx, dy, img.At(x+bounds.Min.X, y+bounds.Min.Y))
-		}
-	}
-	return out
-}
--- a/x/imagegen/manifest.go
+++ b/x/imagegen/manifest.go
@@ -6,9 +6,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
 	"strings"
-
-	"github.com/ollama/ollama/envconfig"
 )
 
 // ManifestLayer represents a layer in the manifest.
@@ -33,15 +31,27 @@ type ModelManifest struct {
 	BlobDir  string
 }
 
+// DefaultBlobDir returns the default blob storage directory,
+// <home>/.ollama/models/blobs. The layout is the same on every OS, so no
+// per-platform branching is needed. If the home directory cannot be
+// determined, the path is built relative to the current directory (".").
 func DefaultBlobDir() string {
-	return filepath.Join(envconfig.Models(), "blobs")
+	home, err := os.UserHomeDir()
+	if err != nil {
+		home = "."
+	}
+	return filepath.Join(home, ".ollama", "models", "blobs")
 }
 
-// DefaultManifestDir returns the manifest storage directory.
-// Respects OLLAMA_MODELS.
-
+// DefaultManifestDir returns the default manifest storage directory,
+// <home>/.ollama/models/manifests, falling back to a path relative to the
+// current directory (".") when the home directory cannot be determined.
 func DefaultManifestDir() string {
-	return filepath.Join(envconfig.Models(), "manifests")
+	home, err := os.UserHomeDir()
+	if err != nil {
+		home = "."
+	}
+	return filepath.Join(home, ".ollama", "models", "manifests")
 }
 
 // LoadManifest loads a manifest for the given model name.
@@ -161,17 +171,6 @@ func (m *ModelManifest) HasTensorLayers() bool {
 	return false
 }

-// TotalTensorSize returns the total size in bytes of all tensor layers.
-func (m *ModelManifest) TotalTensorSize() int64 {
-	var total int64
-	for _, layer := range m.Manifest.Layers {
-		if layer.MediaType == "application/vnd.ollama.image.tensor" {
-			total += layer.Size
-		}
-	}
-	return total
-}
-
 // ModelInfo contains metadata about an image generation model.
 type ModelInfo struct {
 	Architecture   string
--- a/x/imagegen/manifest_test.go
+++ b/x/imagegen/manifest_test.go
@@ -1,57 +0,0 @@
-package imagegen
-
-import (
-	"path/filepath"
-	"testing"
-)
-
-func TestTotalTensorSize(t *testing.T) {
-	m := &ModelManifest{
-		Manifest: &Manifest{
-			Layers: []ManifestLayer{
-				{MediaType: "application/vnd.ollama.image.tensor", Size: 1000},
-				{MediaType: "application/vnd.ollama.image.tensor", Size: 2000},
-				{MediaType: "application/vnd.ollama.image.json", Size: 500}, // not a tensor
-				{MediaType: "application/vnd.ollama.image.tensor", Size: 3000},
-			},
-		},
-	}
-
-	got := m.TotalTensorSize()
-	want := int64(6000)
-	if got != want {
-		t.Errorf("TotalTensorSize() = %d, want %d", got, want)
-	}
-}
-
-func TestTotalTensorSizeEmpty(t *testing.T) {
-	m := &ModelManifest{
-		Manifest: &Manifest{
-			Layers: []ManifestLayer{},
-		},
-	}
-
-	if got := m.TotalTensorSize(); got != 0 {
-		t.Errorf("TotalTensorSize() = %d, want 0", got)
-	}
-}
-
-func TestManifestAndBlobDirsRespectOLLAMAModels(t *testing.T) {
-	modelsDir := filepath.Join(t.TempDir(), "models")
-
-	// Simulate packaged/systemd environment
-	t.Setenv("OLLAMA_MODELS", modelsDir)
-	t.Setenv("HOME", "/usr/share/ollama")
-
-	// Manifest dir must respect OLLAMA_MODELS
-	wantManifest := filepath.Join(modelsDir, "manifests")
-	if got := DefaultManifestDir(); got != wantManifest {
-		t.Fatalf("DefaultManifestDir() = %q, want %q", got, wantManifest)
-	}
-
-	// Blob dir must respect OLLAMA_MODELS
-	wantBlobs := filepath.Join(modelsDir, "blobs")
-	if got := DefaultBlobDir(); got != wantBlobs {
-		t.Fatalf("DefaultBlobDir() = %q, want %q", got, wantBlobs)
-	}
-}
--- a/x/imagegen/memory.go
+++ b/x/imagegen/memory.go
@@ -16,9 +16,19 @@ import (
 	"runtime"
 )

+// GB is the number of bytes in one gigabyte, using the binary definition (1024^3).
+const GB = 1024 * 1024 * 1024
+
 // SupportedBackends lists the backends that support image generation.
 var SupportedBackends = []string{"metal", "cuda", "cpu"}
 
+// modelVRAMEstimates maps pipeline class names to their estimated VRAM requirements in bytes.
+var modelVRAMEstimates = map[string]uint64{
+	"ZImagePipeline":    21 * GB, // ~21GB for Z-Image (text encoder + transformer + VAE)
+	"FluxPipeline":      21 * GB, // ~21GB for Flux (same architecture)
+	"QwenImagePipeline": 80 * GB, // TODO: verify actual requirements, using conservative estimate for now
+}
+
 // CheckPlatformSupport validates that image generation is supported on the current platform.
 // Returns nil if supported, or an error describing why it's not supported.
 func CheckPlatformSupport() error {
@@ -38,6 +48,17 @@ func CheckPlatformSupport() error {
 	}
 }

+// CheckMemoryRequirements reports whether availableMemory (in bytes) covers the
+// estimate that EstimateVRAM produces for modelName. It returns nil when the
+// estimate is met and a descriptive error, with both sizes in GB, otherwise.
+func CheckMemoryRequirements(modelName string, availableMemory uint64) error {
+	if need := EstimateVRAM(modelName); availableMemory < need {
+		return fmt.Errorf("insufficient memory for image generation: need %d GB, have %d GB",
+			need/GB, availableMemory/GB)
+	}
+	return nil
+}
+
 // ResolveModelName checks if a model name is a known image generation model.
 // Returns the normalized model name if found, empty string otherwise.
 func ResolveModelName(modelName string) string {
@@ -48,31 +69,29 @@ func ResolveModelName(modelName string) string {
 	return ""
 }

-// DetectModelType reads model_index.json and returns the model type.
-// Checks both "architecture" (Ollama format) and "_class_name" (diffusers format).
-// Returns empty string if detection fails.
-func DetectModelType(modelName string) string {
+// EstimateVRAM returns the estimated VRAM, in bytes, for the named image generation model.
+// It falls back to 21GB when the manifest, model_index.json, or a known _class_name is unavailable.
+func EstimateVRAM(modelName string) uint64 {
 	manifest, err := LoadManifest(modelName)
 	if err != nil {
-		return ""
+		return 21 * GB
 	}
 
 	data, err := manifest.ReadConfig("model_index.json")
 	if err != nil {
-		return ""
+		return 21 * GB
 	}
 
+	// Only _class_name is decoded; it is the key used to look up the estimate below.
 	var index struct {
-		Architecture string `json:"architecture"`
-		ClassName    string `json:"_class_name"`
+		ClassName string `json:"_class_name"`
 	}
 	if err := json.Unmarshal(data, &index); err != nil {
-		return ""
+		return 21 * GB
 	}
 
-	// Prefer architecture (Ollama format), fall back to _class_name (diffusers)
-	if index.Architecture != "" {
-		return index.Architecture
+	if estimate, ok := modelVRAMEstimates[index.ClassName]; ok {
+		return estimate
 	}
-	return index.ClassName
+	return 21 * GB
 }
--- a/x/imagegen/memory_test.go
+++ b/x/imagegen/memory_test.go
@@ -30,6 +30,70 @@ func TestCheckPlatformSupport(t *testing.T) {
 	}
 }

+func TestCheckMemoryRequirements(t *testing.T) {
+	cases := []struct {
+		label     string
+		available uint64
+		expectErr bool
+	}{
+		{
+			label:     "sufficient memory",
+			available: 32 * GB,
+			expectErr: false,
+		},
+		{
+			label:     "exactly enough memory",
+			available: 21 * GB,
+			expectErr: false,
+		},
+		{
+			label:     "insufficient memory",
+			available: 16 * GB,
+			expectErr: true,
+		},
+		{
+			label:     "zero memory",
+			available: 0,
+			expectErr: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.label, func(t *testing.T) {
+			// An unknown model name makes the estimate fall back to the 21GB default.
+			err := CheckMemoryRequirements("nonexistent-model", tc.available)
+			if gotErr := err != nil; gotErr != tc.expectErr {
+				t.Errorf("CheckMemoryRequirements() error = %v, wantErr %v", err, tc.expectErr)
+			}
+		})
+	}
+}
+
+func TestModelVRAMEstimates(t *testing.T) {
+	// Each known pipeline class must be present with exactly this estimate.
+	want := map[string]uint64{
+		"ZImagePipeline":    21 * GB,
+		"FluxPipeline":      21 * GB,
+		"QwenImagePipeline": 80 * GB,
+	}
+	for pipeline, wantVRAM := range want {
+		switch got, found := modelVRAMEstimates[pipeline]; {
+		case !found:
+			t.Errorf("Missing VRAM estimate for %s", pipeline)
+		case got != wantVRAM:
+			t.Errorf("VRAM estimate for %s = %d GB, want %d GB", pipeline, got/GB, wantVRAM/GB)
+		}
+	}
+}
+
+func TestEstimateVRAMDefault(t *testing.T) {
+	// A model with no manifest on disk must fall back to the 21GB default.
+	const want = 21 * GB
+	if got := EstimateVRAM("nonexistent-model-that-does-not-exist"); got != want {
+		t.Errorf("EstimateVRAM() = %d GB, want 21 GB", got/GB)
+	}
+}
+
 func TestResolveModelName(t *testing.T) {
 	// Non-existent model should return empty string
 	result := ResolveModelName("nonexistent-model")
--- a/x/imagegen/mlx/mlx.go
+++ b/x/imagegen/mlx/mlx.go
@@ -1137,27 +1137,6 @@ func RMSNormNoWeight(x *Array, eps float32) *Array {
 	return RMSNorm(x, ones, eps)
 }

-// LayerNorm applies layer normalization without learnable params
-// (x - mean) / sqrt(var + eps)
-func LayerNorm(x *Array, eps float32) *Array {
-	return LayerNormWithWeightBias(x, nil, nil, eps)
-}
-
-// LayerNormWithWeightBias computes layer normalization using mlx.fast
-// weight and bias can be nil for elementwise_affine=False
-func LayerNormWithWeightBias(x, weight, bias *Array, eps float32) *Array {
-	res := C.mlx_array_new()
-	var wc, bc C.mlx_array
-	if weight != nil {
-		wc = weight.c
-	}
-	if bias != nil {
-		bc = bias.c
-	}
-	C.mlx_fast_layer_norm(&res, x.c, wc, bc, C.float(eps), C.default_stream())
-	return newArray(res)
-}
-
 // RoPE applies rotary position embeddings using mlx.fast
 func RoPE(x *Array, dims int, traditional bool, base, scale float32, offset int) *Array {
 	res := C.mlx_array_new()
--- a/x/imagegen/models/flux2/flux2.go
+++ b/x/imagegen/models/flux2/flux2.go
@@ -1,553 +0,0 @@
-//go:build mlx
-
-// Package flux2 implements the FLUX.2 Klein diffusion transformer model.
-// Klein is a 4B parameter distilled model that supports sub-second inference.
-package flux2
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"image"
-	"math"
-	"time"
-
-	"github.com/ollama/ollama/x/imagegen"
-	"github.com/ollama/ollama/x/imagegen/mlx"
-	"github.com/ollama/ollama/x/imagegen/models/qwen3"
-	"github.com/ollama/ollama/x/imagegen/tokenizer"
-	"golang.org/x/image/draw"
-)
-
-// GenerateConfig holds all options for image generation.
-type GenerateConfig struct {
-	Prompt        string
-	Width         int32                 // Image width (default: 1024)
-	Height        int32                 // Image height (default: 1024)
-	Steps         int                   // Denoising steps (default: 4 for Klein)
-	GuidanceScale float32               // Guidance scale (default: 1.0, Klein doesn't need CFG)
-	Seed          int64                 // Random seed
-	Progress      func(step, totalSteps int) // Optional progress callback
-	CapturePath   string                // GPU capture path (debug)
-	InputImages   []image.Image         // Reference images for image conditioning (already loaded)
-}
-
-// Model represents a FLUX.2 Klein model.
-type Model struct {
-	ModelName       string
-	Tokenizer       *tokenizer.Tokenizer
-	TextEncoder     *qwen3.TextEncoder
-	Transformer     *Flux2Transformer2DModel
-	VAE             *AutoencoderKLFlux2
-	SchedulerConfig *SchedulerConfig
-}
-
-// TextEncoderLayerIndices are the layers from which to extract text embeddings.
-// Diffusers uses hidden_states[9, 18, 27]. In Python, hidden_states[0] is the embedding
-// output before any layers, so hidden_states[9] = after layer 8 (0-indexed).
-// Go's ForwardWithLayerOutputs captures after layer i runs, so we use [8, 17, 26].
-var TextEncoderLayerIndices = []int{8, 17, 26}
-
-// Load loads the FLUX.2 Klein model from ollama blob storage.
-func (m *Model) Load(modelName string) error {
-	fmt.Printf("Loading FLUX.2 Klein model from manifest: %s...\n", modelName)
-	start := time.Now()
-
-	if mlx.GPUIsAvailable() {
-		mlx.SetDefaultDeviceGPU()
-		mlx.EnableCompile()
-	}
-
-	m.ModelName = modelName
-
-	// Load manifest
-	manifest, err := imagegen.LoadManifest(modelName)
-	if err != nil {
-		return fmt.Errorf("load manifest: %w", err)
-	}
-
-	// Load tokenizer
-	fmt.Print("  Loading tokenizer... ")
-	tokData, err := manifest.ReadConfig("tokenizer/tokenizer.json")
-	if err != nil {
-		return fmt.Errorf("tokenizer: %w", err)
-	}
-
-	tokConfig := &tokenizer.TokenizerConfig{}
-	if data, err := manifest.ReadConfig("tokenizer/tokenizer_config.json"); err == nil {
-		tokConfig.TokenizerConfigJSON = data
-	}
-	if data, err := manifest.ReadConfig("tokenizer/generation_config.json"); err == nil {
-		tokConfig.GenerationConfigJSON = data
-	}
-	if data, err := manifest.ReadConfig("tokenizer/special_tokens_map.json"); err == nil {
-		tokConfig.SpecialTokensMapJSON = data
-	}
-
-	tok, err := tokenizer.LoadFromBytesWithConfig(tokData, tokConfig)
-	if err != nil {
-		return fmt.Errorf("tokenizer: %w", err)
-	}
-	m.Tokenizer = tok
-	fmt.Println("✓")
-
-	// Load text encoder
-	m.TextEncoder = &qwen3.TextEncoder{}
-	if err := m.TextEncoder.Load(manifest, "text_encoder/config.json"); err != nil {
-		return fmt.Errorf("text encoder: %w", err)
-	}
-
-	// Load transformer
-	m.Transformer = &Flux2Transformer2DModel{}
-	if err := m.Transformer.Load(manifest); err != nil {
-		return fmt.Errorf("transformer: %w", err)
-	}
-
-	// Load VAE
-	m.VAE = &AutoencoderKLFlux2{}
-	if err := m.VAE.Load(manifest); err != nil {
-		return fmt.Errorf("VAE: %w", err)
-	}
-
-	// Evaluate all weights in a single batch (reduces GPU sync overhead)
-	fmt.Print("  Evaluating weights... ")
-	allWeights := mlx.Collect(m.TextEncoder)
-	allWeights = append(allWeights, mlx.Collect(m.Transformer)...)
-	allWeights = append(allWeights, mlx.Collect(m.VAE)...)
-	mlx.Eval(allWeights...)
-	fmt.Println("✓")
-
-	// Load scheduler config
-	m.SchedulerConfig = DefaultSchedulerConfig()
-	if schedData, err := manifest.ReadConfig("scheduler/scheduler_config.json"); err == nil {
-		if err := json.Unmarshal(schedData, m.SchedulerConfig); err != nil {
-			fmt.Printf("  Warning: failed to parse scheduler config: %v\n", err)
-		}
-	}
-
-	mem := mlx.MetalGetActiveMemory()
-	fmt.Printf("  Loaded in %.2fs (%.1f GB VRAM)\n", time.Since(start).Seconds(), float64(mem)/(1024*1024*1024))
-
-	return nil
-}
-
-// Generate creates an image from a prompt.
-func (m *Model) Generate(prompt string, width, height int32, steps int, seed int64) (*mlx.Array, error) {
-	return m.GenerateFromConfig(context.Background(), &GenerateConfig{
-		Prompt: prompt,
-		Width:  width,
-		Height: height,
-		Steps:  steps,
-		Seed:   seed,
-	})
-}
-
-// GenerateWithProgress creates an image with progress callback.
-func (m *Model) GenerateWithProgress(prompt string, width, height int32, steps int, seed int64, progress func(step, totalSteps int)) (*mlx.Array, error) {
-	return m.GenerateFromConfig(context.Background(), &GenerateConfig{
-		Prompt:   prompt,
-		Width:    width,
-		Height:   height,
-		Steps:    steps,
-		Seed:     seed,
-		Progress: progress,
-	})
-}
-
-// GenerateFromConfig generates an image using the unified config struct.
-func (m *Model) GenerateFromConfig(ctx context.Context, cfg *GenerateConfig) (*mlx.Array, error) {
-	start := time.Now()
-	result, err := m.generate(ctx, cfg)
-	if err != nil {
-		return nil, err
-	}
-	fmt.Printf("Generated in %.2fs (%d steps)\n", time.Since(start).Seconds(), cfg.Steps)
-	return result, nil
-}
-
-// GenerateImage implements runner.ImageModel interface.
-func (m *Model) GenerateImage(ctx context.Context, prompt string, width, height int32, steps int, seed int64, progress func(step, total int)) (*mlx.Array, error) {
-	return m.GenerateFromConfig(ctx, &GenerateConfig{
-		Prompt:   prompt,
-		Width:    width,
-		Height:   height,
-		Steps:    steps,
-		Seed:     seed,
-		Progress: progress,
-	})
-}
-
-// GenerateImageWithInputs implements runner.ImageEditModel interface.
-// It generates an image conditioned on the provided input images for image editing.
-func (m *Model) GenerateImageWithInputs(ctx context.Context, prompt string, width, height int32, steps int, seed int64, inputImages []image.Image, progress func(step, total int)) (*mlx.Array, error) {
-	return m.GenerateFromConfig(ctx, &GenerateConfig{
-		Prompt:      prompt,
-		Width:       width,
-		Height:      height,
-		Steps:       steps,
-		Seed:        seed,
-		InputImages: inputImages,
-		Progress:    progress,
-	})
-}
-
-// MaxOutputPixels is the maximum output resolution (4 megapixels, ~2048x2048)
-const MaxOutputPixels = 2048 * 2048
-
-// MaxRefPixels is the maximum resolution for reference images (smaller to reduce attention memory)
-const MaxRefPixels = 728 * 728
-
-// generate is the internal denoising pipeline.
-func (m *Model) generate(ctx context.Context, cfg *GenerateConfig) (*mlx.Array, error) {
-	// Enable MLX compilation for fused kernels
-	mlx.EnableCompile()
-
-	// Apply defaults
-	if cfg.Steps <= 0 {
-		cfg.Steps = 4 // Klein default: 4 steps for distilled model
-	}
-	if cfg.GuidanceScale <= 0 {
-		cfg.GuidanceScale = 1.0 // Klein doesn't need guidance
-	}
-
-	// Determine output dimensions
-	if len(cfg.InputImages) > 0 {
-		// With input images, compute missing dimension from aspect ratio
-		// Images are already EXIF-rotated by the caller
-		bounds := cfg.InputImages[0].Bounds()
-		imgW, imgH := bounds.Dx(), bounds.Dy()
-		aspectRatio := float64(imgH) / float64(imgW)
-		if cfg.Width > 0 && cfg.Height <= 0 {
-			// Width specified, compute height
-			cfg.Height = int32(math.Round(float64(cfg.Width)*aspectRatio/16) * 16)
-		} else if cfg.Height > 0 && cfg.Width <= 0 {
-			// Height specified, compute width
-			cfg.Width = int32(math.Round(float64(cfg.Height)/aspectRatio/16) * 16)
-		} else if cfg.Width <= 0 && cfg.Height <= 0 {
-			// Neither specified, use input dimensions
-			cfg.Width = int32(imgW)
-			cfg.Height = int32(imgH)
-		}
-	}
-	if cfg.Width <= 0 {
-		cfg.Width = 1024
-	}
-	if cfg.Height <= 0 {
-		cfg.Height = 1024
-	}
-
-	// Cap to max pixels, preserve aspect ratio, round to multiple of 16
-	pixels := int(cfg.Width) * int(cfg.Height)
-	if pixels > MaxOutputPixels {
-		scale := math.Sqrt(float64(MaxOutputPixels) / float64(pixels))
-		cfg.Width = int32(math.Round(float64(cfg.Width) * scale / 16) * 16)
-		cfg.Height = int32(math.Round(float64(cfg.Height) * scale / 16) * 16)
-	}
-	cfg.Height = int32((cfg.Height + 8) / 16 * 16) // round to nearest 16
-	cfg.Width = int32((cfg.Width + 8) / 16 * 16)
-	fmt.Printf("  Output: %dx%d\n", cfg.Width, cfg.Height)
-
-	tcfg := m.Transformer.TransformerConfig
-	patchSize := m.VAE.Config.PatchSize
-
-	// Latent dimensions: image / 8 (VAE downscale) / patch_size
-	latentH := cfg.Height / 8
-	latentW := cfg.Width / 8
-	patchH := latentH / patchSize[0]
-	patchW := latentW / patchSize[1]
-	imgSeqLen := patchH * patchW
-
-	// Text encoding with multi-layer extraction (no padding, use true sequence length)
-	fmt.Print("  Encoding prompt... ")
-	promptEmbeds, textLen := m.TextEncoder.EncodePromptWithLayers(m.Tokenizer, cfg.Prompt, 512, TextEncoderLayerIndices, false)
-	fmt.Println("✓")
-
-	// Encode reference images if provided
-	var refTokens *ImageCondTokens
-	var refHeights, refWidths []int32
-	if len(cfg.InputImages) > 0 {
-		fmt.Printf("  Encoding %d reference image(s):\n", len(cfg.InputImages))
-
-		var err error
-		refTokens, err = m.EncodeImageRefs(cfg.InputImages)
-		if err != nil {
-			return nil, fmt.Errorf("encode reference images: %w", err)
-		}
-
-		// Extract heights/widths for RoPE computation (same limits as EncodeImageRefs)
-		limitPixels := MaxRefPixels
-		if len(cfg.InputImages) > 1 {
-			limitPixels = MaxRefPixels / 2
-		}
-		for _, img := range cfg.InputImages {
-			_, w, h := PrepareImage(img, limitPixels)
-			refHeights = append(refHeights, int32(h/16))
-			refWidths = append(refWidths, int32(w/16))
-		}
-	}
-
-	// Scheduler
-	scheduler := NewFlowMatchScheduler(m.SchedulerConfig)
-	scheduler.SetTimestepsWithMu(cfg.Steps, CalculateShift(imgSeqLen, cfg.Steps))
-
-	// Init latents in packed form [B, C*4, H/2, W/2] like diffusers
-	// diffusers creates noise in [B, 128, 64, 64] and packs to [B, 4096, 128]
-	latentChannels := m.VAE.Config.LatentChannels
-	packedChannels := latentChannels * 4 // 32 * 4 = 128
-	latents := scheduler.InitNoise([]int32{1, packedChannels, patchH, patchW}, cfg.Seed)
-
-	// Pack latents (transpose): [B, C, H, W] -> [B, H*W, C]
-	// This matches diffusers' _pack_latents
-	patches := packLatents(latents)
-	noiseSeqLen := patches.Shape()[1]
-
-	// RoPE cache - includes reference images if present
-	rope := PrepareRoPECache(textLen, patchH, patchW, tcfg.AxesDimsRoPE, tcfg.RopeTheta, refHeights, refWidths, ImageRefScale)
-
-	// Cleanup setup arrays when done
-	defer func() {
-		rope.Cos.Free()
-		rope.Sin.Free()
-		promptEmbeds.Free()
-		if refTokens != nil {
-			refTokens.Tokens.Free()
-		}
-	}()
-
-	// Pre-compute all timesteps before the loop to avoid per-step tensor creation
-	timesteps := make([]*mlx.Array, cfg.Steps)
-	for i := 0; i < cfg.Steps; i++ {
-		tCurr := scheduler.Timesteps[i] / float32(m.SchedulerConfig.NumTrainTimesteps)
-		timesteps[i] = mlx.ToBFloat16(mlx.NewArray([]float32{tCurr}, []int32{1}))
-	}
-
-	// Evaluate setup arrays
-	fmt.Print("  Evaluating setup... ")
-	setupStart := time.Now()
-	toEval := []*mlx.Array{promptEmbeds, patches, rope.Cos, rope.Sin}
-	toEval = append(toEval, timesteps...)
-	if refTokens != nil {
-		toEval = append(toEval, refTokens.Tokens)
-	}
-	mlx.Eval(toEval...)
-	mlx.MetalResetPeakMemory() // Reset peak to measure generation separately
-	fmt.Printf("✓ (%.2fs, %.1f GB)\n", time.Since(setupStart).Seconds(),
-		float64(mlx.MetalGetActiveMemory())/(1024*1024*1024))
-
-	if cfg.Progress != nil {
-		cfg.Progress(0, cfg.Steps)
-	}
-
-	loopStart := time.Now()
-	stepStart := time.Now()
-
-	// Denoising loop
-	for i := 0; i < cfg.Steps; i++ {
-		// Check for cancellation
-		if ctx != nil {
-			select {
-			case <-ctx.Done():
-				return nil, ctx.Err()
-			default:
-			}
-		}
-
-		// GPU capture on step 2 if requested
-		if cfg.CapturePath != "" && i == 1 {
-			mlx.MetalStartCapture(cfg.CapturePath)
-		}
-
-		timestep := timesteps[i]
-
-		// Prepare input - concatenate noise patches with reference tokens if present
-		imgInput := patches
-		if refTokens != nil {
-			imgInput = mlx.Concatenate([]*mlx.Array{patches, refTokens.Tokens}, 1)
-		}
-
-		// Transformer forward pass
-		output := m.Transformer.Forward(imgInput, promptEmbeds, timestep, rope)
-
-		// If we concatenated reference tokens, slice to only get noise portion
-		if refTokens != nil {
-			output = mlx.Slice(output, []int32{0, 0, 0}, []int32{1, noiseSeqLen, output.Shape()[2]})
-		}
-
-		// Scheduler step (keep reference to old patches for the computation graph)
-		newPatches := scheduler.Step(output, patches, i)
-
-		if cfg.CapturePath != "" && i == 1 {
-			mlx.MetalStopCapture()
-		}
-
-		mlx.Eval(newPatches)
-		patches = newPatches
-
-		elapsed := time.Since(stepStart).Seconds()
-		peakGB := float64(mlx.MetalGetPeakMemory()) / (1024 * 1024 * 1024)
-		if i == 0 {
-			fmt.Printf("    step %d: %.2fs (JIT warmup), peak %.1f GB\n", i+1, elapsed, peakGB)
-		} else {
-			fmt.Printf("    step %d: %.2fs, peak %.1f GB\n", i+1, elapsed, peakGB)
-		}
-		stepStart = time.Now()
-		if cfg.Progress != nil {
-			cfg.Progress(i+1, cfg.Steps)
-		}
-	}
-
-	loopTime := time.Since(loopStart).Seconds()
-	peakMem := float64(mlx.MetalGetPeakMemory()) / (1024 * 1024 * 1024)
-	fmt.Printf("  Denoised %d steps in %.2fs (%.2fs/step), peak %.1f GB\n",
-		cfg.Steps, loopTime, loopTime/float64(cfg.Steps), peakMem)
-
-	// Free timesteps now that denoising is done
-	for _, ts := range timesteps {
-		ts.Free()
-	}
-
-	// VAE decode with tiling for larger images
-	fmt.Print("  Decoding VAE... ")
-	vaeStart := time.Now()
-	// Enable tiling for images > 512x512 (latent > 64x64)
-	// VAE attention is O(n²) on latent pixels, tiling reduces memory significantly
-	if patchH*2 > 64 || patchW*2 > 64 {
-		m.VAE.Tiling = DefaultTilingConfig()
-	}
-	decoded := m.VAE.Decode(patches, patchH, patchW)
-	mlx.Eval(decoded)
-
-	// Free patches now that decode is done
-	patches.Free()
-
-	fmt.Printf("✓ (%.2fs, peak %.1f GB)\n", time.Since(vaeStart).Seconds(),
-		float64(mlx.MetalGetPeakMemory())/(1024*1024*1024))
-
-	return decoded, nil
-}
-
-// packLatents converts [B, C, H, W] to [B, H*W, C] (matches diffusers _pack_latents)
-func packLatents(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	C := shape[1]
-	H := shape[2]
-	W := shape[3]
-	// [B, C, H, W] -> [B, C, H*W] -> [B, H*W, C]
-	x = mlx.Reshape(x, B, C, H*W)
-	return mlx.Transpose(x, 0, 2, 1)
-}
-
-// LoadPersistent loads the model and keeps it in memory for repeated use.
-func LoadPersistent(modelName string) (*Model, error) {
-	m := &Model{}
-	if err := m.Load(modelName); err != nil {
-		return nil, err
-	}
-	return m, nil
-}
-
-// ImageRefScale is the time coordinate offset between reference images (matches diffusers scale=10)
-const ImageRefScale = 10
-
-// PrepareImage resizes and crops an image to be a multiple of 16, with optional pixel limit.
-// Returns the processed image and its dimensions.
-func PrepareImage(img image.Image, limitPixels int) (image.Image, int, int) {
-	bounds := img.Bounds()
-	w, h := bounds.Dx(), bounds.Dy()
-
-	// Cap pixels if needed (like diffusers cap_pixels)
-	if limitPixels > 0 && w*h > limitPixels {
-		scale := math.Sqrt(float64(limitPixels) / float64(w*h))
-		w = int(float64(w) * scale)
-		h = int(float64(h) * scale)
-	}
-
-	// Round down to multiple of 16
-	w = (w / 16) * 16
-	h = (h / 16) * 16
-
-	if w < 16 {
-		w = 16
-	}
-	if h < 16 {
-		h = 16
-	}
-
-	// Resize using high-quality bicubic interpolation (matches diffusers' default lanczos)
-	resized := image.NewRGBA(image.Rect(0, 0, w, h))
-	draw.CatmullRom.Scale(resized, resized.Bounds(), img, img.Bounds(), draw.Over, nil)
-
-	return resized, w, h
-}
-
-// ImageToTensor converts an image to a tensor in [-1, 1] range with shape [1, C, H, W].
-func ImageToTensor(img image.Image) *mlx.Array {
-	bounds := img.Bounds()
-	w, h := bounds.Dx(), bounds.Dy()
-
-	// Convert to float32 array in NCHW format [1, 3, H, W] with values in [-1, 1]
-	data := make([]float32, 3*h*w)
-
-	for y := 0; y < h; y++ {
-		for x := 0; x < w; x++ {
-			r, g, b, _ := img.At(x+bounds.Min.X, y+bounds.Min.Y).RGBA()
-			// RGBA returns 16-bit values, convert to [-1, 1]
-			data[0*h*w+y*w+x] = float32(r>>8)/127.5 - 1.0
-			data[1*h*w+y*w+x] = float32(g>>8)/127.5 - 1.0
-			data[2*h*w+y*w+x] = float32(b>>8)/127.5 - 1.0
-		}
-	}
-
-	arr := mlx.NewArrayFloat32(data, []int32{1, 3, int32(h), int32(w)})
-	return arr
-}
-
-// ImageCondTokens holds encoded reference image tokens.
-type ImageCondTokens struct {
-	Tokens *mlx.Array // [1, total_tokens, C] - concatenated reference tokens
-}
-
-// EncodeImageRefs encodes reference images using the VAE.
-func (m *Model) EncodeImageRefs(images []image.Image) (*ImageCondTokens, error) {
-	if len(images) == 0 {
-		return nil, nil
-	}
-
-	// Limit reference images to reduce attention memory
-	limitPixels := MaxRefPixels
-	if len(images) > 1 {
-		limitPixels = MaxRefPixels / 2
-	}
-
-	var allTokens []*mlx.Array
-
-	for _, img := range images {
-		// Prepare image (resize, crop to multiple of 16)
-		prepared, prepW, prepH := PrepareImage(img, limitPixels)
-		fmt.Printf("    Encoding %dx%d image... ", prepW, prepH)
-
-		// Convert to tensor [-1, 1]
-		tensor := ImageToTensor(prepared)
-
-		// Encode with VAE - returns [1, L, 128]
-		encoded := m.VAE.EncodeImage(tensor)
-		squeezed := mlx.Squeeze(encoded, 0) // [L, C]
-
-		// Defer eval - will be done with other setup arrays
-		allTokens = append(allTokens, squeezed)
-		fmt.Println("✓")
-	}
-
-	// For single image, just add batch dimension directly
-	// For multiple images, concatenate first
-	var tokens *mlx.Array
-	if len(allTokens) == 1 {
-		tokens = mlx.ExpandDims(allTokens[0], 0) // [1, L, C]
-	} else {
-		tokens = mlx.Concatenate(allTokens, 0) // [total_L, C]
-		tokens = mlx.ExpandDims(tokens, 0)     // [1, total_L, C]
-	}
-
-	return &ImageCondTokens{Tokens: tokens}, nil
-}
--- a/x/imagegen/models/flux2/rope.go
+++ b/x/imagegen/models/flux2/rope.go
@@ -1,224 +0,0 @@
-//go:build mlx
-
-package flux2
-
-import (
-	"math"
-
-	"github.com/ollama/ollama/x/imagegen/mlx"
-)
-
-// RoPEConfig holds 4D RoPE configuration for Flux2
-type RoPEConfig struct {
-	Theta    int32   // 2000 for Klein
-	AxesDims []int32 // [32, 32, 32, 32] - dimensions for T, H, W, L axes
-}
-
-// RoPECache holds precomputed RoPE cos/sin values
-type RoPECache struct {
-	Cos      *mlx.Array // [1, TotalSeqLen, 1, head_dim/2]
-	Sin      *mlx.Array // [1, TotalSeqLen, 1, head_dim/2]
-	TextLen  int32      // Length of text sequence
-	ImageLen int32      // Length of image sequence
-}
-
-// PrepareTextIDs creates position IDs for text tokens.
-// Text tokens use: T=0, H=0, W=0, L=0..seqLen-1
-// Returns: [seqLen, 4]
-func PrepareTextIDs(seqLen int32) *mlx.Array {
-	ids := make([]float32, seqLen*4)
-	for i := int32(0); i < seqLen; i++ {
-		idx := i * 4
-		ids[idx+0] = 0             // T = 0
-		ids[idx+1] = 0             // H = 0
-		ids[idx+2] = 0             // W = 0
-		ids[idx+3] = float32(i)    // L = sequence position
-	}
-	return mlx.NewArray(ids, []int32{seqLen, 4})
-}
-
-// PrepareLatentIDs creates position IDs for image latent tokens.
-// Latent tokens use: T=0, H=0..height-1, W=0..width-1, L=0
-// The latents are in row-major order (H then W).
-// Returns: [height*width, 4]
-func PrepareLatentIDs(height, width int32) *mlx.Array {
-	seqLen := height * width
-	ids := make([]float32, seqLen*4)
-	idx := 0
-	for h := int32(0); h < height; h++ {
-		for w := int32(0); w < width; w++ {
-			ids[idx*4+0] = 0           // T = 0
-			ids[idx*4+1] = float32(h)  // H = row
-			ids[idx*4+2] = float32(w)  // W = column
-			ids[idx*4+3] = 0           // L = 0
-			idx++
-		}
-	}
-	return mlx.NewArray(ids, []int32{seqLen, 4})
-}
-
-// PrepareImageIDs creates position IDs for reference image tokens (used in editing).
-// Reference images use: T=scale*(i+1), H=0..h-1, W=0..w-1, L=0
-// where i is the image index (0, 1, 2, ...) and scale separates images in T dimension.
-// Returns: [total_tokens, 4]
-func PrepareImageIDs(imageHeights, imageWidths []int32, scale int32) *mlx.Array {
-	// Calculate total tokens
-	totalTokens := int32(0)
-	for i := range imageHeights {
-		totalTokens += imageHeights[i] * imageWidths[i]
-	}
-
-	ids := make([]float32, totalTokens*4)
-	idx := int32(0)
-	for imgIdx, h := range imageHeights {
-		w := imageWidths[imgIdx]
-		tValue := float32(scale * int32(imgIdx+1))
-		for hi := int32(0); hi < h; hi++ {
-			for wi := int32(0); wi < w; wi++ {
-				ids[idx*4+0] = tValue       // T = scale * (imgIdx + 1)
-				ids[idx*4+1] = float32(hi)  // H = row
-				ids[idx*4+2] = float32(wi)  // W = column
-				ids[idx*4+3] = 0            // L = 0
-				idx++
-			}
-		}
-	}
-	return mlx.NewArray(ids, []int32{totalTokens, 4})
-}
-
-// ComputeRoPE computes cos and sin for 4D rotary position embeddings.
-// ids: [L, 4] with (T, H, W, L) coordinates
-// axesDims: [32, 32, 32, 32] - each axis has this many dimensions (total = head_dim = 128)
-// theta: base frequency (2000 for Klein)
-// Returns: cos, sin each [1, L, 1, head_dim] with repeat_interleave applied
-func ComputeRoPE(ids *mlx.Array, axesDims []int32, theta int32) (*mlx.Array, *mlx.Array) {
-	shape := ids.Shape()
-	seqLen := shape[0]
-
-	// Compute total head dim (sum of all axes dims)
-	headDim := int32(0)
-	for _, d := range axesDims {
-		headDim += d
-	}
-
-	// Extract each coordinate dimension
-	// ids[:, 0] = T, ids[:, 1] = H, ids[:, 2] = W, ids[:, 3] = L
-	posT := mlx.Slice(ids, []int32{0, 0}, []int32{seqLen, 1}) // [L, 1]
-	posH := mlx.Slice(ids, []int32{0, 1}, []int32{seqLen, 2}) // [L, 1]
-	posW := mlx.Slice(ids, []int32{0, 2}, []int32{seqLen, 3}) // [L, 1]
-	posL := mlx.Slice(ids, []int32{0, 3}, []int32{seqLen, 4}) // [L, 1]
-
-	// Compute frequencies for each axis
-	logTheta := float32(math.Log(float64(theta)))
-	cosArrs := make([]*mlx.Array, 4)
-	sinArrs := make([]*mlx.Array, 4)
-	positions := []*mlx.Array{posT, posH, posW, posL}
-
-	for i, axisDim := range axesDims {
-		half := axisDim / 2
-
-		// Create frequency array for this axis: theta^(-2j/dim) for j=0..half-1
-		// This matches diffusers: 1.0 / (theta ** (torch.arange(0, dim, 2) / dim))
-		freqs := make([]float32, half)
-		for j := int32(0); j < half; j++ {
-			freqs[j] = float32(math.Exp(float64(-logTheta * float32(2*j) / float32(axisDim))))
-		}
-		freqArr := mlx.NewArray(freqs, []int32{1, half})
-
-		// Compute pos * freq -> [L, half]
-		posExpanded := positions[i] // [L, 1]
-		args := mlx.Mul(posExpanded, freqArr) // [L, half]
-
-		// Compute cos and sin for this axis
-		cosAxis := mlx.Cos(args) // [L, half]
-		sinAxis := mlx.Sin(args) // [L, half]
-
-		// repeat_interleave(2): [c0, c1, ...] -> [c0, c0, c1, c1, ...]
-		// Reshape [L, half] -> [L, half, 1], tile to [L, half, 2], reshape to [L, axisDim]
-		cosAxis = mlx.ExpandDims(cosAxis, 2)                        // [L, half, 1]
-		cosAxis = mlx.Tile(cosAxis, []int32{1, 1, 2})               // [L, half, 2]
-		cosAxis = mlx.Reshape(cosAxis, seqLen, axisDim)             // [L, axisDim]
-
-		sinAxis = mlx.ExpandDims(sinAxis, 2)
-		sinAxis = mlx.Tile(sinAxis, []int32{1, 1, 2})
-		sinAxis = mlx.Reshape(sinAxis, seqLen, axisDim)
-
-		cosArrs[i] = cosAxis
-		sinArrs[i] = sinAxis
-	}
-
-	// Concatenate all axes: [L, headDim]
-	cos := mlx.Concatenate(cosArrs, 1)
-	sin := mlx.Concatenate(sinArrs, 1)
-
-	// Reshape to [1, L, 1, headDim] for broadcasting with attention
-	cos = mlx.Reshape(cos, 1, seqLen, 1, headDim)
-	sin = mlx.Reshape(sin, 1, seqLen, 1, headDim)
-
-	return cos, sin
-}
-
-// ApplyRoPE4D applies 4D rotary position embeddings to queries and keys.
-// x: [B, L, nheads, head_dim]
-// cos, sin: [1, L, 1, head_dim] (with repeat_interleave applied)
-// Returns: x with RoPE applied
-// Matches diffusers apply_rotary_emb with use_real=True, use_real_unbind_dim=-1
-func ApplyRoPE4D(x *mlx.Array, cos, sin *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	L := shape[1]
-	nheads := shape[2]
-	headDim := shape[3]
-	half := headDim / 2
-
-	// Reshape x to [B, L, nheads, half, 2] and split into real/imag
-	xReshaped := mlx.Reshape(x, B, L, nheads, half, 2)
-
-	// Extract real (index 0) and imag (index 1) parts
-	xReal := mlx.Slice(xReshaped, []int32{0, 0, 0, 0, 0}, []int32{B, L, nheads, half, 1})
-	xImag := mlx.Slice(xReshaped, []int32{0, 0, 0, 0, 1}, []int32{B, L, nheads, half, 2})
-	xReal = mlx.Squeeze(xReal, 4) // [B, L, nheads, half]
-	xImag = mlx.Squeeze(xImag, 4) // [B, L, nheads, half]
-
-	// x_rotated = stack([-x_imag, x_real], dim=-1).flatten(-2)
-	// This creates [-x_imag[0], x_real[0], -x_imag[1], x_real[1], ...]
-	negXImag := mlx.Neg(xImag)
-	negXImag = mlx.ExpandDims(negXImag, 4) // [B, L, nheads, half, 1]
-	xReal = mlx.ExpandDims(xReal, 4)       // [B, L, nheads, half, 1]
-	xRotated := mlx.Concatenate([]*mlx.Array{negXImag, xReal}, 4) // [B, L, nheads, half, 2]
-	xRotated = mlx.Reshape(xRotated, B, L, nheads, headDim)       // [B, L, nheads, headDim]
-
-	// out = x * cos + x_rotated * sin
-	return mlx.Add(mlx.Mul(x, cos), mlx.Mul(xRotated, sin))
-}
-
-// PrepareRoPECache creates RoPE cache for text + noise, optionally with reference images.
-// textLen: number of text tokens
-// noiseH, noiseW: dimensions of the noise latent in patch tokens
-// axesDims: [32, 32, 32, 32]
-// theta: 2000
-// refHeights, refWidths: optional reference image dimensions (pass nil/empty for no images)
-// scale: time coordinate offset between reference images (e.g., 10)
-func PrepareRoPECache(textLen, noiseH, noiseW int32, axesDims []int32, theta int32, refHeights, refWidths []int32, scale int32) *RoPECache {
-	textIDs := PrepareTextIDs(textLen)
-	noiseIDs := PrepareLatentIDs(noiseH, noiseW)
-
-	var allIDs *mlx.Array
-	imageLen := noiseH * noiseW
-
-	if len(refHeights) > 0 {
-		refIDs := PrepareImageIDs(refHeights, refWidths, scale)
-		allIDs = mlx.Concatenate([]*mlx.Array{textIDs, noiseIDs, refIDs}, 0)
-		for i := range refHeights {
-			imageLen += refHeights[i] * refWidths[i]
-		}
-	} else {
-		allIDs = mlx.Concatenate([]*mlx.Array{textIDs, noiseIDs}, 0)
-	}
-
-	cos, sin := ComputeRoPE(allIDs, axesDims, theta)
-	cos = mlx.ToBFloat16(cos)
-	sin = mlx.ToBFloat16(sin)
-
-	return &RoPECache{Cos: cos, Sin: sin, TextLen: textLen, ImageLen: imageLen}
-}
--- a/x/imagegen/models/flux2/scheduler.go
+++ b/x/imagegen/models/flux2/scheduler.go
@@ -1,149 +0,0 @@
-//go:build mlx
-
-package flux2
-
-import (
-	"math"
-
-	"github.com/ollama/ollama/x/imagegen/mlx"
-)
-
-// SchedulerConfig holds Flow-Match scheduler configuration
-type SchedulerConfig struct {
-	NumTrainTimesteps  int32   `json:"num_train_timesteps"`  // 1000
-	Shift              float32 `json:"shift"`                // 3.0 for Klein
-	UseDynamicShifting bool    `json:"use_dynamic_shifting"` // true
-	TimeShiftType      string  `json:"time_shift_type"`      // "exponential" or "linear"
-}
-
-// DefaultSchedulerConfig returns default config for Klein
-func DefaultSchedulerConfig() *SchedulerConfig {
-	return &SchedulerConfig{
-		NumTrainTimesteps:  1000,
-		Shift:              3.0, // Klein uses 3.0
-		UseDynamicShifting: true,
-		TimeShiftType:      "exponential",
-	}
-}
-
-// FlowMatchScheduler implements the Flow-Match Euler discrete scheduler
-type FlowMatchScheduler struct {
-	Config    *SchedulerConfig
-	Timesteps []float32 // Discretized timesteps (t from 1 to 0)
-	Sigmas    []float32 // Noise levels at each timestep
-	NumSteps  int       // Number of inference steps
-}
-
-// NewFlowMatchScheduler creates a new scheduler
-func NewFlowMatchScheduler(cfg *SchedulerConfig) *FlowMatchScheduler {
-	return &FlowMatchScheduler{
-		Config: cfg,
-	}
-}
-
-// SetTimesteps sets up the scheduler for the given number of inference steps
-func (s *FlowMatchScheduler) SetTimesteps(numSteps int) {
-	s.SetTimestepsWithMu(numSteps, 0)
-}
-
-// SetTimestepsWithMu sets up scheduler matching diffusers set_timesteps(sigmas=..., mu=...)
-func (s *FlowMatchScheduler) SetTimestepsWithMu(numSteps int, mu float32) {
-	s.NumSteps = numSteps
-
-	// diffusers: sigmas = linspace(1, 1/num_steps, num_steps)
-	// Then applies time shift, appends 0.0 at end
-	s.Sigmas = make([]float32, numSteps+1)
-
-	for i := 0; i < numSteps; i++ {
-		// linspace(1, 1/num_steps, num_steps)
-		var sigma float32
-		if numSteps == 1 {
-			sigma = 1.0
-		} else {
-			sigma = 1.0 - float32(i)/float32(numSteps-1)*(1.0-1.0/float32(numSteps))
-		}
-
-		// Apply time shift if using dynamic shifting
-		if s.Config.UseDynamicShifting && mu != 0 {
-			sigma = s.timeShift(mu, sigma)
-		} else {
-			// If not dynamic shifting, apply fixed shift scaling like diffusers
-			shift := s.Config.Shift
-			sigma = shift * sigma / (1 + (shift-1)*sigma)
-		}
-		s.Sigmas[i] = sigma
-	}
-	// Append terminal zero
-	s.Sigmas[numSteps] = 0.0
-
-	// Timesteps scaled to training range (matches diffusers: timesteps = sigmas * num_train_timesteps)
-	s.Timesteps = make([]float32, numSteps+1)
-	for i, v := range s.Sigmas {
-		s.Timesteps[i] = v * float32(s.Config.NumTrainTimesteps)
-	}
-}
-
-// timeShift applies the dynamic time shift
-func (s *FlowMatchScheduler) timeShift(mu float32, t float32) float32 {
-	if t <= 0 {
-		return 0
-	}
-	if s.Config.TimeShiftType == "linear" {
-		return mu / (mu + (1.0/t-1.0))
-	}
-	// Default: exponential
-	expMu := float32(math.Exp(float64(mu)))
-	return expMu / (expMu + (1.0/t - 1.0))
-}
-
-// Step performs one denoising step
-func (s *FlowMatchScheduler) Step(modelOutput, sample *mlx.Array, timestepIdx int) *mlx.Array {
-	sigma := s.Sigmas[timestepIdx]
-	sigmaNext := s.Sigmas[timestepIdx+1]
-
-	// Euler step: x_{t-dt} = x_t + (sigma_next - sigma) * v_t
-	dt := sigmaNext - sigma
-
-	// Upcast to float32 for precision (matches diffusers)
-	sampleF32 := mlx.AsType(sample, mlx.DtypeFloat32)
-	outputF32 := mlx.AsType(modelOutput, mlx.DtypeFloat32)
-
-	scaledOutput := mlx.MulScalar(outputF32, dt)
-	result := mlx.Add(sampleF32, scaledOutput)
-
-	// Cast back to bfloat16
-	return mlx.ToBFloat16(result)
-}
-
-// GetTimestep returns the timestep value at the given index
-func (s *FlowMatchScheduler) GetTimestep(idx int) float32 {
-	if idx < len(s.Timesteps) {
-		return s.Timesteps[idx]
-	}
-	return 0.0
-}
-
-// InitNoise creates initial noise for sampling
-func (s *FlowMatchScheduler) InitNoise(shape []int32, seed int64) *mlx.Array {
-	return mlx.RandomNormalWithDtype(shape, uint64(seed), mlx.DtypeBFloat16)
-}
-
-// CalculateShift computes the mu shift value for dynamic scheduling
-// Matches diffusers compute_empirical_mu function
-func CalculateShift(imgSeqLen int32, numSteps int) float32 {
-	a1, b1 := float32(8.73809524e-05), float32(1.89833333)
-	a2, b2 := float32(0.00016927), float32(0.45666666)
-
-	seqLen := float32(imgSeqLen)
-
-	if imgSeqLen > 4300 {
-		return a2*seqLen + b2
-	}
-
-	m200 := a2*seqLen + b2
-	m10 := a1*seqLen + b1
-
-	a := (m200 - m10) / 190.0
-	b := m200 - 200.0*a
-	return a*float32(numSteps) + b
-}
--- a/x/imagegen/models/flux2/transformer.go
+++ b/x/imagegen/models/flux2/transformer.go
@@ -1,562 +0,0 @@
-//go:build mlx
-
-package flux2
-
-import (
-	"fmt"
-	"math"
-
-	"github.com/ollama/ollama/x/imagegen"
-	"github.com/ollama/ollama/x/imagegen/mlx"
-	"github.com/ollama/ollama/x/imagegen/nn"
-	"github.com/ollama/ollama/x/imagegen/safetensors"
-)
-
-// TransformerConfig holds Flux2 transformer configuration
-type TransformerConfig struct {
-	AttentionHeadDim         int32   `json:"attention_head_dim"`          // 128
-	AxesDimsRoPE             []int32 `json:"axes_dims_rope"`              // [32, 32, 32, 32]
-	Eps                      float32 `json:"eps"`                         // 1e-6
-	GuidanceEmbeds           bool    `json:"guidance_embeds"`             // false for Klein
-	InChannels               int32   `json:"in_channels"`                 // 128
-	JointAttentionDim        int32   `json:"joint_attention_dim"`         // 7680
-	MLPRatio                 float32 `json:"mlp_ratio"`                   // 3.0
-	NumAttentionHeads        int32   `json:"num_attention_heads"`         // 24
-	NumLayers                int32   `json:"num_layers"`                  // 5
-	NumSingleLayers          int32   `json:"num_single_layers"`           // 20
-	PatchSize                int32   `json:"patch_size"`                  // 1
-	RopeTheta                int32   `json:"rope_theta"`                  // 2000
-	TimestepGuidanceChannels int32   `json:"timestep_guidance_channels"`  // 256
-}
-
-// Computed dimensions
-func (c *TransformerConfig) InnerDim() int32 {
-	return c.NumAttentionHeads * c.AttentionHeadDim // 24 * 128 = 3072
-}
-
-func (c *TransformerConfig) MLPHiddenDim() int32 {
-	return int32(float32(c.InnerDim()) * c.MLPRatio) // 3072 * 3.0 = 9216
-}
-
-// TimestepEmbedder creates timestep embeddings
-// Weight names: time_guidance_embed.timestep_embedder.linear_1.weight, linear_2.weight
-type TimestepEmbedder struct {
-	Linear1  nn.LinearLayer `weight:"linear_1"`
-	Linear2  nn.LinearLayer `weight:"linear_2"`
-	EmbedDim int32          // 256
-}
-
-// Forward creates sinusoidal embeddings and projects them
-func (t *TimestepEmbedder) Forward(timesteps *mlx.Array) *mlx.Array {
-	half := t.EmbedDim / 2
-	freqs := make([]float32, half)
-	for i := int32(0); i < half; i++ {
-		freqs[i] = float32(math.Exp(-math.Log(10000.0) * float64(i) / float64(half)))
-	}
-	freqsArr := mlx.NewArray(freqs, []int32{1, half})
-
-	// timesteps: [B] -> [B, 1]
-	tExpanded := mlx.ExpandDims(timesteps, 1)
-	// args: [B, half]
-	args := mlx.Mul(tExpanded, freqsArr)
-
-	// [cos(args), sin(args)] -> [B, embed_dim]
-	sinEmbed := mlx.Concatenate([]*mlx.Array{mlx.Cos(args), mlx.Sin(args)}, 1)
-
-	// MLP: linear_1 -> silu -> linear_2
-	h := t.Linear1.Forward(sinEmbed)
-	h = mlx.SiLU(h)
-	return t.Linear2.Forward(h)
-}
-
-// TimeGuidanceEmbed wraps the timestep embedder
-// Weight names: time_guidance_embed.timestep_embedder.*
-type TimeGuidanceEmbed struct {
-	TimestepEmbedder *TimestepEmbedder `weight:"timestep_embedder"`
-}
-
-// Forward computes timestep embeddings
-func (t *TimeGuidanceEmbed) Forward(timesteps *mlx.Array) *mlx.Array {
-	return t.TimestepEmbedder.Forward(timesteps)
-}
-
-// Modulation computes adaptive modulation parameters
-// Weight names: double_stream_modulation_img.linear.weight, etc.
-type Modulation struct {
-	Linear nn.LinearLayer `weight:"linear"`
-}
-
-// Forward computes modulation parameters
-func (m *Modulation) Forward(temb *mlx.Array) *mlx.Array {
-	h := mlx.SiLU(temb)
-	return m.Linear.Forward(h)
-}
-
-// TransformerBlockAttn implements dual-stream attention
-// Weight names: transformer_blocks.N.attn.*
-type TransformerBlockAttn struct {
-	// Image stream (separate Q, K, V projections)
-	ToQ nn.LinearLayer `weight:"to_q"`
-	ToK nn.LinearLayer `weight:"to_k"`
-	ToV nn.LinearLayer `weight:"to_v"`
-	// Note: to_out has .0 suffix in weights, handled specially
-	ToOut0 nn.LinearLayer `weight:"to_out.0"`
-
-	// Text stream (add_ projections)
-	AddQProj nn.LinearLayer `weight:"add_q_proj"`
-	AddKProj nn.LinearLayer `weight:"add_k_proj"`
-	AddVProj nn.LinearLayer `weight:"add_v_proj"`
-	ToAddOut nn.LinearLayer `weight:"to_add_out"`
-
-	// QK norms for image stream
-	NormQ *mlx.Array `weight:"norm_q.weight"`
-	NormK *mlx.Array `weight:"norm_k.weight"`
-
-	// QK norms for text stream (added)
-	NormAddedQ *mlx.Array `weight:"norm_added_q.weight"`
-	NormAddedK *mlx.Array `weight:"norm_added_k.weight"`
-}
-
-// FeedForward implements SwiGLU MLP
-// Weight names: transformer_blocks.N.ff.linear_in.weight, linear_out.weight
-type FeedForward struct {
-	LinearIn  nn.LinearLayer `weight:"linear_in"`
-	LinearOut nn.LinearLayer `weight:"linear_out"`
-}
-
-// Forward applies SwiGLU MLP
-func (ff *FeedForward) Forward(x *mlx.Array) *mlx.Array {
-	// LinearIn outputs 2x hidden dim for SwiGLU
-	h := ff.LinearIn.Forward(x)
-	shape := h.Shape()
-	half := shape[len(shape)-1] / 2
-
-	// Split into gate and up
-	gate := mlx.Slice(h, []int32{0, 0, 0}, []int32{shape[0], shape[1], half})
-	up := mlx.Slice(h, []int32{0, 0, half}, []int32{shape[0], shape[1], shape[2]})
-
-	// SwiGLU: silu(gate) * up
-	h = mlx.Mul(mlx.SiLU(gate), up)
-	return ff.LinearOut.Forward(h)
-}
-
-// TransformerBlock implements a dual-stream transformer block
-// Weight names: transformer_blocks.N.*
-type TransformerBlock struct {
-	Attn      *TransformerBlockAttn `weight:"attn"`
-	FF        *FeedForward          `weight:"ff"`
-	FFContext *FeedForward          `weight:"ff_context"`
-
-	// Config (set after loading)
-	NHeads  int32
-	HeadDim int32
-	Scale   float32
-}
-
-// Forward applies the dual-stream block
-// imgHidden: [B, imgLen, dim]
-// txtHidden: [B, txtLen, dim]
-// imgMod, txtMod: modulation params [B, 6*dim] each
-// cos, sin: RoPE values
-func (block *TransformerBlock) Forward(imgHidden, txtHidden *mlx.Array, imgMod, txtMod *mlx.Array, cos, sin *mlx.Array) (*mlx.Array, *mlx.Array) {
-	imgShape := imgHidden.Shape()
-	B := imgShape[0]
-	imgLen := imgShape[1]
-	dim := imgShape[2]
-	txtLen := txtHidden.Shape()[1]
-
-	// Parse modulation: 6 params each (shift1, scale1, gate1, shift2, scale2, gate2)
-	imgShift1, imgScale1, imgGate1 := parseModulation3(imgMod, dim, 0)
-	imgShift2, imgScale2, imgGate2 := parseModulation3(imgMod, dim, 3)
-	txtShift1, txtScale1, txtGate1 := parseModulation3(txtMod, dim, 0)
-	txtShift2, txtScale2, txtGate2 := parseModulation3(txtMod, dim, 3)
-
-	// === Attention branch ===
-	// Modulate inputs
-	imgNorm := modulateLayerNorm(imgHidden, imgShift1, imgScale1)
-	txtNorm := modulateLayerNorm(txtHidden, txtShift1, txtScale1)
-
-	// Compute Q, K, V for image stream (separate projections)
-	imgQ := block.Attn.ToQ.Forward(imgNorm)
-	imgK := block.Attn.ToK.Forward(imgNorm)
-	imgV := block.Attn.ToV.Forward(imgNorm)
-
-	// Compute Q, K, V for text stream (add_ projections)
-	txtQ := block.Attn.AddQProj.Forward(txtNorm)
-	txtK := block.Attn.AddKProj.Forward(txtNorm)
-	txtV := block.Attn.AddVProj.Forward(txtNorm)
-
-	// Reshape for attention: [B, L, dim] -> [B, L, nheads, headDim]
-	imgQ = mlx.Reshape(imgQ, B, imgLen, block.NHeads, block.HeadDim)
-	imgK = mlx.Reshape(imgK, B, imgLen, block.NHeads, block.HeadDim)
-	imgV = mlx.Reshape(imgV, B, imgLen, block.NHeads, block.HeadDim)
-	txtQ = mlx.Reshape(txtQ, B, txtLen, block.NHeads, block.HeadDim)
-	txtK = mlx.Reshape(txtK, B, txtLen, block.NHeads, block.HeadDim)
-	txtV = mlx.Reshape(txtV, B, txtLen, block.NHeads, block.HeadDim)
-
-	// Apply QK norm (RMSNorm with learned scale)
-	imgQ = applyQKNorm(imgQ, block.Attn.NormQ)
-	imgK = applyQKNorm(imgK, block.Attn.NormK)
-	txtQ = applyQKNorm(txtQ, block.Attn.NormAddedQ)
-	txtK = applyQKNorm(txtK, block.Attn.NormAddedK)
-
-	// Concatenate for joint attention: text first, then image
-	q := mlx.Concatenate([]*mlx.Array{txtQ, imgQ}, 1)
-	k := mlx.Concatenate([]*mlx.Array{txtK, imgK}, 1)
-	v := mlx.Concatenate([]*mlx.Array{txtV, imgV}, 1)
-
-	// Apply RoPE
-	q = ApplyRoPE4D(q, cos, sin)
-	k = ApplyRoPE4D(k, cos, sin)
-
-	// Transpose for SDPA: [B, nheads, L, headDim]
-	q = mlx.Transpose(q, 0, 2, 1, 3)
-	k = mlx.Transpose(k, 0, 2, 1, 3)
-	v = mlx.Transpose(v, 0, 2, 1, 3)
-
-	// Scaled dot-product attention
-	out := mlx.ScaledDotProductAttention(q, k, v, block.Scale, false)
-
-	// Transpose back: [B, L, nheads, headDim]
-	out = mlx.Transpose(out, 0, 2, 1, 3)
-
-	// Split back into txt and img
-	totalLen := txtLen + imgLen
-	txtOut := mlx.Slice(out, []int32{0, 0, 0, 0}, []int32{B, txtLen, block.NHeads, block.HeadDim})
-	imgOut := mlx.Slice(out, []int32{0, txtLen, 0, 0}, []int32{B, totalLen, block.NHeads, block.HeadDim})
-
-	// Reshape and project
-	txtOut = mlx.Reshape(txtOut, B, txtLen, dim)
-	imgOut = mlx.Reshape(imgOut, B, imgLen, dim)
-	txtOut = block.Attn.ToAddOut.Forward(txtOut)
-	imgOut = block.Attn.ToOut0.Forward(imgOut)
-
-	// Apply gates and residual
-	imgHidden = mlx.Add(imgHidden, mlx.Mul(imgGate1, imgOut))
-	txtHidden = mlx.Add(txtHidden, mlx.Mul(txtGate1, txtOut))
-
-	// === MLP branch ===
-	imgNorm = modulateLayerNorm(imgHidden, imgShift2, imgScale2)
-	txtNorm = modulateLayerNorm(txtHidden, txtShift2, txtScale2)
-
-	imgFFOut := block.FF.Forward(imgNorm)
-	txtFFOut := block.FFContext.Forward(txtNorm)
-
-	imgHidden = mlx.Add(imgHidden, mlx.Mul(imgGate2, imgFFOut))
-	txtHidden = mlx.Add(txtHidden, mlx.Mul(txtGate2, txtFFOut))
-
-	return imgHidden, txtHidden
-}
-
-// SingleTransformerBlockAttn implements attention for single-stream blocks
-// Weight names: single_transformer_blocks.N.attn.*
-type SingleTransformerBlockAttn struct {
-	ToQKVMlpProj nn.LinearLayer `weight:"to_qkv_mlp_proj"` // Fused QKV + MLP input
-	ToOut        nn.LinearLayer `weight:"to_out"`          // Fused attn_out + MLP out
-	NormQ        *mlx.Array     `weight:"norm_q.weight"`
-	NormK        *mlx.Array     `weight:"norm_k.weight"`
-}
-
-// SingleTransformerBlock implements a single-stream transformer block
-// Weight names: single_transformer_blocks.N.*
-type SingleTransformerBlock struct {
-	Attn *SingleTransformerBlockAttn `weight:"attn"`
-
-	// Config
-	NHeads    int32
-	HeadDim   int32
-	InnerDim  int32
-	MLPHidDim int32
-	Scale     float32
-}
-
-// Forward applies the single-stream block
-// x: [B, L, dim] concatenated text+image
-// mod: modulation [B, 3*dim]
-func (block *SingleTransformerBlock) Forward(x *mlx.Array, mod *mlx.Array, cos, sin *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	L := shape[1]
-	dim := shape[2]
-
-	// Parse modulation: (shift, scale, gate)
-	shift, scale, gate := parseModulation3(mod, dim, 0)
-
-	// Modulate input
-	h := modulateLayerNorm(x, shift, scale)
-
-	// Fused projection: QKV + MLP gate/up
-	// linear1 outputs: [q, k, v, mlp_gate, mlp_up] = [dim, dim, dim, mlpHid, mlpHid]
-	qkvMlp := block.Attn.ToQKVMlpProj.Forward(h)
-
-	// Split: first 3*dim is QKV, rest is MLP
-	qkvDim := 3 * block.InnerDim
-	qkv := mlx.Slice(qkvMlp, []int32{0, 0, 0}, []int32{B, L, qkvDim})
-	mlpIn := mlx.Slice(qkvMlp, []int32{0, 0, qkvDim}, []int32{B, L, qkvMlp.Shape()[2]})
-
-	// Split QKV
-	q, k, v := splitQKV(qkv, B, L, block.InnerDim)
-
-	// Reshape for attention
-	q = mlx.Reshape(q, B, L, block.NHeads, block.HeadDim)
-	k = mlx.Reshape(k, B, L, block.NHeads, block.HeadDim)
-	v = mlx.Reshape(v, B, L, block.NHeads, block.HeadDim)
-
-	// QK norm
-	q = applyQKNorm(q, block.Attn.NormQ)
-	k = applyQKNorm(k, block.Attn.NormK)
-
-	// Apply RoPE
-	q = ApplyRoPE4D(q, cos, sin)
-	k = ApplyRoPE4D(k, cos, sin)
-
-	// Transpose for SDPA
-	q = mlx.Transpose(q, 0, 2, 1, 3)
-	k = mlx.Transpose(k, 0, 2, 1, 3)
-	v = mlx.Transpose(v, 0, 2, 1, 3)
-
-	// SDPA
-	attnOut := mlx.ScaledDotProductAttention(q, k, v, block.Scale, false)
-
-	// Transpose back and reshape
-	attnOut = mlx.Transpose(attnOut, 0, 2, 1, 3)
-	attnOut = mlx.Reshape(attnOut, B, L, block.InnerDim)
-
-	// MLP: SwiGLU
-	mlpShape := mlpIn.Shape()
-	half := mlpShape[2] / 2
-	mlpGate := mlx.Slice(mlpIn, []int32{0, 0, 0}, []int32{B, L, half})
-	mlpUp := mlx.Slice(mlpIn, []int32{0, 0, half}, []int32{B, L, mlpShape[2]})
-	mlpOut := mlx.Mul(mlx.SiLU(mlpGate), mlpUp)
-
-	// Concatenate attention and MLP for fused output
-	combined := mlx.Concatenate([]*mlx.Array{attnOut, mlpOut}, 2)
-
-	// Output projection
-	out := block.Attn.ToOut.Forward(combined)
-
-	// Apply gate and residual
-	return mlx.Add(x, mlx.Mul(gate, out))
-}
-
-// NormOut implements the output normalization with modulation
-// Weight names: norm_out.linear.weight
-type NormOut struct {
-	Linear nn.LinearLayer `weight:"linear"`
-}
-
-// Forward computes final modulated output
-func (n *NormOut) Forward(x *mlx.Array, temb *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	dim := shape[2]
-
-	// Modulation: temb -> silu -> linear -> [shift, scale]
-	mod := mlx.SiLU(temb)
-	mod = n.Linear.Forward(mod)
-
-	// Split into scale and shift (diffusers order: scale first, shift second)
-	scale := mlx.Slice(mod, []int32{0, 0}, []int32{B, dim})
-	shift := mlx.Slice(mod, []int32{0, dim}, []int32{B, 2 * dim})
-	shift = mlx.ExpandDims(shift, 1)
-	scale = mlx.ExpandDims(scale, 1)
-
-	// Modulate with RMSNorm
-	return modulateLayerNorm(x, shift, scale)
-}
-
-// Flux2Transformer2DModel is the main Flux2 transformer
-// Weight names at top level: time_guidance_embed.*, double_stream_modulation_*.*, etc.
-type Flux2Transformer2DModel struct {
-	// Timestep embedding
-	TimeGuidanceEmbed *TimeGuidanceEmbed `weight:"time_guidance_embed"`
-
-	// Shared modulation
-	DoubleStreamModulationImg *Modulation `weight:"double_stream_modulation_img"`
-	DoubleStreamModulationTxt *Modulation `weight:"double_stream_modulation_txt"`
-	SingleStreamModulation    *Modulation `weight:"single_stream_modulation"`
-
-	// Embedders
-	XEmbedder       nn.LinearLayer `weight:"x_embedder"`
-	ContextEmbedder nn.LinearLayer `weight:"context_embedder"`
-
-	// Transformer blocks
-	TransformerBlocks       []*TransformerBlock       `weight:"transformer_blocks"`
-	SingleTransformerBlocks []*SingleTransformerBlock `weight:"single_transformer_blocks"`
-
-	// Output
-	NormOut *NormOut       `weight:"norm_out"`
-	ProjOut nn.LinearLayer `weight:"proj_out"`
-
-	*TransformerConfig
-}
-
-// Load loads the Flux2 transformer from ollama blob storage.
-func (m *Flux2Transformer2DModel) Load(manifest *imagegen.ModelManifest) error {
-	fmt.Print("  Loading transformer... ")
-
-	// Load config from blob
-	var cfg TransformerConfig
-	if err := manifest.ReadConfigJSON("transformer/config.json", &cfg); err != nil {
-		return fmt.Errorf("config: %w", err)
-	}
-	m.TransformerConfig = &cfg
-
-	// Initialize slices
-	m.TransformerBlocks = make([]*TransformerBlock, cfg.NumLayers)
-	m.SingleTransformerBlocks = make([]*SingleTransformerBlock, cfg.NumSingleLayers)
-
-	// Initialize TimeGuidanceEmbed with embed dim
-	m.TimeGuidanceEmbed = &TimeGuidanceEmbed{
-		TimestepEmbedder: &TimestepEmbedder{EmbedDim: cfg.TimestepGuidanceChannels},
-	}
-
-	// Load weights from tensor blobs
-	weights, err := imagegen.LoadWeightsFromManifest(manifest, "transformer")
-	if err != nil {
-		return fmt.Errorf("weights: %w", err)
-	}
-	if err := weights.Load(0); err != nil {
-		return fmt.Errorf("load weights: %w", err)
-	}
-	defer weights.ReleaseAll()
-
-	return m.loadWeights(weights)
-}
-
-// loadWeights loads weights from any WeightSource into the model
-func (m *Flux2Transformer2DModel) loadWeights(weights safetensors.WeightSource) error {
-	if err := safetensors.LoadModule(m, weights, ""); err != nil {
-		return fmt.Errorf("load module: %w", err)
-	}
-	m.initComputedFields()
-	fmt.Println("✓")
-	return nil
-}
-
-// initComputedFields initializes computed fields after loading weights
-func (m *Flux2Transformer2DModel) initComputedFields() {
-	cfg := m.TransformerConfig
-	innerDim := cfg.InnerDim()
-	scale := float32(1.0 / math.Sqrt(float64(cfg.AttentionHeadDim)))
-
-	// Initialize transformer blocks
-	for _, block := range m.TransformerBlocks {
-		block.NHeads = cfg.NumAttentionHeads
-		block.HeadDim = cfg.AttentionHeadDim
-		block.Scale = scale
-	}
-
-	// Initialize single transformer blocks
-	for _, block := range m.SingleTransformerBlocks {
-		block.NHeads = cfg.NumAttentionHeads
-		block.HeadDim = cfg.AttentionHeadDim
-		block.InnerDim = innerDim
-		block.MLPHidDim = cfg.MLPHiddenDim()
-		block.Scale = scale
-	}
-}
-
-// Forward runs the Flux2 transformer
-func (m *Flux2Transformer2DModel) Forward(patches, txtEmbeds *mlx.Array, timesteps *mlx.Array, rope *RoPECache) *mlx.Array {
-	patchShape := patches.Shape()
-	B := patchShape[0]
-	imgLen := patchShape[1]
-	txtLen := txtEmbeds.Shape()[1]
-
-	// Scale timestep to 0-1000 range (diffusers multiplies by 1000)
-	scaledTimesteps := mlx.MulScalar(timesteps, 1000.0)
-
-	// Compute timestep embedding
-	temb := m.TimeGuidanceEmbed.Forward(scaledTimesteps)
-
-	// Embed patches and text
-	imgHidden := m.XEmbedder.Forward(patches)
-	txtHidden := m.ContextEmbedder.Forward(txtEmbeds)
-
-	// Compute shared modulation
-	imgMod := m.DoubleStreamModulationImg.Forward(temb)
-	txtMod := m.DoubleStreamModulationTxt.Forward(temb)
-	singleMod := m.SingleStreamModulation.Forward(temb)
-
-	// Double (dual-stream) blocks
-	for _, block := range m.TransformerBlocks {
-		imgHidden, txtHidden = block.Forward(imgHidden, txtHidden, imgMod, txtMod, rope.Cos, rope.Sin)
-	}
-
-	// Concatenate for single-stream: text first, then image
-	hidden := mlx.Concatenate([]*mlx.Array{txtHidden, imgHidden}, 1)
-
-	// Single-stream blocks
-	for _, block := range m.SingleTransformerBlocks {
-		hidden = block.Forward(hidden, singleMod, rope.Cos, rope.Sin)
-	}
-
-	// Extract image portion
-	totalLen := txtLen + imgLen
-	imgOut := mlx.Slice(hidden, []int32{0, txtLen, 0}, []int32{B, totalLen, hidden.Shape()[2]})
-
-	// Final norm and projection
-	imgOut = m.NormOut.Forward(imgOut, temb)
-	return m.ProjOut.Forward(imgOut)
-}
-
-// Note: QK normalization uses mlx.RMSNorm (the fast version) directly
-// See applyQKNorm function below
-
-// compiledSwiGLU fuses: silu(gate) * up
-// Called 30x per step (10 in dual-stream + 20 in single-stream blocks)
-var compiledSwiGLU *mlx.CompiledFunc
-
-func getCompiledSwiGLU() *mlx.CompiledFunc {
-	if compiledSwiGLU == nil {
-		compiledSwiGLU = mlx.CompileShapeless(func(inputs []*mlx.Array) []*mlx.Array {
-			gate, up := inputs[0], inputs[1]
-			return []*mlx.Array{mlx.Mul(mlx.SiLU(gate), up)}
-		}, true)
-	}
-	return compiledSwiGLU
-}
-
-// Helper functions
-
-// parseModulation3 extracts 3 modulation params (shift, scale, gate) starting at offset
-func parseModulation3(mod *mlx.Array, dim int32, offset int32) (*mlx.Array, *mlx.Array, *mlx.Array) {
-	B := mod.Shape()[0]
-	start := offset * dim
-	shift := mlx.Slice(mod, []int32{0, start}, []int32{B, start + dim})
-	scale := mlx.Slice(mod, []int32{0, start + dim}, []int32{B, start + 2*dim})
-	gate := mlx.Slice(mod, []int32{0, start + 2*dim}, []int32{B, start + 3*dim})
-
-	// Expand for broadcasting [B, dim] -> [B, 1, dim]
-	shift = mlx.ExpandDims(shift, 1)
-	scale = mlx.ExpandDims(scale, 1)
-	gate = mlx.ExpandDims(gate, 1)
-
-	return shift, scale, gate
-}
-
-// modulateLayerNorm applies LayerNorm then shift/scale modulation
-// Diffusers uses LayerNorm(elementwise_affine=False) which centers the data
-func modulateLayerNorm(x *mlx.Array, shift, scale *mlx.Array) *mlx.Array {
-	// Fast LayerNorm without learnable params
-	x = mlx.LayerNorm(x, 1e-6)
-
-	// Modulate: x * (1 + scale) + shift
-	x = mlx.Mul(x, mlx.AddScalar(scale, 1.0))
-	return mlx.Add(x, shift)
-}
-
-// splitQKV splits a fused QKV tensor into Q, K, V
-func splitQKV(qkv *mlx.Array, B, L, dim int32) (*mlx.Array, *mlx.Array, *mlx.Array) {
-	q := mlx.Slice(qkv, []int32{0, 0, 0}, []int32{B, L, dim})
-	k := mlx.Slice(qkv, []int32{0, 0, dim}, []int32{B, L, 2 * dim})
-	v := mlx.Slice(qkv, []int32{0, 0, 2 * dim}, []int32{B, L, 3 * dim})
-	return q, k, v
-}
-
-// applyQKNorm applies RMSNorm with learned scale (no bias)
-// Uses the optimized mlx_fast_rms_norm
-func applyQKNorm(x *mlx.Array, scale *mlx.Array) *mlx.Array {
-	return mlx.RMSNorm(x, scale, 1e-6)
-}
--- a/x/imagegen/models/flux2/vae.go
+++ b/x/imagegen/models/flux2/vae.go
@@ -1,804 +0,0 @@
-//go:build mlx
-
-package flux2
-
-import (
-	"fmt"
-	"math"
-
-	"github.com/ollama/ollama/x/imagegen"
-	"github.com/ollama/ollama/x/imagegen/mlx"
-	"github.com/ollama/ollama/x/imagegen/nn"
-	"github.com/ollama/ollama/x/imagegen/safetensors"
-	"github.com/ollama/ollama/x/imagegen/vae"
-)
-
-// VAEConfig holds AutoencoderKLFlux2 configuration
-type VAEConfig struct {
-	ActFn             string  `json:"act_fn"`              // "silu"
-	BatchNormEps      float32 `json:"batch_norm_eps"`      // 0.0001
-	BatchNormMomentum float32 `json:"batch_norm_momentum"` // 0.1
-	BlockOutChannels  []int32 `json:"block_out_channels"`  // [128, 256, 512, 512]
-	ForceUpcast       bool    `json:"force_upcast"`        // true
-	InChannels        int32   `json:"in_channels"`         // 3
-	LatentChannels    int32   `json:"latent_channels"`     // 32
-	LayersPerBlock    int32   `json:"layers_per_block"`    // 2
-	MidBlockAddAttn   bool    `json:"mid_block_add_attention"` // true
-	NormNumGroups     int32   `json:"norm_num_groups"`     // 32
-	OutChannels       int32   `json:"out_channels"`        // 3
-	PatchSize         []int32 `json:"patch_size"`          // [2, 2]
-	SampleSize        int32   `json:"sample_size"`         // 1024
-	UsePostQuantConv  bool    `json:"use_post_quant_conv"` // true
-	UseQuantConv      bool    `json:"use_quant_conv"`      // true
-}
-
-// BatchNorm2D implements 2D batch normalization with running statistics
-type BatchNorm2D struct {
-	RunningMean *mlx.Array // [C]
-	RunningVar  *mlx.Array // [C]
-	Weight      *mlx.Array // [C] gamma
-	Bias        *mlx.Array // [C] beta
-	Eps         float32
-	Momentum    float32
-}
-
-// Forward applies batch normalization (inference mode - uses running stats)
-// Input and output are in NHWC format [B, H, W, C]
-func (bn *BatchNorm2D) Forward(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	C := shape[3]
-
-	// Reshape stats for broadcasting [1, 1, 1, C]
-	mean := mlx.Reshape(bn.RunningMean, 1, 1, 1, C)
-	variance := mlx.Reshape(bn.RunningVar, 1, 1, 1, C)
-
-	// Normalize: (x - mean) / sqrt(var + eps)
-	xNorm := mlx.Sub(x, mean)
-	xNorm = mlx.Div(xNorm, mlx.Sqrt(mlx.AddScalar(variance, bn.Eps)))
-
-	// Scale and shift (only if affine=True)
-	if bn.Weight != nil {
-		weight := mlx.Reshape(bn.Weight, 1, 1, 1, C)
-		xNorm = mlx.Mul(xNorm, weight)
-	}
-	if bn.Bias != nil {
-		bias := mlx.Reshape(bn.Bias, 1, 1, 1, C)
-		xNorm = mlx.Add(xNorm, bias)
-	}
-
-	return xNorm
-}
-
-// Denormalize inverts the batch normalization
-// Used when decoding latents
-func (bn *BatchNorm2D) Denormalize(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	C := shape[3]
-
-	// Reshape stats for broadcasting [1, 1, 1, C]
-	mean := mlx.Reshape(bn.RunningMean, 1, 1, 1, C)
-	variance := mlx.Reshape(bn.RunningVar, 1, 1, 1, C)
-
-	// Inverse: first undo affine, then undo normalization
-	// For affine=False: x_denorm = x * sqrt(var + eps) + mean
-	if bn.Bias != nil {
-		bias := mlx.Reshape(bn.Bias, 1, 1, 1, C)
-		x = mlx.Sub(x, bias)
-	}
-	if bn.Weight != nil {
-		weight := mlx.Reshape(bn.Weight, 1, 1, 1, C)
-		x = mlx.Div(x, weight)
-	}
-	x = mlx.Mul(x, mlx.Sqrt(mlx.AddScalar(variance, bn.Eps)))
-	x = mlx.Add(x, mean)
-
-	return x
-}
-
-// GroupNormLayer implements group normalization
-// Reused from zimage package pattern
-type GroupNormLayer struct {
-	Weight    *mlx.Array `weight:"weight"`
-	Bias      *mlx.Array `weight:"bias"`
-	NumGroups int32
-	Eps       float32
-}
-
-// Forward applies group normalization
-// Input and output are in NHWC format [B, H, W, C]
-func (gn *GroupNormLayer) Forward(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	H := shape[1]
-	W := shape[2]
-	C := shape[3]
-
-	// Reshape to [B, H, W, groups, C/groups]
-	groupSize := C / gn.NumGroups
-	x = mlx.Reshape(x, B, H, W, gn.NumGroups, groupSize)
-
-	// Compute mean and variance per group
-	mean := mlx.Mean(x, 1, true)
-	mean = mlx.Mean(mean, 2, true)
-	mean = mlx.Mean(mean, 4, true)
-
-	xCentered := mlx.Sub(x, mean)
-
-	sq := mlx.Square(xCentered)
-	variance := mlx.Mean(sq, 1, true)
-	variance = mlx.Mean(variance, 2, true)
-	variance = mlx.Mean(variance, 4, true)
-
-	// Normalize
-	xNorm := mlx.Div(xCentered, mlx.Sqrt(mlx.AddScalar(variance, gn.Eps)))
-
-	// Reshape back to [B, H, W, C]
-	xNorm = mlx.Reshape(xNorm, B, H, W, C)
-
-	// Scale and shift
-	if gn.Weight != nil {
-		weight := mlx.Reshape(gn.Weight, 1, 1, 1, C)
-		xNorm = mlx.Mul(xNorm, weight)
-	}
-	if gn.Bias != nil {
-		bias := mlx.Reshape(gn.Bias, 1, 1, 1, C)
-		xNorm = mlx.Add(xNorm, bias)
-	}
-
-	return xNorm
-}
-
-// Conv2D represents a 2D convolution layer (reused pattern)
-type Conv2D struct {
-	Weight  *mlx.Array `weight:"weight"`
-	Bias    *mlx.Array `weight:"bias,optional"`
-	Stride  int32
-	Padding int32
-}
-
-// Transform implements safetensors.Transformer to transpose weights from PyTorch's OIHW to MLX's OHWI.
-func (conv *Conv2D) Transform(field string, arr *mlx.Array) *mlx.Array {
-	if field == "Weight" {
-		return mlx.Transpose(arr, 0, 2, 3, 1)
-	}
-	return arr
-}
-
-// Forward applies convolution (NHWC format)
-func (conv *Conv2D) Forward(x *mlx.Array) *mlx.Array {
-	out := mlx.Conv2d(x, conv.Weight, conv.Stride, conv.Padding)
-
-	if conv.Bias != nil {
-		bias := mlx.Reshape(conv.Bias, 1, 1, 1, conv.Bias.Dim(0))
-		out = mlx.Add(out, bias)
-	}
-
-	return out
-}
-
-// ResnetBlock2D implements a ResNet block for VAE
-type ResnetBlock2D struct {
-	Norm1        *GroupNormLayer `weight:"norm1"`
-	Conv1        *Conv2D         `weight:"conv1"`
-	Norm2        *GroupNormLayer `weight:"norm2"`
-	Conv2        *Conv2D         `weight:"conv2"`
-	ConvShortcut *Conv2D         `weight:"conv_shortcut,optional"`
-}
-
-// Forward applies the ResNet block
-func (rb *ResnetBlock2D) Forward(x *mlx.Array) *mlx.Array {
-	h := rb.Norm1.Forward(x)
-	h = mlx.SiLU(h)
-	h = rb.Conv1.Forward(h)
-
-	h = rb.Norm2.Forward(h)
-	h = mlx.SiLU(h)
-	h = rb.Conv2.Forward(h)
-
-	if rb.ConvShortcut != nil {
-		x = rb.ConvShortcut.Forward(x)
-	}
-
-	return mlx.Add(h, x)
-}
-
-// VAEAttentionBlock implements self-attention for VAE
-type VAEAttentionBlock struct {
-	GroupNorm *GroupNormLayer `weight:"group_norm"`
-	ToQ       nn.LinearLayer  `weight:"to_q"`
-	ToK       nn.LinearLayer  `weight:"to_k"`
-	ToV       nn.LinearLayer  `weight:"to_v"`
-	ToOut     nn.LinearLayer  `weight:"to_out.0"`
-}
-
-// Forward applies attention (NHWC format)
-func (ab *VAEAttentionBlock) Forward(x *mlx.Array) *mlx.Array {
-	residual := x
-	shape := x.Shape()
-	B := shape[0]
-	H := shape[1]
-	W := shape[2]
-	C := shape[3]
-
-	h := ab.GroupNorm.Forward(x)
-	h = mlx.Reshape(h, B, H*W, C)
-
-	q := ab.ToQ.Forward(h)
-	k := ab.ToK.Forward(h)
-	v := ab.ToV.Forward(h)
-
-	q = mlx.ExpandDims(q, 1)
-	k = mlx.ExpandDims(k, 1)
-	v = mlx.ExpandDims(v, 1)
-
-	scale := float32(1.0 / math.Sqrt(float64(C)))
-	out := mlx.ScaledDotProductAttention(q, k, v, scale, false)
-	out = mlx.Squeeze(out, 1)
-
-	out = ab.ToOut.Forward(out)
-	out = mlx.Reshape(out, B, H, W, C)
-	out = mlx.Add(out, residual)
-
-	return out
-}
-
-// UpDecoderBlock2D implements an upsampling decoder block
-type UpDecoderBlock2D struct {
-	ResnetBlocks []*ResnetBlock2D
-	Upsample     *Conv2D
-}
-
-// Forward applies the up decoder block
-func (ub *UpDecoderBlock2D) Forward(x *mlx.Array) *mlx.Array {
-	for _, resnet := range ub.ResnetBlocks {
-		x = resnet.Forward(x)
-	}
-
-	if ub.Upsample != nil {
-		x = upsample2x(x)
-		x = ub.Upsample.Forward(x)
-	}
-
-	return x
-}
-
-// upsample2x performs 2x nearest neighbor upsampling
-func upsample2x(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	H := shape[1]
-	W := shape[2]
-
-	hIdx := mlx.ArangeInt(0, H, 1, mlx.DtypeInt32)
-	hIdx = mlx.Reshape(hIdx, H, 1)
-	hIdx = mlx.BroadcastTo(hIdx, []int32{H, 2})
-	hIdx = mlx.Reshape(hIdx, H*2)
-
-	wIdx := mlx.ArangeInt(0, W, 1, mlx.DtypeInt32)
-	wIdx = mlx.Reshape(wIdx, W, 1)
-	wIdx = mlx.BroadcastTo(wIdx, []int32{W, 2})
-	wIdx = mlx.Reshape(wIdx, W*2)
-
-	x = mlx.Take(x, hIdx, 1)
-	x = mlx.Take(x, wIdx, 2)
-
-	return x
-}
-
-// VAEMidBlock is the middle block with attention
-type VAEMidBlock struct {
-	Resnet1   *ResnetBlock2D
-	Attention *VAEAttentionBlock
-	Resnet2   *ResnetBlock2D
-}
-
-// Forward applies the mid block
-func (mb *VAEMidBlock) Forward(x *mlx.Array) *mlx.Array {
-	x = mb.Resnet1.Forward(x)
-	x = mb.Attention.Forward(x)
-	x = mb.Resnet2.Forward(x)
-	return x
-}
-
-// DefaultTilingConfig returns reasonable defaults for tiled decoding
-// Matches diffusers: tile_latent_min_size=64, tile_overlap_factor=0.25
-func DefaultTilingConfig() *vae.TilingConfig {
-	return vae.DefaultTilingConfig()
-}
-
-// AutoencoderKLFlux2 is the Flux2 VAE with BatchNorm
-type AutoencoderKLFlux2 struct {
-	Config *VAEConfig
-
-	// Encoder components (for image editing)
-	EncoderConvIn  *Conv2D
-	EncoderMid     *VAEMidBlock
-	EncoderDown    []*DownEncoderBlock2D
-	EncoderNormOut *GroupNormLayer
-	EncoderConvOut *Conv2D
-
-	// Decoder components
-	DecoderConvIn  *Conv2D
-	DecoderMid     *VAEMidBlock
-	DecoderUp      []*UpDecoderBlock2D
-	DecoderNormOut *GroupNormLayer
-	DecoderConvOut *Conv2D
-
-	// Quant conv layers
-	QuantConv     *Conv2D
-	PostQuantConv *Conv2D
-
-	// BatchNorm for latent normalization
-	LatentBN *BatchNorm2D
-
-	// Tiling configuration (nil = no tiling)
-	Tiling *vae.TilingConfig
-}
-
-// DownEncoderBlock2D implements a downsampling encoder block
-type DownEncoderBlock2D struct {
-	ResnetBlocks []*ResnetBlock2D
-	Downsample   *Conv2D
-}
-
-// Forward applies the down encoder block
-func (db *DownEncoderBlock2D) Forward(x *mlx.Array) *mlx.Array {
-	for _, resnet := range db.ResnetBlocks {
-		x = resnet.Forward(x)
-	}
-
-	if db.Downsample != nil {
-		// Pad then conv with stride 2
-		x = mlx.Pad(x, []int32{0, 0, 0, 1, 0, 1, 0, 0})
-		x = db.Downsample.Forward(x)
-	}
-
-	return x
-}
-
-// Load loads the Flux2 VAE from ollama blob storage.
-func (m *AutoencoderKLFlux2) Load(manifest *imagegen.ModelManifest) error {
-	fmt.Print("  Loading VAE... ")
-
-	// Load config from blob
-	var cfg VAEConfig
-	if err := manifest.ReadConfigJSON("vae/config.json", &cfg); err != nil {
-		return fmt.Errorf("config: %w", err)
-	}
-	m.Config = &cfg
-
-	// Load weights from tensor blobs
-	weights, err := imagegen.LoadWeightsFromManifest(manifest, "vae")
-	if err != nil {
-		return fmt.Errorf("weights: %w", err)
-	}
-	if err := weights.Load(0); err != nil {
-		return fmt.Errorf("load weights: %w", err)
-	}
-	defer weights.ReleaseAll()
-
-	return m.loadWeights(weights, &cfg)
-}
-
-// loadWeights loads VAE weights from any WeightSource
-func (m *AutoencoderKLFlux2) loadWeights(weights safetensors.WeightSource, cfg *VAEConfig) error {
-	var err error
-
-	// Load encoder components (for image conditioning)
-	if err := m.loadEncoderWeights(weights, cfg); err != nil {
-		return fmt.Errorf("encoder: %w", err)
-	}
-
-	// Load decoder conv_in
-	m.DecoderConvIn = &Conv2D{Stride: 1, Padding: 1}
-	if err := safetensors.LoadModule(m.DecoderConvIn, weights, "decoder.conv_in"); err != nil {
-		return fmt.Errorf("decoder.conv_in: %w", err)
-	}
-
-	// Load mid block
-	m.DecoderMid, err = loadVAEMidBlock(weights, "decoder.mid_block", cfg.NormNumGroups)
-	if err != nil {
-		return fmt.Errorf("decoder.mid_block: %w", err)
-	}
-
-	// Load up blocks
-	numBlocks := len(cfg.BlockOutChannels)
-	m.DecoderUp = make([]*UpDecoderBlock2D, numBlocks)
-	for i := 0; i < numBlocks; i++ {
-		prefix := fmt.Sprintf("decoder.up_blocks.%d", i)
-		hasUpsample := i < numBlocks-1
-		m.DecoderUp[i], err = loadUpDecoderBlock2D(weights, prefix, cfg.LayersPerBlock+1, cfg.NormNumGroups, hasUpsample)
-		if err != nil {
-			return fmt.Errorf("%s: %w", prefix, err)
-		}
-	}
-
-	// Load decoder conv_norm_out and conv_out
-	m.DecoderNormOut = &GroupNormLayer{NumGroups: cfg.NormNumGroups, Eps: 1e-5}
-	if err := safetensors.LoadModule(m.DecoderNormOut, weights, "decoder.conv_norm_out"); err != nil {
-		return fmt.Errorf("decoder.conv_norm_out: %w", err)
-	}
-
-	m.DecoderConvOut = &Conv2D{Stride: 1, Padding: 1}
-	if err := safetensors.LoadModule(m.DecoderConvOut, weights, "decoder.conv_out"); err != nil {
-		return fmt.Errorf("decoder.conv_out: %w", err)
-	}
-
-	// Load post_quant_conv
-	if cfg.UsePostQuantConv {
-		m.PostQuantConv = &Conv2D{Stride: 1, Padding: 0}
-		if err := safetensors.LoadModule(m.PostQuantConv, weights, "post_quant_conv"); err != nil {
-			return fmt.Errorf("post_quant_conv: %w", err)
-		}
-	}
-
-	// Load latent BatchNorm (affine=False, so no weight/bias)
-	bnMean, err := weights.GetTensor("bn.running_mean")
-	if err != nil {
-		return fmt.Errorf("bn.running_mean: %w", err)
-	}
-	bnVar, err := weights.GetTensor("bn.running_var")
-	if err != nil {
-		return fmt.Errorf("bn.running_var: %w", err)
-	}
-	m.LatentBN = &BatchNorm2D{
-		RunningMean: bnMean,
-		RunningVar:  bnVar,
-		Weight:      nil, // affine=False
-		Bias:        nil, // affine=False
-		Eps:         cfg.BatchNormEps,
-		Momentum:    cfg.BatchNormMomentum,
-	}
-
-	fmt.Println("✓")
-	return nil
-}
-
-// loadVAEMidBlock loads the mid block.
-func loadVAEMidBlock(weights safetensors.WeightSource, prefix string, numGroups int32) (*VAEMidBlock, error) {
-	resnet1, err := loadResnetBlock2D(weights, prefix+".resnets.0", numGroups)
-	if err != nil {
-		return nil, err
-	}
-
-	attention, err := loadVAEAttentionBlock(weights, prefix+".attentions.0", numGroups)
-	if err != nil {
-		return nil, err
-	}
-
-	resnet2, err := loadResnetBlock2D(weights, prefix+".resnets.1", numGroups)
-	if err != nil {
-		return nil, err
-	}
-
-	return &VAEMidBlock{
-		Resnet1:   resnet1,
-		Attention: attention,
-		Resnet2:   resnet2,
-	}, nil
-}
-
-// loadResnetBlock2D loads a ResNet block.
-func loadResnetBlock2D(weights safetensors.WeightSource, prefix string, numGroups int32) (*ResnetBlock2D, error) {
-	block := &ResnetBlock2D{
-		Norm1:        &GroupNormLayer{NumGroups: numGroups, Eps: 1e-5},
-		Conv1:        &Conv2D{Stride: 1, Padding: 1},
-		Norm2:        &GroupNormLayer{NumGroups: numGroups, Eps: 1e-5},
-		Conv2:        &Conv2D{Stride: 1, Padding: 1},
-		ConvShortcut: &Conv2D{Stride: 1, Padding: 0}, // Pre-allocate for optional loading
-	}
-	if err := safetensors.LoadModule(block, weights, prefix); err != nil {
-		return nil, err
-	}
-	// If ConvShortcut wasn't loaded (no weights found), nil it out
-	if block.ConvShortcut.Weight == nil {
-		block.ConvShortcut = nil
-	}
-	return block, nil
-}
-
-// loadVAEAttentionBlock loads an attention block using LoadModule.
-func loadVAEAttentionBlock(weights safetensors.WeightSource, prefix string, numGroups int32) (*VAEAttentionBlock, error) {
-	ab := &VAEAttentionBlock{
-		GroupNorm: &GroupNormLayer{NumGroups: numGroups, Eps: 1e-5},
-	}
-	if err := safetensors.LoadModule(ab, weights, prefix); err != nil {
-		return nil, err
-	}
-	return ab, nil
-}
-
-// loadUpDecoderBlock2D loads an up decoder block.
-func loadUpDecoderBlock2D(weights safetensors.WeightSource, prefix string, numLayers, numGroups int32, hasUpsample bool) (*UpDecoderBlock2D, error) {
-	resnets := make([]*ResnetBlock2D, numLayers)
-	for i := int32(0); i < numLayers; i++ {
-		resPrefix := fmt.Sprintf("%s.resnets.%d", prefix, i)
-		resnet, err := loadResnetBlock2D(weights, resPrefix, numGroups)
-		if err != nil {
-			return nil, err
-		}
-		resnets[i] = resnet
-	}
-
-	var upsample *Conv2D
-	if hasUpsample {
-		upsample = &Conv2D{Stride: 1, Padding: 1}
-		if err := safetensors.LoadModule(upsample, weights, prefix+".upsamplers.0.conv"); err != nil {
-			return nil, err
-		}
-	}
-
-	return &UpDecoderBlock2D{
-		ResnetBlocks: resnets,
-		Upsample:     upsample,
-	}, nil
-}
-
-// Patchify converts latents [B, C, H, W] to patches [B, H*W/4, C*4] using 2x2 patches
-// This is the inverse of the VAE's patchify for feeding to transformer
-func (vae *AutoencoderKLFlux2) Patchify(latents *mlx.Array) *mlx.Array {
-	shape := latents.Shape()
-	B := shape[0]
-	C := shape[1]
-	H := shape[2]
-	W := shape[3]
-
-	patchH := vae.Config.PatchSize[0]
-	patchW := vae.Config.PatchSize[1]
-
-	pH := H / patchH
-	pW := W / patchW
-
-	// [B, C, H, W] -> [B, C, pH, patchH, pW, patchW]
-	x := mlx.Reshape(latents, B, C, pH, patchH, pW, patchW)
-	// [B, C, pH, patchH, pW, patchW] -> [B, pH, pW, C, patchH, patchW]
-	x = mlx.Transpose(x, 0, 2, 4, 1, 3, 5)
-	// [B, pH, pW, C, patchH, patchW] -> [B, pH*pW, C*patchH*patchW]
-	return mlx.Reshape(x, B, pH*pW, C*patchH*patchW)
-}
-
-// Unpatchify converts patches [B, L, C*4] back to [B, C, H, W]
-func (vae *AutoencoderKLFlux2) Unpatchify(patches *mlx.Array, pH, pW, C int32) *mlx.Array {
-	shape := patches.Shape()
-	B := shape[0]
-
-	patchH := vae.Config.PatchSize[0]
-	patchW := vae.Config.PatchSize[1]
-
-	// [B, pH*pW, C*patchH*patchW] -> [B, pH, pW, C, patchH, patchW]
-	x := mlx.Reshape(patches, B, pH, pW, C, patchH, patchW)
-	// [B, pH, pW, C, patchH, patchW] -> [B, C, pH, patchH, pW, patchW]
-	x = mlx.Transpose(x, 0, 3, 1, 4, 2, 5)
-	// [B, C, pH, patchH, pW, patchW] -> [B, C, H, W]
-	H := pH * patchH
-	W := pW * patchW
-	return mlx.Reshape(x, B, C, H, W)
-}
-
-// denormalizePatchified applies inverse batch normalization to patchified latents.
-// Input: [B, L, 128] where 128 = 32 latent channels * 4 (2x2 patch)
-// Output: [B, L, 128] denormalized
-func (vae *AutoencoderKLFlux2) denormalizePatchified(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	C := shape[2] // 128
-
-	// Reshape stats for broadcasting [1, 1, C]
-	mean := mlx.Reshape(vae.LatentBN.RunningMean, 1, 1, C)
-	variance := mlx.Reshape(vae.LatentBN.RunningVar, 1, 1, C)
-
-	// Inverse BN (affine=False): x_denorm = x * sqrt(var + eps) + mean
-	if vae.LatentBN.Bias != nil {
-		bias := mlx.Reshape(vae.LatentBN.Bias, 1, 1, C)
-		x = mlx.Sub(x, bias)
-	}
-	if vae.LatentBN.Weight != nil {
-		weight := mlx.Reshape(vae.LatentBN.Weight, 1, 1, C)
-		x = mlx.Div(x, weight)
-	}
-	x = mlx.Mul(x, mlx.Sqrt(mlx.AddScalar(variance, vae.LatentBN.Eps)))
-	x = mlx.Add(x, mean)
-
-	return x
-}
-
-// Decode decodes latent patches to images.
-// If Tiling is set, uses tiled decoding to reduce memory for large images.
-// latents: [B, L, C*4] patchified latents from transformer
-// pH, pW: patch grid dimensions
-// Returns: [B, 3, H, W] image tensor
-func (v *AutoencoderKLFlux2) Decode(latents *mlx.Array, pH, pW int32) *mlx.Array {
-	// Denormalize patchified latents
-	z := v.denormalizePatchified(latents)
-
-	// Unpatchify: [B, L, C*4] -> [B, C, H, W]
-	z = v.Unpatchify(z, pH, pW, v.Config.LatentChannels)
-
-	// Convert NCHW -> NHWC for processing
-	z = mlx.Transpose(z, 0, 2, 3, 1)
-
-	// Use tiled decoding if enabled
-	if v.Tiling != nil {
-		mlx.Eval(z)
-		return vae.DecodeTiled(z, v.Tiling, v.decodeTile)
-	}
-
-	// Direct decode (no tiling)
-	h := v.decodeTile(z)
-	h = mlx.ClipScalar(h, 0.0, 1.0, true, true)
-	h = mlx.Transpose(h, 0, 3, 1, 2)
-	return h
-}
-
-// decodeTile decodes a single latent tile to pixels (internal helper)
-// z: [B, H, W, C] latent tile in NHWC format
-// Returns: [B, H*8, W*8, 3] pixel tile in NHWC format (before clipping)
-func (vae *AutoencoderKLFlux2) decodeTile(z *mlx.Array) *mlx.Array {
-	// Post-quant conv
-	if vae.PostQuantConv != nil {
-		z = vae.PostQuantConv.Forward(z)
-	}
-
-	// Decoder
-	h := vae.DecoderConvIn.Forward(z)
-	h = vae.DecoderMid.Forward(h)
-
-	for _, upBlock := range vae.DecoderUp {
-		h = upBlock.Forward(h)
-	}
-
-	h = vae.DecoderNormOut.Forward(h)
-	h = mlx.SiLU(h)
-	h = vae.DecoderConvOut.Forward(h)
-
-	// VAE outputs [-1, 1], convert to [0, 1]
-	h = mlx.MulScalar(h, 0.5)
-	h = mlx.AddScalar(h, 0.5)
-
-	return h
-}
-
-// loadEncoderWeights loads the encoder components for image conditioning
-func (m *AutoencoderKLFlux2) loadEncoderWeights(weights safetensors.WeightSource, cfg *VAEConfig) error {
-	var err error
-
-	// Load encoder conv_in
-	m.EncoderConvIn = &Conv2D{Stride: 1, Padding: 1}
-	if err := safetensors.LoadModule(m.EncoderConvIn, weights, "encoder.conv_in"); err != nil {
-		return fmt.Errorf("encoder.conv_in: %w", err)
-	}
-
-	// Load encoder down blocks
-	numBlocks := len(cfg.BlockOutChannels)
-	m.EncoderDown = make([]*DownEncoderBlock2D, numBlocks)
-	for i := 0; i < numBlocks; i++ {
-		prefix := fmt.Sprintf("encoder.down_blocks.%d", i)
-		hasDownsample := i < numBlocks-1
-		m.EncoderDown[i], err = loadDownEncoderBlock2D(weights, prefix, cfg.LayersPerBlock, cfg.NormNumGroups, hasDownsample)
-		if err != nil {
-			return fmt.Errorf("%s: %w", prefix, err)
-		}
-	}
-
-	// Load encoder mid block
-	m.EncoderMid, err = loadVAEMidBlock(weights, "encoder.mid_block", cfg.NormNumGroups)
-	if err != nil {
-		return fmt.Errorf("encoder.mid_block: %w", err)
-	}
-
-	// Load encoder conv_norm_out and conv_out
-	m.EncoderNormOut = &GroupNormLayer{NumGroups: cfg.NormNumGroups, Eps: 1e-5}
-	if err := safetensors.LoadModule(m.EncoderNormOut, weights, "encoder.conv_norm_out"); err != nil {
-		return fmt.Errorf("encoder.conv_norm_out: %w", err)
-	}
-
-	m.EncoderConvOut = &Conv2D{Stride: 1, Padding: 1}
-	if err := safetensors.LoadModule(m.EncoderConvOut, weights, "encoder.conv_out"); err != nil {
-		return fmt.Errorf("encoder.conv_out: %w", err)
-	}
-
-	// Load quant_conv (for encoding)
-	if cfg.UseQuantConv {
-		m.QuantConv = &Conv2D{Stride: 1, Padding: 0}
-		if err := safetensors.LoadModule(m.QuantConv, weights, "quant_conv"); err != nil {
-			return fmt.Errorf("quant_conv: %w", err)
-		}
-	}
-
-	return nil
-}
-
-// loadDownEncoderBlock2D loads a down encoder block.
-func loadDownEncoderBlock2D(weights safetensors.WeightSource, prefix string, numLayers, numGroups int32, hasDownsample bool) (*DownEncoderBlock2D, error) {
-	resnets := make([]*ResnetBlock2D, numLayers)
-	for i := int32(0); i < numLayers; i++ {
-		resPrefix := fmt.Sprintf("%s.resnets.%d", prefix, i)
-		resnet, err := loadResnetBlock2D(weights, resPrefix, numGroups)
-		if err != nil {
-			return nil, err
-		}
-		resnets[i] = resnet
-	}
-
-	var downsample *Conv2D
-	if hasDownsample {
-		downsample = &Conv2D{Stride: 2, Padding: 0}
-		if err := safetensors.LoadModule(downsample, weights, prefix+".downsamplers.0.conv"); err != nil {
-			return nil, err
-		}
-	}
-
-	return &DownEncoderBlock2D{
-		ResnetBlocks: resnets,
-		Downsample:   downsample,
-	}, nil
-}
-
-// EncodeImage encodes an image to normalized latents.
-// image: [B, 3, H, W] image tensor in [-1, 1]
-// Returns: [B, L, C*4] patchified normalized latents
-func (vae *AutoencoderKLFlux2) EncodeImage(image *mlx.Array) *mlx.Array {
-	// Convert NCHW -> NHWC
-	x := mlx.Transpose(image, 0, 2, 3, 1)
-
-	// Encoder
-	h := vae.EncoderConvIn.Forward(x)
-
-	for _, downBlock := range vae.EncoderDown {
-		h = downBlock.Forward(h)
-	}
-
-	h = vae.EncoderMid.Forward(h)
-	h = vae.EncoderNormOut.Forward(h)
-	h = mlx.SiLU(h)
-	h = vae.EncoderConvOut.Forward(h)
-
-	// Quant conv outputs [B, H, W, 2*latent_channels] (mean + logvar)
-	if vae.QuantConv != nil {
-		h = vae.QuantConv.Forward(h)
-	}
-
-	// Take only the mean (first latent_channels) - deterministic encoding
-	// h is [B, H, W, 64] -> take first 32 channels for mean
-	shape := h.Shape()
-	latentChannels := vae.Config.LatentChannels // 32
-	h = mlx.Slice(h, []int32{0, 0, 0, 0}, []int32{shape[0], shape[1], shape[2], latentChannels})
-
-	// Convert NHWC -> NCHW for patchifying
-	h = mlx.Transpose(h, 0, 3, 1, 2)
-
-	// Patchify: [B, C, H, W] -> [B, L, C*4]
-	h = vae.Patchify(h)
-
-	// Apply BatchNorm on patchified latents [B, L, 128]
-	// The BatchNorm has 128 channels matching the patchified dimension
-	h = vae.normalizePatchified(h)
-
-	return h
-}
-
-// normalizePatchified applies batch normalization to patchified latents.
-// Input: [B, L, 128] where 128 = 32 latent channels * 4 (2x2 patch)
-// Output: [B, L, 128] normalized
-func (vae *AutoencoderKLFlux2) normalizePatchified(x *mlx.Array) *mlx.Array {
-	shape := x.Shape()
-	C := shape[2] // 128
-
-	// Reshape stats for broadcasting [1, 1, C]
-	mean := mlx.Reshape(vae.LatentBN.RunningMean, 1, 1, C)
-	variance := mlx.Reshape(vae.LatentBN.RunningVar, 1, 1, C)
-
-	// Normalize: (x - mean) / sqrt(var + eps)
-	xNorm := mlx.Sub(x, mean)
-	xNorm = mlx.Div(xNorm, mlx.Sqrt(mlx.AddScalar(variance, vae.LatentBN.Eps)))
-
-	// Scale and shift (only if affine=True)
-	if vae.LatentBN.Weight != nil {
-		weight := mlx.Reshape(vae.LatentBN.Weight, 1, 1, C)
-		xNorm = mlx.Mul(xNorm, weight)
-	}
-	if vae.LatentBN.Bias != nil {
-		bias := mlx.Reshape(vae.LatentBN.Bias, 1, 1, C)
-		xNorm = mlx.Add(xNorm, bias)
-	}
-
-	return xNorm
-}
--- a/x/imagegen/models/qwen3/text_encoder.go
+++ b/x/imagegen/models/qwen3/text_encoder.go
@@ -1,390 +0,0 @@
-//go:build mlx
-
-// Package qwen3 provides a shared Qwen3 text encoder used by multiple image generation models.
-package qwen3
-
-import (
-	"fmt"
-	"math"
-
-	"github.com/ollama/ollama/x/imagegen"
-	"github.com/ollama/ollama/x/imagegen/mlx"
-	"github.com/ollama/ollama/x/imagegen/nn"
-	"github.com/ollama/ollama/x/imagegen/safetensors"
-	"github.com/ollama/ollama/x/imagegen/tokenizer"
-)
-
-// Config holds Qwen3 text encoder configuration
-type Config struct {
-	HiddenSize        int32   `json:"hidden_size"`
-	NumHiddenLayers   int32   `json:"num_hidden_layers"`
-	IntermediateSize  int32   `json:"intermediate_size"`
-	NumAttentionHeads int32   `json:"num_attention_heads"`
-	NumKeyValueHeads  int32   `json:"num_key_value_heads"`
-	VocabSize         int32   `json:"vocab_size"`
-	RMSNormEps        float32 `json:"rms_norm_eps"`
-	RopeTheta         float32 `json:"rope_theta"`
-	HeadDim           int32   `json:"head_dim"`
-}
-
-// Attention implements Qwen3 attention with QK norms
-type Attention struct {
-	QProj nn.LinearLayer `weight:"q_proj"`
-	KProj nn.LinearLayer `weight:"k_proj"`
-	VProj nn.LinearLayer `weight:"v_proj"`
-	OProj nn.LinearLayer `weight:"o_proj"`
-	QNorm *nn.RMSNorm    `weight:"q_norm"`
-	KNorm *nn.RMSNorm    `weight:"k_norm"`
-	// Computed fields
-	NHeads    int32
-	NKVHeads  int32
-	HeadDim   int32
-	Scale     float32
-	RopeTheta float32
-}
-
-// applyRoPEQwen3 applies the custom RoPE for Qwen3 text encoder
-func applyRoPEQwen3(x *mlx.Array, seqLen int32, theta float32) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	L := shape[1]
-	H := shape[2]
-	D := shape[3]
-	half := D / 2
-
-	freqsArr := make([]float32, half)
-	logTheta := float32(math.Log(float64(theta)))
-	for i := int32(0); i < half; i++ {
-		freqsArr[i] = float32(math.Exp(float64(-logTheta * float32(i) / float32(half))))
-	}
-	freqs := mlx.NewArray(freqsArr, []int32{half})
-
-	posArr := make([]float32, seqLen)
-	for i := int32(0); i < seqLen; i++ {
-		posArr[i] = float32(i)
-	}
-	pos := mlx.NewArray(posArr, []int32{seqLen})
-
-	posExpanded := mlx.Reshape(pos, seqLen, 1)
-	freqsExpanded := mlx.Reshape(freqs, 1, half)
-	args := mlx.Mul(posExpanded, freqsExpanded)
-
-	cosVals := mlx.Cos(args)
-	sinVals := mlx.Sin(args)
-	cosVals = mlx.Reshape(cosVals, seqLen, 1, half)
-	sinVals = mlx.Reshape(sinVals, seqLen, 1, half)
-
-	x1 := mlx.Slice(x, []int32{0, 0, 0, 0}, []int32{B, L, H, half})
-	x2 := mlx.Slice(x, []int32{0, 0, 0, half}, []int32{B, L, H, D})
-
-	part1 := mlx.Sub(mlx.Mul(x1, cosVals), mlx.Mul(x2, sinVals))
-	part2 := mlx.Add(mlx.Mul(x1, sinVals), mlx.Mul(x2, cosVals))
-
-	return mlx.Concatenate([]*mlx.Array{part1, part2}, 3)
-}
-
-// Forward computes attention with causal masking and optional padding mask
-func (attn *Attention) Forward(x *mlx.Array, mask *mlx.Array, maskMode string) *mlx.Array {
-	shape := x.Shape()
-	B := shape[0]
-	L := shape[1]
-
-	q := attn.QProj.Forward(x)
-	k := attn.KProj.Forward(x)
-	v := attn.VProj.Forward(x)
-
-	q = mlx.Reshape(q, B, L, attn.NHeads, attn.HeadDim)
-	k = mlx.Reshape(k, B, L, attn.NKVHeads, attn.HeadDim)
-	v = mlx.Reshape(v, B, L, attn.NKVHeads, attn.HeadDim)
-
-	// QK norm uses 1e-6 hardcoded (Qwen3 specific)
-	q = attn.QNorm.Forward(q, 1e-6)
-	k = attn.KNorm.Forward(k, 1e-6)
-
-	q = applyRoPEQwen3(q, L, attn.RopeTheta)
-	k = applyRoPEQwen3(k, L, attn.RopeTheta)
-
-	q = mlx.Transpose(q, 0, 2, 1, 3)
-	k = mlx.Transpose(k, 0, 2, 1, 3)
-	v = mlx.Transpose(v, 0, 2, 1, 3)
-
-	if attn.NKVHeads < attn.NHeads {
-		repeats := attn.NHeads / attn.NKVHeads
-		k = repeatKV(k, repeats)
-		v = repeatKV(v, repeats)
-	}
-
-	out := mlx.ScaledDotProductAttentionWithSinks(q, k, v, attn.Scale, maskMode, mask, nil)
-
-	out = mlx.Transpose(out, 0, 2, 1, 3)
-	out = mlx.Reshape(out, B, L, attn.NHeads*attn.HeadDim)
-
-	out = attn.OProj.Forward(out)
-
-	return out
-}
-
-// repeatKV repeats key/value heads for GQA
-func repeatKV(x *mlx.Array, repeats int32) *mlx.Array {
-	if repeats == 1 {
-		return x
-	}
-	shape := x.Shape()
-	x = mlx.ExpandDims(x, 2)
-	x = mlx.Tile(x, []int32{1, 1, repeats, 1, 1})
-	return mlx.Reshape(x, shape[0], shape[1]*repeats, shape[2], shape[3])
-}
-
-// MLP implements Qwen3 SwiGLU MLP
-type MLP struct {
-	GateProj nn.LinearLayer `weight:"gate_proj"`
-	UpProj   nn.LinearLayer `weight:"up_proj"`
-	DownProj nn.LinearLayer `weight:"down_proj"`
-}
-
-// Forward applies the MLP
-func (m *MLP) Forward(x *mlx.Array) *mlx.Array {
-	gate := m.GateProj.Forward(x)
-	gate = mlx.SiLU(gate)
-	up := m.UpProj.Forward(x)
-	h := mlx.Mul(gate, up)
-	return m.DownProj.Forward(h)
-}
-
-// Block represents a single Qwen3 transformer block
-type Block struct {
-	Attention         *Attention  `weight:"self_attn"`
-	MLP               *MLP        `weight:"mlp"`
-	InputLayerNorm    *nn.RMSNorm `weight:"input_layernorm"`
-	PostAttnLayerNorm *nn.RMSNorm `weight:"post_attention_layernorm"`
-}
-
-// Forward applies the Qwen3 block
-func (qb *Block) Forward(x *mlx.Array, eps float32, mask *mlx.Array, maskMode string) *mlx.Array {
-	h := qb.InputLayerNorm.Forward(x, eps)
-	attnOut := qb.Attention.Forward(h, mask, maskMode)
-	x = mlx.Add(x, attnOut)
-
-	h = qb.PostAttnLayerNorm.Forward(x, eps)
-	mlpOut := qb.MLP.Forward(h)
-	x = mlx.Add(x, mlpOut)
-
-	return x
-}
-
-// TextEncoder is the full Qwen3 encoder
-type TextEncoder struct {
-	EmbedTokens *nn.Embedding `weight:"model.embed_tokens"`
-	Layers      []*Block      `weight:"model.layers"`
-	FinalNorm   *nn.RMSNorm   `weight:"model.norm"`
-	*Config
-}
-
-// Load loads the Qwen3 text encoder from ollama blob storage.
-func (m *TextEncoder) Load(manifest *imagegen.ModelManifest, configPath string) error {
-	fmt.Print("  Loading text encoder... ")
-
-	// Load config from blob
-	var cfg Config
-	if err := manifest.ReadConfigJSON(configPath, &cfg); err != nil {
-		return fmt.Errorf("config: %w", err)
-	}
-	m.Config = &cfg
-	m.Layers = make([]*Block, cfg.NumHiddenLayers)
-
-	// Load weights from tensor blobs
-	weights, err := imagegen.LoadWeightsFromManifest(manifest, "text_encoder")
-	if err != nil {
-		return fmt.Errorf("weights: %w", err)
-	}
-	if err := weights.Load(0); err != nil {
-		return fmt.Errorf("load weights: %w", err)
-	}
-	defer weights.ReleaseAll()
-
-	return m.loadWeights(weights)
-}
-
-// loadWeights loads weights from any WeightSource into the model
-func (m *TextEncoder) loadWeights(weights safetensors.WeightSource) error {
-	if err := safetensors.LoadModule(m, weights, ""); err != nil {
-		return fmt.Errorf("load module: %w", err)
-	}
-	m.initComputedFields()
-	fmt.Println("✓")
-	return nil
-}
-
-// initComputedFields initializes computed fields after loading weights
-func (m *TextEncoder) initComputedFields() {
-	cfg := m.Config
-	m.FinalNorm.Eps = cfg.RMSNormEps
-	for _, block := range m.Layers {
-		// Attention
-		block.Attention.NHeads = cfg.NumAttentionHeads
-		block.Attention.NKVHeads = cfg.NumKeyValueHeads
-		block.Attention.HeadDim = cfg.HeadDim
-		block.Attention.Scale = float32(1.0 / math.Sqrt(float64(cfg.HeadDim)))
-		block.Attention.RopeTheta = cfg.RopeTheta
-		block.Attention.QNorm.Eps = cfg.RMSNormEps
-		block.Attention.KNorm.Eps = cfg.RMSNormEps
-		// Block norms
-		block.InputLayerNorm.Eps = cfg.RMSNormEps
-		block.PostAttnLayerNorm.Eps = cfg.RMSNormEps
-	}
-}
-
-// Forward encodes text tokens with provided attention mask (LxL) and mask mode.
-func (te *TextEncoder) Forward(tokens *mlx.Array, attnMask *mlx.Array, maskMode string) *mlx.Array {
-	h := te.EmbedTokens.Forward(tokens)
-	eps := te.RMSNormEps
-
-	for _, layer := range te.Layers {
-		h = layer.Forward(h, eps, attnMask, maskMode)
-	}
-
-	// Apply final RMS norm
-	h = te.FinalNorm.Forward(h, eps)
-
-	return h
-}
-
-// ForwardWithLayerOutputs encodes text tokens and returns hidden states from specified layers.
-// This is used by Flux2 which needs embeddings from specific intermediate layers.
-func (te *TextEncoder) ForwardWithLayerOutputs(tokens *mlx.Array, layerIndices []int, attnMask *mlx.Array, maskMode string) []*mlx.Array {
-	h := te.EmbedTokens.Forward(tokens)
-	eps := te.RMSNormEps
-
-	outputs := make([]*mlx.Array, len(layerIndices))
-	layerSet := make(map[int]int)
-	for i, idx := range layerIndices {
-		layerSet[idx] = i
-	}
-
-	for i, layer := range te.Layers {
-		h = layer.Forward(h, eps, attnMask, maskMode)
-		if outIdx, ok := layerSet[i]; ok {
-			outputs[outIdx] = h
-		}
-	}
-
-	return outputs
-}
-
-// ApplyChatTemplate wraps prompt in Qwen3 chat format.
-// If think is true, adds the <think></think> block after the assistant tag
-// (matches tokenizer.apply_chat_template with enable_thinking=False in Python).
-func ApplyChatTemplate(prompt string, think bool) string {
-	base := "<|im_start|>user\n" + prompt + "<|im_end|>\n<|im_start|>assistant\n"
-	if think {
-		return base + "<think>\n\n</think>\n\n"
-	}
-	return base
-}
-
-// EncodePrompt encodes a text prompt using the tokenizer and encoder.
-// If think is true, includes the <think></think> block in the chat template.
-func (te *TextEncoder) EncodePrompt(tok *tokenizer.Tokenizer, prompt string, maxLen int, think bool) (*mlx.Array, *mlx.Array) {
-	formattedPrompt := ApplyChatTemplate(prompt, think)
-
-	tokens := tok.Encode(formattedPrompt, false)
-
-	if len(tokens) > maxLen {
-		tokens = tokens[:maxLen]
-	}
-
-	maskData := make([]float32, maxLen)
-	for i := 0; i < len(tokens); i++ {
-		maskData[i] = 1.0
-	}
-
-	// Get PAD token (different from EOS for Qwen3)
-	padToken := tok.PAD()
-	if padToken < 0 {
-		padToken = tok.EOS() // fallback
-	}
-
-	paddedTokens := make([]int32, maxLen)
-	copy(paddedTokens, tokens)
-	for i := len(tokens); i < maxLen; i++ {
-		paddedTokens[i] = padToken
-	}
-
-	tokensArr := mlx.NewArrayInt32(paddedTokens, []int32{1, int32(maxLen)})
-	maskArr := mlx.NewArray(maskData, []int32{1, int32(maxLen)})
-
-	// Build combined causal + PAD mask [L, L]
-	// mask[i,j] = 0 if (j <= i AND valid[j]) else -inf
-	L := int32(maxLen)
-	validLen := int32(len(tokens))
-	combinedMaskData := make([]float32, L*L)
-	negInf := float32(-1e9)
-	for i := int32(0); i < L; i++ {
-		for j := int32(0); j < L; j++ {
-			idx := i*L + j
-			if j <= i && j < validLen {
-				combinedMaskData[idx] = 0
-			} else {
-				combinedMaskData[idx] = negInf
-			}
-		}
-	}
-	maskMat := mlx.NewArray(combinedMaskData, []int32{L, L})
-
-	embeddings := te.Forward(tokensArr, maskMat, "")
-
-	return embeddings, maskArr
-}
-
-// EncodePromptWithLayers encodes a text prompt and returns embeddings from specified layers.
-// Used by Flux2 which concatenates embeddings from multiple intermediate layers.
-// If think is true, includes the <think></think> block in the chat template.
-// Returns embeddings and padded sequence length.
-func (te *TextEncoder) EncodePromptWithLayers(tok *tokenizer.Tokenizer, prompt string, maxLen int, layerIndices []int, think bool) (*mlx.Array, int32) {
-	formattedPrompt := ApplyChatTemplate(prompt, think)
-	tokens := tok.Encode(formattedPrompt, false)
-
-	if len(tokens) > maxLen {
-		tokens = tokens[:maxLen]
-	}
-
-	// Pad to maxLen
-	padToken := tok.PAD()
-	if padToken < 0 {
-		padToken = tok.EOS() // fallback
-	}
-	padded := make([]int32, maxLen)
-	copy(padded, tokens)
-	for i := len(tokens); i < maxLen; i++ {
-		padded[i] = padToken
-	}
-	tokensArr := mlx.NewArrayInt32(padded, []int32{1, int32(maxLen)})
-
-	// Build combined causal + PAD mask [L, L]
-	// mask[i,j] = 0 if (j <= i AND valid[j]) else -inf
-	// This combines causal masking with PAD token masking
-	L := int32(maxLen)
-	validLen := int32(len(tokens))
-	maskData := make([]float32, L*L)
-	negInf := float32(-1e9)
-	for i := int32(0); i < L; i++ {
-		for j := int32(0); j < L; j++ {
-			idx := i*L + j
-			if j <= i && j < validLen {
-				maskData[idx] = 0 // allowed: causal OK and not PAD
-			} else {
-				maskData[idx] = negInf // blocked: future or PAD
-			}
-		}
-	}
-	maskMat := mlx.NewArray(maskData, []int32{L, L})
-
-	layerOutputs := te.ForwardWithLayerOutputs(tokensArr, layerIndices, maskMat, "")
-
-	// Concatenate layer outputs along the hidden dimension
-	// Each output is [B, L, hidden_dim], result is [B, L, num_layers * hidden_dim]
-	embeddings := mlx.Concatenate(layerOutputs, 2)
-
-	// Return embeddings and padded length
-	return embeddings, int32(maxLen)
-}
--- a/x/imagegen/models/qwen_image/pipeline_test.go
+++ b/x/imagegen/models/qwen_image/pipeline_test.go
@@ -0,0 +1,87 @@
+//go:build mlx
+
+package qwen_image
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+	"testing"
+
+	"github.com/ollama/ollama/x/imagegen/mlx"
+)
+
+// TestMain moves to the repository root and initializes MLX before any test runs.
+// When the MLX libraries cannot be initialized the whole package is skipped.
+func TestMain(m *testing.M) {
+	// The ./build/lib/ollama/ path only resolves from the repo root.
+	_, self, _, _ := runtime.Caller(0)
+	root := filepath.Join(filepath.Dir(self), "..", "..", "..", "..")
+	if chdirErr := os.Chdir(root); chdirErr != nil {
+		fmt.Printf("Failed to change to repo root: %v\n", chdirErr)
+		os.Exit(1)
+	}
+
+	if initErr := mlx.InitMLX(); initErr != nil {
+		fmt.Printf("Skipping qwen_image tests: %v\n", initErr)
+		os.Exit(0)
+	}
+	os.Exit(m.Run())
+}
+
+// TestPipelineOutput runs the full pipeline (integration test).
+// Skips if model weights not found. Requires ~50GB VRAM.
+func TestPipelineOutput(t *testing.T) {
+	modelPath := "../../../weights/Qwen-Image-2512"
+	if _, err := os.Stat(modelPath); os.IsNotExist(err) {
+		t.Skip("Skipping: model weights not found at " + modelPath)
+	}
+
+	// Load model
+	pm, err := LoadPersistent(modelPath)
+	if err != nil {
+		t.Skipf("Skipping: failed to load model: %v", err)
+	}
+
+	// Run 2-step pipeline (minimum for stable scheduler)
+	cfg := &GenerateConfig{
+		Prompt: "a cat",
+		Width:  256,
+		Height: 256,
+		Steps:  2,
+		Seed:   42,
+	}
+
+	output, err := pm.GenerateFromConfig(cfg)
+	if err != nil {
+		t.Fatalf("Pipeline failed: %v", err)
+	}
+	mlx.Eval(output)
+
+	// Verify output shape [1, C, H, W]; abort on a wrong rank so the indexing below cannot panic
+	shape := output.Shape()
+	if len(shape) != 4 {
+		t.Fatalf("Expected 4D output, got %v", shape)
+	}
+	if shape[0] != 1 || shape[1] != 3 || shape[2] != cfg.Height || shape[3] != cfg.Width {
+		t.Errorf("Shape mismatch: got %v, expected [1, 3, %d, %d]", shape, cfg.Height, cfg.Width)
+	}
+
+	// Verify values in expected range [0, 1] (a tolerance of 0.1 is allowed on both ends)
+	data := output.Data()
+	minVal, maxVal := float32(1.0), float32(0.0)
+	for _, v := range data {
+		if v < minVal {
+			minVal = v
+		}
+		if v > maxVal {
+			maxVal = v
+		}
+	}
+	t.Logf("Output range: [%.4f, %.4f]", minVal, maxVal)
+
+	if minVal < -0.1 || maxVal > 1.1 {
+		t.Errorf("Output values out of range: [%.4f, %.4f]", minVal, maxVal)
+	}
+}
--- a/x/imagegen/models/qwen_image/qwen25vl.go
+++ b/x/imagegen/models/qwen_image/qwen25vl.go
--- a/x/imagegen/models/qwen_image/qwen_image.go
+++ b/x/imagegen/models/qwen_image/qwen_image.go
@@ -0,0 +1,370 @@
+//go:build mlx
+
+// Package qwen_image implements the Qwen-Image diffusion transformer model.
+package qwen_image
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	"github.com/ollama/ollama/x/imagegen/cache"
+	"github.com/ollama/ollama/x/imagegen/mlx"
+	"github.com/ollama/ollama/x/imagegen/tokenizer"
+)
+
+// GenerateConfig holds all options for image generation; fields with a listed default fall back to it when zero or negative.
+type GenerateConfig struct {
+	Prompt         string
+	NegativePrompt string       // Empty = no CFG
+	CFGScale       float32      // Only used if NegativePrompt is set (default: 4.0)
+	Width          int32        // Image width (default: 1024)
+	Height         int32        // Image height (default: 1024)
+	Steps          int          // Denoising steps (default: 50)
+	Seed           int64        // Random seed
+	Progress       ProgressFunc // Optional progress callback
+
+	// Layer caching (DeepCache/Learning-to-Cache speedup)
+	LayerCache    bool // Enable layer caching (default: false)
+	CacheInterval int  // Refresh cache every N steps (default: 3)
+	CacheLayers   int  // Number of shallow layers to cache (default: 25)
+}
+
+// ProgressFunc is called at the start of each denoising step with the 1-based step number and the total step count.
+type ProgressFunc func(step, totalSteps int)
+
+// Model represents a Qwen-Image diffusion model; all fields are populated by Load.
+type Model struct {
+	ModelPath   string               // directory passed to Load
+	Tokenizer   *tokenizer.Tokenizer // read from <ModelPath>/tokenizer
+	TextEncoder *Qwen25VL            // read from <ModelPath>/text_encoder (text-only)
+	Transformer *Transformer         // read from <ModelPath>/transformer
+	VAEDecoder  *VAEDecoder          // read from <ModelPath>/vae
+}
+
+// Load loads the Qwen-Image model from a directory (tokenizer/, text_encoder/, transformer/, vae/) and prints progress and memory use to stdout.
+func (m *Model) Load(modelPath string) error {
+	fmt.Println("Loading Qwen-Image model...")
+	start := time.Now()
+
+	if mlx.GPUIsAvailable() {
+		mlx.SetDefaultDeviceGPU()
+		mlx.EnableCompile()
+	}
+
+	m.ModelPath = modelPath
+
+	// Load tokenizer from <modelPath>/tokenizer
+	fmt.Print("  Loading tokenizer... ")
+	tokenizerPath := filepath.Join(modelPath, "tokenizer")
+	tok, err := tokenizer.Load(tokenizerPath)
+	if err != nil {
+		return fmt.Errorf("tokenizer: %w", err)
+	}
+	m.Tokenizer = tok
+	fmt.Println("✓")
+
+	// Load text encoder (Qwen2.5-VL in text-only mode - skip vision tower for efficiency)
+	m.TextEncoder = &Qwen25VL{}
+	if err := m.TextEncoder.LoadTextOnly(filepath.Join(modelPath, "text_encoder")); err != nil {
+		return fmt.Errorf("text encoder: %w", err)
+	}
+	mlx.Eval(mlx.Collect(m.TextEncoder)...)
+	fmt.Printf("  (%.1f GB, peak %.1f GB)\n",
+		float64(mlx.MetalGetActiveMemory())/(1024*1024*1024),
+		float64(mlx.MetalGetPeakMemory())/(1024*1024*1024))
+
+	// Load transformer, then evaluate its arrays before reporting memory
+	m.Transformer = &Transformer{}
+	if err := m.Transformer.Load(filepath.Join(modelPath, "transformer")); err != nil {
+		return fmt.Errorf("transformer: %w", err)
+	}
+	mlx.Eval(mlx.Collect(m.Transformer)...)
+	fmt.Printf("  (%.1f GB, peak %.1f GB)\n",
+		float64(mlx.MetalGetActiveMemory())/(1024*1024*1024),
+		float64(mlx.MetalGetPeakMemory())/(1024*1024*1024))
+
+	// Load VAE decoder, then evaluate its arrays before reporting memory
+	m.VAEDecoder = &VAEDecoder{}
+	if err := m.VAEDecoder.Load(filepath.Join(modelPath, "vae")); err != nil {
+		return fmt.Errorf("VAE decoder: %w", err)
+	}
+	mlx.Eval(mlx.Collect(m.VAEDecoder)...)
+	fmt.Printf("  (%.1f GB, peak %.1f GB)\n",
+		float64(mlx.MetalGetActiveMemory())/(1024*1024*1024),
+		float64(mlx.MetalGetPeakMemory())/(1024*1024*1024))
+
+	mem := mlx.MetalGetActiveMemory()
+	peak := mlx.MetalGetPeakMemory()
+	fmt.Printf("  Loaded in %.2fs (%.1f GB active, %.1f GB peak)\n",
+		time.Since(start).Seconds(),
+		float64(mem)/(1024*1024*1024),
+		float64(peak)/(1024*1024*1024))
+
+	return nil
+}
+
+// Generate creates an image from a prompt without CFG, progress reporting or layer caching; it delegates to GenerateFromConfig.
+func (m *Model) Generate(prompt string, width, height int32, steps int, seed int64) (*mlx.Array, error) {
+	return m.GenerateFromConfig(&GenerateConfig{
+		Prompt: prompt,
+		Width:  width,
+		Height: height,
+		Steps:  steps,
+		Seed:   seed,
+	})
+}
+
+// GenerateWithProgress creates an image like Generate and additionally invokes progress (may be nil) once per denoising step.
+func (m *Model) GenerateWithProgress(prompt string, width, height int32, steps int, seed int64, progress ProgressFunc) (*mlx.Array, error) {
+	return m.GenerateFromConfig(&GenerateConfig{
+		Prompt:   prompt,
+		Width:    width,
+		Height:   height,
+		Steps:    steps,
+		Seed:     seed,
+		Progress: progress,
+	})
+}
+
+// GenerateWithCFG creates an image with classifier-free guidance; an empty negativePrompt disables CFG and cfgScale is then unused.
+func (m *Model) GenerateWithCFG(prompt, negativePrompt string, width, height int32, steps int, seed int64, cfgScale float32, progress ProgressFunc) (*mlx.Array, error) {
+	return m.GenerateFromConfig(&GenerateConfig{
+		Prompt:         prompt,
+		NegativePrompt: negativePrompt,
+		CFGScale:       cfgScale,
+		Width:          width,
+		Height:         height,
+		Steps:          steps,
+		Seed:           seed,
+		Progress:       progress,
+	})
+}
+
+// GenerateFromConfig runs generate with cfg and prints the elapsed time; cfg is modified in place by the defaults generate applies.
+func (m *Model) GenerateFromConfig(cfg *GenerateConfig) (*mlx.Array, error) {
+	start := time.Now()
+	result, err := m.generate(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if cfg.NegativePrompt != "" { // same condition generate uses to enable CFG
+		fmt.Printf("Generated with CFG (scale=%.1f) in %.2fs (%d steps)\n", cfg.CFGScale, time.Since(start).Seconds(), cfg.Steps)
+	} else {
+		fmt.Printf("Generated in %.2fs (%d steps)\n", time.Since(start).Seconds(), cfg.Steps)
+	}
+	return result, nil
+}
+
+// GenerateImage implements model.ImageModel interface. NOTE(review): ctx is not consulted, so cancellation has no effect here.
+func (m *Model) GenerateImage(ctx context.Context, prompt string, width, height int32, steps int, seed int64) (*mlx.Array, error) {
+	return m.Generate(prompt, width, height, steps, seed)
+}
+
+// generate is the internal denoising pipeline: encode text, run cfg.Steps scheduler steps, VAE-decode, rescale to [0, 1].
+func (m *Model) generate(cfg *GenerateConfig) (*mlx.Array, error) {
+	// Apply defaults (written back into the caller's cfg; cfg must be non-nil)
+	if cfg.Width <= 0 {
+		cfg.Width = 1024
+	}
+	if cfg.Height <= 0 {
+		cfg.Height = 1024
+	}
+	if cfg.Steps <= 0 {
+		cfg.Steps = 50
+	}
+	if cfg.CFGScale <= 0 {
+		cfg.CFGScale = 4.0
+	}
+	if cfg.CacheInterval <= 0 {
+		cfg.CacheInterval = 3
+	}
+	if cfg.CacheLayers <= 0 {
+		cfg.CacheLayers = 25 // ~42% of 60 layers (similar ratio to Z-Image's 15/38)
+	}
+
+	useCFG := cfg.NegativePrompt != ""
+	tcfg := m.Transformer.Config
+	latentH := cfg.Height / 8 // integer division: sizes that are not a multiple of 8*PatchSize are truncated
+	latentW := cfg.Width / 8
+	pH := latentH / tcfg.PatchSize
+	pW := latentW / tcfg.PatchSize
+	imgSeqLen := pH * pW
+
+	// Text encoding (the negative prompt is only encoded when CFG is active)
+	var posEmb, negEmb *mlx.Array
+	{
+		posEmb = m.TextEncoder.EncodePrompt(m.Tokenizer, cfg.Prompt)
+		if useCFG {
+			negEmb = m.TextEncoder.EncodePrompt(m.Tokenizer, cfg.NegativePrompt)
+			mlx.Keep(posEmb, negEmb)
+			mlx.Eval(posEmb, negEmb)
+		} else {
+			mlx.Keep(posEmb)
+			mlx.Eval(posEmb)
+		}
+	}
+
+	// Pad sequences to same length for CFG (txtLen becomes the longer of the two)
+	txtLen := posEmb.Shape()[1]
+	if useCFG {
+		negLen := negEmb.Shape()[1]
+		if negLen > txtLen {
+			txtLen = negLen
+		}
+		if posEmb.Shape()[1] < txtLen {
+			posEmb = padSequence(posEmb, txtLen)
+		}
+		if negEmb.Shape()[1] < txtLen {
+			negEmb = padSequence(negEmb, txtLen)
+		}
+		mlx.Keep(posEmb, negEmb)
+	}
+
+	// Pre-compute batched embeddings for CFG (single forward pass optimization)
+	var batchedEmb *mlx.Array
+	if useCFG {
+		batchedEmb = mlx.Concatenate([]*mlx.Array{posEmb, negEmb}, 0)
+		mlx.Keep(batchedEmb)
+		mlx.Eval(batchedEmb)
+	}
+
+	// Scheduler (always built from DefaultSchedulerConfig)
+	scheduler := NewFlowMatchScheduler(DefaultSchedulerConfig())
+	scheduler.SetTimesteps(cfg.Steps, imgSeqLen)
+
+	// Init latents [B, C, T, H, W]
+	var latents *mlx.Array
+	{
+		latents = scheduler.InitNoise([]int32{1, tcfg.OutChannels, 1, latentH, latentW}, cfg.Seed)
+		mlx.Eval(latents)
+	}
+
+	// RoPE cache
+	var ropeCache *RoPECache
+	{
+		ropeCache = PrepareRoPE(pH, pW, txtLen, tcfg.AxesDimsRope)
+		mlx.Keep(ropeCache.ImgFreqs, ropeCache.TxtFreqs)
+		mlx.Eval(ropeCache.ImgFreqs) // NOTE(review): TxtFreqs is kept but not evaluated here - confirm this is intended
+	}
+
+	// Layer cache for DeepCache/Learning-to-Cache speedup (the CFG branch below never reads stepCache)
+	var stepCache *cache.StepCache
+	if cfg.LayerCache {
+		stepCache = cache.NewStepCache(cfg.CacheLayers)
+		fmt.Printf("  Layer caching: %d layers, refresh every %d steps\n", cfg.CacheLayers, cfg.CacheInterval)
+	}
+
+	// Denoising loop
+	for i := 0; i < cfg.Steps; i++ {
+		stepStart := time.Now()
+		if cfg.Progress != nil {
+			cfg.Progress(i+1, cfg.Steps) // reported before the step's work starts
+		}
+
+		t := scheduler.Timesteps[i]
+		timestep := mlx.ToBFloat16(mlx.NewArray([]float32{t}, []int32{1}))
+
+		// Squeeze temporal dim: [B, C, T, H, W] -> [B, C, H, W]
+		latents2D := mlx.Squeeze(latents, 2)
+		patches := PackLatents(latents2D, tcfg.PatchSize)
+
+		var output *mlx.Array
+		if useCFG {
+			// CFG Batching: single forward pass with batch=2
+			// Note: layer caching with CFG is not supported yet (would need 2 caches)
+			batchedPatches := mlx.Tile(patches, []int32{2, 1, 1})
+			batchedTimestep := mlx.Tile(timestep, []int32{2})
+
+			// Single batched forward pass
+			batchedOutput := m.Transformer.Forward(batchedPatches, batchedEmb, batchedTimestep, ropeCache.ImgFreqs, ropeCache.TxtFreqs)
+
+			// Split output: [2, L, D] -> pos [1, L, D], neg [1, L, D]
+			L := batchedOutput.Shape()[1]
+			D := batchedOutput.Shape()[2]
+			posOutput := mlx.Slice(batchedOutput, []int32{0, 0, 0}, []int32{1, L, D})
+			negOutput := mlx.Slice(batchedOutput, []int32{1, 0, 0}, []int32{2, L, D})
+
+			// combPred = neg + CFGScale * (pos - neg)
+			diff := mlx.Sub(posOutput, negOutput)
+			scaledDiff := mlx.MulScalar(diff, cfg.CFGScale)
+			combPred := mlx.Add(negOutput, scaledDiff)
+
+			// Norm rescaling: rescale combined prediction to match conditional prediction's norm
+			condNorm := mlx.Sqrt(mlx.Sum(mlx.Square(posOutput), -1, true))
+			combNorm := mlx.Sqrt(mlx.Sum(mlx.Square(combPred), -1, true))
+			output = mlx.Mul(combPred, mlx.Div(condNorm, combNorm)) // NOTE(review): no epsilon guard; presumably a zero combNorm yields Inf/NaN
+		} else if stepCache != nil {
+			output = m.Transformer.ForwardWithCache(patches, posEmb, timestep, ropeCache.ImgFreqs, ropeCache.TxtFreqs,
+				stepCache, i, cfg.CacheInterval, cfg.CacheLayers)
+		} else {
+			output = m.Transformer.Forward(patches, posEmb, timestep, ropeCache.ImgFreqs, ropeCache.TxtFreqs)
+		}
+
+		noisePred := UnpackLatents(output, latentH, latentW, tcfg.PatchSize)
+		oldLatents := latents
+		latents = scheduler.Step(noisePred, latents, i)
+
+		// Keep cached arrays alive across cleanup
+		if stepCache != nil {
+			mlx.Keep(stepCache.Arrays()...)
+		}
+		mlx.Eval(latents)
+		oldLatents.Free() // the previous latents are released only after the new ones are evaluated
+
+		activeMem := float64(mlx.MetalGetActiveMemory()) / (1024 * 1024 * 1024)
+		peakMem := float64(mlx.MetalGetPeakMemory()) / (1024 * 1024 * 1024)
+		fmt.Printf("  Step %d/%d: t=%.4f (%.2fs) [%.1f GB active, %.1f GB peak]\n", i+1, cfg.Steps, t, time.Since(stepStart).Seconds(), activeMem, peakMem)
+	}
+
+	// Free denoising temporaries before VAE decode
+	posEmb.Free()
+	if negEmb != nil {
+		negEmb.Free()
+	}
+	if batchedEmb != nil {
+		batchedEmb.Free()
+	}
+	ropeCache.ImgFreqs.Free()
+	ropeCache.TxtFreqs.Free()
+	if stepCache != nil {
+		stepCache.Free()
+	}
+
+	// VAE decode (Decode manages its own pools for staged memory)
+	decoded := m.VAEDecoder.Decode(latents)
+	latents.Free()
+	// Post-process: squeeze temporal dim and rescale to [0, 1] via (x + 1) / 2
+	{
+		decoded = mlx.Squeeze(decoded, 2)
+		decoded = mlx.AddScalar(decoded, 1.0)
+		decoded = mlx.DivScalar(decoded, 2.0)
+		mlx.Eval(decoded)
+	}
+
+	fmt.Printf("  Peak memory: %.2f GB\n", float64(mlx.MetalGetPeakMemory())/(1024*1024*1024))
+
+	return decoded, nil
+}
+
+// padSequence extends x with zeros along axis 1 until it is targetLen long.
+func padSequence(x *mlx.Array, targetLen int32) *mlx.Array {
+	seqLen := x.Shape()[1]
+	if seqLen >= targetLen {
+		// Already long enough: hand the input back untouched.
+		return x
+	}
+	// Pad on the sequence dimension (axis 1) only.
+	widths := []int32{0, 0, 0, targetLen - seqLen, 0, 0}
+	return mlx.Pad(x, widths)
+}
+
+// LoadPersistent is kept for backward compatibility.
+// New code can use m := &Model{}; m.Load(path) directly.
+func LoadPersistent(modelPath string) (*Model, error) {
+	var model Model
+	if err := model.Load(modelPath); err != nil {
+		return nil, err
+	}
+	return &model, nil
+}
--- a/x/imagegen/models/qwen_image/scheduler.go
+++ b/x/imagegen/models/qwen_image/scheduler.go
@@ -0,0 +1,218 @@
+//go:build mlx
+
+package qwen_image
+
+import (
+	"math"
+
+	"github.com/ollama/ollama/x/imagegen/mlx"
+)
+
+// SchedulerConfig holds FlowMatchEulerDiscreteScheduler configuration; the values noted are those set by DefaultSchedulerConfig
+type SchedulerConfig struct {
+	NumTrainTimesteps int32   `json:"num_train_timesteps"` // default 1000
+	BaseShift         float32 `json:"base_shift"`          // default 0.5; mu at BaseImageSeqLen
+	MaxShift          float32 `json:"max_shift"`           // default 0.9; mu at MaxImageSeqLen
+	BaseImageSeqLen   int32   `json:"base_image_seq_len"`  // default 256
+	MaxImageSeqLen    int32   `json:"max_image_seq_len"`   // default 8192
+	ShiftTerminal     float32 `json:"shift_terminal"`      // default 0.02; last non-terminal sigma after stretching
+	UseDynamicShift   bool    `json:"use_dynamic_shifting"` // default true; enables the mu-based time shift
+}
+
+// DefaultSchedulerConfig builds a fresh config populated with the stock FlowMatchEulerDiscreteScheduler values
+func DefaultSchedulerConfig() *SchedulerConfig {
+	cfg := new(SchedulerConfig)
+	cfg.NumTrainTimesteps = 1000
+	cfg.BaseShift = 0.5
+	cfg.MaxShift = 0.9 // Matches scheduler_config.json
+	cfg.BaseImageSeqLen = 256
+	cfg.MaxImageSeqLen = 8192
+	cfg.ShiftTerminal = 0.02
+	cfg.UseDynamicShift = true
+	return cfg
+}
+
+// FlowMatchScheduler implements the Flow Match Euler discrete scheduler; Timesteps and Sigmas are filled by SetTimesteps
+type FlowMatchScheduler struct {
+	Config    *SchedulerConfig
+	Timesteps []float32 // same values as Sigmas (NumSteps+1 entries, last is 0)
+	Sigmas    []float32 // noise levels per step plus a trailing terminal 0
+	NumSteps  int       // step count passed to the last SetTimesteps call
+}
+
+// NewFlowMatchScheduler returns a scheduler bound to cfg; Timesteps and Sigmas stay empty until SetTimesteps is called
+func NewFlowMatchScheduler(cfg *SchedulerConfig) *FlowMatchScheduler {
+	sched := new(FlowMatchScheduler)
+	sched.Config = cfg
+	return sched
+}
+
+// CalculateShift linearly interpolates the shift mu for imageSeqLen between
+// (baseSeqLen, baseShift) and (maxSeqLen, maxShift), like Python's calculate_shift
+func CalculateShift(imageSeqLen int32, baseSeqLen int32, maxSeqLen int32, baseShift float32, maxShift float32) float32 {
+	slope := (maxShift - baseShift) / float32(maxSeqLen-baseSeqLen)
+	intercept := baseShift - slope*float32(baseSeqLen)
+	// Evaluate the line at the requested sequence length.
+	return float32(imageSeqLen)*slope + intercept
+}
+
+// SetTimesteps fills Sigmas and Timesteps (numSteps+1 entries each) for the given number of inference steps
+// Matches Python diffusers FlowMatchEulerDiscreteScheduler behavior:
+// 1. Create sigmas from sigma_max to sigma_min (linspace)
+// 2. Apply time_shift with mu (if dynamic shifting)
+// 3. Apply stretch_shift_to_terminal to make final value = shift_terminal
+func (s *FlowMatchScheduler) SetTimesteps(numSteps int, imageSeqLen int32) {
+	s.NumSteps = numSteps
+
+	// Calculate mu for dynamic shifting (stays 0 when dynamic shifting is off)
+	var mu float32
+	if s.Config.UseDynamicShift {
+		mu = CalculateShift(
+			imageSeqLen,
+			s.Config.BaseImageSeqLen,
+			s.Config.MaxImageSeqLen,
+			s.Config.BaseShift,
+			s.Config.MaxShift,
+		)
+	}
+
+	// Step 1: Create sigmas from 1.0 to 1/num_steps
+	// Python (pipeline_qwenimage.py:639):
+	//   sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
+	// This gives sigmas from 1.0 to 1/30 = 0.033 for 30 steps
+	sigmas := make([]float32, numSteps)
+	sigmaMax := float32(1.0)
+	sigmaMin := 1.0 / float32(numSteps) // 1/30 = 0.033 for 30 steps
+	if numSteps == 1 {
+		sigmas[0] = sigmaMax // avoids the division by numSteps-1 == 0 below
+	} else {
+		for i := 0; i < numSteps; i++ {
+			sigmas[i] = sigmaMax + float32(i)*(sigmaMin-sigmaMax)/float32(numSteps-1)
+		}
+	}
+
+	// Step 2: Apply time shift if using dynamic shifting (with mu == 0 the shift formula reduces to t)
+	if s.Config.UseDynamicShift && mu != 0 {
+		for i := range sigmas {
+			sigmas[i] = s.timeShift(mu, sigmas[i])
+		}
+	}
+
+	// Step 3: Apply stretch_shift_to_terminal
+	if s.Config.ShiftTerminal > 0 {
+		sigmas = s.stretchShiftToTerminal(sigmas)
+	}
+
+	// Step 4: Append terminal sigma (0) and store
+	// Note: Python's scheduler.timesteps are sigmas*1000, but the pipeline divides by 1000
+	// before passing to transformer. We skip both steps and just use sigmas directly.
+	s.Sigmas = make([]float32, numSteps+1)
+	s.Timesteps = make([]float32, numSteps+1)
+	for i := 0; i < numSteps; i++ {
+		s.Sigmas[i] = sigmas[i]
+		s.Timesteps[i] = sigmas[i]
+	}
+	s.Sigmas[numSteps] = 0.0
+	s.Timesteps[numSteps] = 0.0
+}
+
+// stretchShiftToTerminal rescales the schedule so that its last entry equals
+// Config.ShiftTerminal (same transform as the Python reference)
+func (s *FlowMatchScheduler) stretchShiftToTerminal(sigmas []float32) []float32 {
+	n := len(sigmas)
+	if n == 0 {
+		return sigmas
+	}
+
+	// With z = 1 - sigma:
+	//   scale = z[last] / (1 - shift_terminal)
+	//   out   = 1 - z / scale
+	tail := sigmas[n-1]
+	scale := (1.0 - tail) / (1.0 - s.Config.ShiftTerminal)
+
+	// A (near-)zero scale means the tail is ~1.0 (e.g. a single shifted step);
+	// the stretch is skipped and the input is returned unchanged.
+	if scale < 1e-6 {
+		return sigmas
+	}
+
+	stretched := make([]float32, n)
+	for idx := range sigmas {
+		stretched[idx] = 1.0 - ((1.0 - sigmas[idx]) / scale)
+	}
+	return stretched
+}
+
+// timeShift applies the exponential dynamic time shift
+// exp(mu) / (exp(mu) + (1/t - 1)); non-positive t maps to 0
+func (s *FlowMatchScheduler) timeShift(mu float32, t float32) float32 {
+	if t > 0 {
+		e := float32(math.Exp(float64(mu)))
+		return e / (e + (1.0/t - 1.0))
+	}
+	return 0
+}
+
+// Step advances sample by one Euler update.
+//   - modelOutput: velocity predicted by the transformer
+//   - sample: current noisy sample
+//   - timestepIdx: index of the current entry in Sigmas
+func (s *FlowMatchScheduler) Step(modelOutput, sample *mlx.Array, timestepIdx int) *mlx.Array {
+	// x_next = x + (sigma_next - sigma) * v, sigmas read from the stored table
+	cur := s.Sigmas[timestepIdx]
+	next := s.Sigmas[timestepIdx+1]
+	stepSize := next - cur
+
+	// Do the arithmetic in float32 to avoid precision issues (as Python
+	// diffusers does)
+	x := mlx.AsType(sample, mlx.DtypeFloat32)
+	v := mlx.AsType(modelOutput, mlx.DtypeFloat32)
+
+	delta := mlx.MulScalar(v, stepSize)
+	updated := mlx.Add(x, delta)
+
+	// The result is always handed back as bfloat16
+	return mlx.ToBFloat16(updated)
+}
+
+// GetTimestep returns the timestep value at idx, or 0 when idx lies outside the schedule
+func (s *FlowMatchScheduler) GetTimestep(idx int) float32 {
+	if idx >= 0 && idx < len(s.Timesteps) { // a negative idx used to panic instead of returning 0
+		return s.Timesteps[idx]
+	}
+	return 0.0
+}
+
+// InitNoise creates initial noise for sampling in unpacked format [B, C, T, H, W]; seed is converted to uint64, so negative seeds wrap
+func (s *FlowMatchScheduler) InitNoise(shape []int32, seed int64) *mlx.Array {
+	return mlx.RandomNormal(shape, uint64(seed))
+}
+
+// InitNoisePacked draws the starting noise directly in packed layout [B, L, C*4].
+// Python diffusers samples in packed space as well; sampling unpacked and then
+// packing produces a different spatial correlation structure, which affects
+// model output quality.
+func (s *FlowMatchScheduler) InitNoisePacked(batchSize, seqLen, channels int32, seed int64) *mlx.Array {
+	rngSeed := uint64(seed)
+	return mlx.RandomNormal([]int32{batchSize, seqLen, channels}, rngSeed)
+}
+
+// GetLatentShape maps an image size to its latent shape [B, C, T, H, W].
+// For qwen_image the VAE downscales 8x spatially and the latent has 16 channels
+func GetLatentShape(batchSize, height, width int32) []int32 {
+	const downscale, channels = 8, 16
+	h, w := height/downscale, width/downscale
+	return []int32{batchSize, channels, 1, h, w}
+}
+
+// GetPatchedLatentShape returns the patchified latent shape for an image size
+// After patchification: [B, L, 16*patch_size^2] where L = (H/8/patch_size) * (W/8/patch_size); patchSize must be > 0
+func GetPatchedLatentShape(batchSize, height, width, patchSize int32) []int32 {
+	latentH := height / 8
+	latentW := width / 8
+	pH := latentH / patchSize
+	pW := latentW / patchSize
+	inChannels := 16 * patchSize * patchSize // was hard-coded to 64, which is only right for patchSize 2
+	return []int32{batchSize, pH * pW, inChannels}
+}
--- a/x/imagegen/models/qwen_image/scheduler_test.go
+++ b/x/imagegen/models/qwen_image/scheduler_test.go
@@ -0,0 +1,135 @@
+//go:build mlx
+
+package qwen_image
+
+import (
+	"math"
+	"testing"
+)
+
+// TestSchedulerSetTimesteps verifies scheduler sigmas match Python diffusers reference.
+// Golden values generated via:
+//
+//	python3 -c "
+//	from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
+//	import numpy as np
+//	s = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, base_shift=0.5, max_shift=0.9,
+//	    base_image_seq_len=256, max_image_seq_len=8192, shift_terminal=0.02, use_dynamic_shifting=True)
+//	mu = 4096 * (0.9-0.5)/(8192-256) + 0.5 - (0.9-0.5)/(8192-256)*256
+//	sigmas = np.linspace(1.0, 1.0/30, 30)
+//	s.set_timesteps(sigmas=sigmas, mu=mu)
+//	print(s.sigmas.numpy())"
+func TestSchedulerSetTimesteps(t *testing.T) {
+	cfg := DefaultSchedulerConfig()
+	scheduler := NewFlowMatchScheduler(cfg)
+	scheduler.SetTimesteps(30, 4096)
+
+	// Check length first and stop on mismatch, so the indexing below cannot panic
+	if len(scheduler.Sigmas) != 31 {
+		t.Fatalf("sigmas length: got %d, want 31", len(scheduler.Sigmas))
+	}
+
+	// Golden values from Python diffusers (first 3, last 3 before terminal)
+	wantFirst := []float32{1.000000, 0.982251, 0.963889}
+	wantLast := []float32{0.142924, 0.083384, 0.020000}
+
+	// Check first 3
+	for i, want := range wantFirst {
+		got := scheduler.Sigmas[i]
+		if abs32(got-want) > 1e-4 {
+			t.Errorf("sigma[%d]: got %v, want %v", i, got, want)
+		}
+	}
+
+	// Check last 3 (indices 27, 28, 29)
+	for i, want := range wantLast {
+		idx := 27 + i
+		got := scheduler.Sigmas[idx]
+		if abs32(got-want) > 1e-4 {
+			t.Errorf("sigma[%d]: got %v, want %v", idx, got, want)
+		}
+	}
+
+	// Check terminal is 0
+	if scheduler.Sigmas[30] != 0.0 {
+		t.Errorf("terminal sigma: got %v, want 0", scheduler.Sigmas[30])
+	}
+}
+
+// TestSchedulerProperties checks structural invariants of a 30-step schedule.
+func TestSchedulerProperties(t *testing.T) {
+	scheduler := NewFlowMatchScheduler(DefaultSchedulerConfig())
+	scheduler.SetTimesteps(30, 4096)
+	sigmas := scheduler.Sigmas
+
+	// Invariant: the sequence never increases
+	for i := 1; i < len(sigmas); i++ {
+		prev, cur := sigmas[i-1], sigmas[i]
+		if cur > prev {
+			t.Errorf("sigmas not monotonically decreasing at %d: %v > %v",
+				i, cur, prev)
+		}
+	}
+
+	// Invariant: the schedule starts at ~1.0 (with time shift)
+	if first := sigmas[0]; first < 0.9 || first > 1.01 {
+		t.Errorf("first sigma out of expected range [0.9, 1.01]: %v", first)
+	}
+
+	// Invariant: the appended terminal entry is exactly 0
+	if last := sigmas[len(sigmas)-1]; last != 0.0 {
+		t.Errorf("terminal sigma should be 0, got %v", last)
+	}
+
+	// Invariant: the entry before the terminal one equals shift_terminal (0.02)
+	penultimate := sigmas[len(sigmas)-2]
+	if abs32(penultimate-0.02) > 1e-5 {
+		t.Errorf("last non-terminal sigma should be 0.02, got %v", penultimate)
+	}
+
+	// Invariant: one entry per step plus the terminal
+	if got, want := len(sigmas), scheduler.NumSteps+1; got != want {
+		t.Errorf("sigmas length should be steps+1: got %d, want %d", got, want)
+	}
+}
+
+// TestCalculateShift checks mu at both ends and the middle of the default range.
+// Expected values follow mu = img_seq_len * m + b with m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
+func TestCalculateShift(t *testing.T) {
+	tests := []struct {
+		seqLen int32
+		mu     float32
+	}{
+		{256, 0.5},     // lower end of the range
+		{8192, 0.9},    // upper end of the range
+		{4096, 0.6935}, // interior point (rounded)
+	}
+
+	for _, tc := range tests {
+		got := CalculateShift(tc.seqLen, 256, 8192, 0.5, 0.9)
+		if abs32(got-tc.mu) > 0.001 {
+			t.Errorf("CalculateShift(%d): got %v, want %v", tc.seqLen, got, tc.mu)
+		}
+	}
+}
+
+// TestSchedulerStep checks only that the first step size (sigma[1] - sigma[0]) is negative; it does not call Step.
+func TestSchedulerStep(t *testing.T) {
+	cfg := DefaultSchedulerConfig()
+	scheduler := NewFlowMatchScheduler(cfg)
+	scheduler.SetTimesteps(30, 4096)
+
+	// Recompute dt for the first step the same way Step does (next sigma minus current)
+	sigma0 := scheduler.Sigmas[0]
+	sigma1 := scheduler.Sigmas[1]
+	expectedDt := sigma1 - sigma0
+
+	// dt should be negative (sigmas decrease)
+	if expectedDt >= 0 {
+		t.Errorf("expected negative dt, got %v (sigma0=%v, sigma1=%v)", expectedDt, sigma0, sigma1)
+	}
+}
+
+func abs32(x float32) float32 {
+	return math.Float32frombits(math.Float32bits(x) &^ (1 << 31)) // clear the sign bit
+}
--- a/x/imagegen/models/qwen_image/text_encoder_test.go
+++ b/x/imagegen/models/qwen_image/text_encoder_test.go
@@ -0,0 +1,174 @@
+//go:build mlx
+
+package qwen_image
+
+import (
+	"encoding/json"
+	"math"
+	"os"
+	"path/filepath"
+	"slices"
+	"testing"
+
+	"github.com/ollama/ollama/x/imagegen/mlx"
+	"github.com/ollama/ollama/x/imagegen/safetensors"
+)
+
+// TinyTextEncoderConfig mirrors the config.json stored next to the tiny test
+// text encoder weights. Each field is decoded from the JSON key named in its
+// tag; loadTinyTextEncoder copies the values into a Qwen25VLConfig.
+type TinyTextEncoderConfig struct {
+	// Model dimensions.
+	HiddenSize       int32 `json:"hidden_size"`
+	NumHiddenLayers  int32 `json:"num_hidden_layers"`
+	IntermediateSize int32 `json:"intermediate_size"`
+	VocabSize        int32 `json:"vocab_size"`
+
+	// Attention layout.
+	NumAttentionHeads int32 `json:"num_attention_heads"`
+	NumKeyValueHeads  int32 `json:"num_key_value_heads"`
+	HeadDim           int32 `json:"head_dim"`
+
+	// Normalization and rotary-embedding settings.
+	RMSNormEps   float32 `json:"rms_norm_eps"`
+	RopeTheta    float32 `json:"rope_theta"`
+	MRoPESection []int32 `json:"mrope_section"`
+}
+
+// loadTinyTextEncoder builds a text-only Qwen25VL from the tiny fixture in
+// testdata/tiny_text_encoder and returns it together with the parsed fixture
+// config.
+//
+// The calling test is skipped when config.json does not exist (the tiny
+// weights have not been generated). Every other failure, including an
+// unreadable config.json, fails the test.
+func loadTinyTextEncoder(t *testing.T) (*Qwen25VL, *TinyTextEncoderConfig) {
+	t.Helper()
+
+	testdataDir := filepath.Join("testdata", "tiny_text_encoder")
+
+	// Load config. Only a missing file means "fixture not generated"; any
+	// other read error is a real problem and must not be hidden behind a skip.
+	configData, err := os.ReadFile(filepath.Join(testdataDir, "config.json"))
+	if os.IsNotExist(err) {
+		t.Skip("Skipping: tiny weights not found. Regenerate with Python (see models/CLAUDE.md)")
+	}
+	if err != nil {
+		t.Fatalf("Failed to read config: %v", err)
+	}
+
+	var tinyCfg TinyTextEncoderConfig
+	if err := json.Unmarshal(configData, &tinyCfg); err != nil {
+		t.Fatalf("Failed to parse config: %v", err)
+	}
+
+	// Create encoder config (using Qwen25VLConfig)
+	cfg := &Qwen25VLConfig{
+		HiddenSize:        tinyCfg.HiddenSize,
+		NumHiddenLayers:   tinyCfg.NumHiddenLayers,
+		IntermediateSize:  tinyCfg.IntermediateSize,
+		NumAttentionHeads: tinyCfg.NumAttentionHeads,
+		NumKeyValueHeads:  tinyCfg.NumKeyValueHeads,
+		VocabSize:         tinyCfg.VocabSize,
+		RMSNormEps:        tinyCfg.RMSNormEps,
+		RopeTheta:         tinyCfg.RopeTheta,
+		HeadDim:           tinyCfg.HeadDim,
+		MRoPESection:      tinyCfg.MRoPESection,
+	}
+
+	// Load weights from the same directory and materialize them as bfloat16.
+	weights, err := safetensors.LoadModelWeights(testdataDir)
+	if err != nil {
+		t.Fatalf("Failed to load weights: %v", err)
+	}
+
+	if err := weights.Load(mlx.DtypeBFloat16); err != nil {
+		t.Fatalf("Failed to bulk load weights: %v", err)
+	}
+
+	// Build encoder: token embedding, one block per hidden layer, final norm.
+	embedding, err := weights.Get("model.embed_tokens.weight")
+	if err != nil {
+		t.Fatalf("Failed to get embedding: %v", err)
+	}
+
+	blocks := make([]*VLTextBlock, cfg.NumHiddenLayers)
+	for i := int32(0); i < cfg.NumHiddenLayers; i++ {
+		block, err := newVLTextBlock(weights, int(i), cfg)
+		if err != nil {
+			t.Fatalf("Failed to load block %d: %v", i, err)
+		}
+		blocks[i] = block
+	}
+
+	finalNorm, err := weights.Get("model.norm.weight")
+	if err != nil {
+		t.Fatalf("Failed to get final norm: %v", err)
+	}
+
+	encoder := &Qwen25VL{
+		Config:    cfg,
+		Embedding: embedding,
+		Blocks:    blocks,
+		FinalNorm: finalNorm,
+		HasVision: false, // Text-only mode
+	}
+
+	return encoder, &tinyCfg
+}
+
+// TestTextEncoderForward runs the tiny text encoder on a short token sequence
+// and checks that the result has the expected shape and only finite values.
+func TestTextEncoderForward(t *testing.T) {
+	encoder, cfg := loadTinyTextEncoder(t)
+
+	// Five token ids (within vocab range), encoded through EncodeTextOnly.
+	out := encoder.EncodeTextOnly([]int32{1, 2, 3, 4, 5})
+	mlx.Eval(out)
+
+	// Expected layout: [batch, seq_len, hidden_size].
+	want := []int32{1, 5, cfg.HiddenSize}
+	if got := out.Shape(); !slices.Equal(got, want) {
+		t.Errorf("output shape: got %v, want %v", got, want)
+	}
+
+	// Report the first NaN or Inf and stop scanning.
+	for idx, val := range out.Data() {
+		f := float64(val)
+		if !math.IsNaN(f) && !math.IsInf(f, 0) {
+			continue
+		}
+		t.Errorf("output[%d] is not finite: %v", idx, val)
+		break
+	}
+}
+
+// TestTextEncoderBatch checks the output shape for a single short sequence.
+// EncodeTextOnly doesn't support batch input, so despite its name this test
+// only verifies that one sequence comes back with a batch dimension of 1.
+func TestTextEncoderBatch(t *testing.T) {
+	encoder, cfg := loadTinyTextEncoder(t)
+
+	out := encoder.EncodeTextOnly([]int32{1, 2, 3})
+	mlx.Eval(out)
+
+	// Expected layout: [batch, seq_len, hidden_size].
+	want := []int32{1, 3, cfg.HiddenSize}
+	if got := out.Shape(); !slices.Equal(got, want) {
+		t.Errorf("shape: got %v, want %v", got, want)
+	}
+}
+
+// TestMRoPEComputation verifies that M-RoPE frequency computation produces
+// cos and sin tensors of the expected shape whose values are valid.
+func TestMRoPEComputation(t *testing.T) {
+	encoder, cfg := loadTinyTextEncoder(t)
+
+	cossin := encoder.computeTextRoPE(10, 1)
+	mlx.Eval(cossin[0], cossin[1])
+
+	// Verify shapes: [3, B, L, head_dim]
+	wantShape := []int32{3, 1, 10, cfg.HeadDim}
+	if !slices.Equal(cossin[0].Shape(), wantShape) {
+		t.Errorf("cos shape: got %v, want %v", cossin[0].Shape(), wantShape)
+	}
+	if !slices.Equal(cossin[1].Shape(), wantShape) {
+		t.Errorf("sin shape: got %v, want %v", cossin[1].Shape(), wantShape)
+	}
+
+	// Verify the leading cos/sin values are in the valid range [-1, 1], with a
+	// little slack (1.01) for rounding.
+	cosData := cossin[0].Data()
+	sinData := cossin[1].Data()
+
+	// A shape mismatch above is reported with Errorf and does not stop the
+	// test, so the two slices may differ in length; bound the loop by both to
+	// avoid an index-out-of-range panic.
+	limit := min(100, len(cosData), len(sinData))
+	for i := 0; i < limit; i++ {
+		if cosData[i] < -1.01 || cosData[i] > 1.01 {
+			t.Errorf("cos[%d] out of range: %v", i, cosData[i])
+		}
+		if sinData[i] < -1.01 || sinData[i] > 1.01 {
+			t.Errorf("sin[%d] out of range: %v", i, sinData[i])
+		}
+	}
+}
--- a/Show More
+++ b/Show More