cmd: add usage cmd to chat to see token consumption

Add a `/usage` command to interactive CLI chat sessions that displays the tokens used in the current session. This can be used alongside the model's context window to understand when a context shift is going to happen.
cmd/config: use envconfig.Host() for base API in launch config packages (#13937)
2026-01-28 09:20:33 -05:00 · 2026-01-27 17:14:25 -08:00 · 2026-01-27 13:30:00 -08:00 · 2026-01-26 18:32:54 -08:00 · 2026-01-26 15:03:43 -08:00 · 2026-01-26 14:34:29 -08:00
131 changed files with 13093 additions and 8461 deletions
--- a/4
+++ b/4
@@ -169,8 +169,10 @@ COPY . .
 RUN git clone --depth 1 --branch "$(cat MLX_VERSION)" https://github.com/ml-explore/mlx-c.git build/_deps/mlx-c-src
 ARG GOFLAGS="'-ldflags=-w -s'"
 ENV CGO_ENABLED=1
-ENV CGO_CFLAGS="-I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
+ARG CGO_CFLAGS
 ARG CGO_CXXFLAGS
+ENV CGO_CFLAGS="${CGO_CFLAGS} -I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
+ENV CGO_CXXFLAGS="${CGO_CXXFLAGS}"
 RUN --mount=type=cache,target=/root/.cache/go-build \
    go build -tags mlx -trimpath -buildmode=pie -o /bin/ollama .

--- a/README.md
+++ b/README.md
@@ -558,7 +558,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
-- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
+- [Ollama for Ruby](https://github.com/crmne/ruby_llm)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/ollama4j/ollama4j)
--- a/api/types.go
+++ b/api/types.go
@@ -749,7 +749,7 @@ type ShowResponse struct {
 	Messages      []Message          `json:"messages,omitempty"`
 	RemoteModel   string             `json:"remote_model,omitempty"`
 	RemoteHost    string             `json:"remote_host,omitempty"`
-	ModelInfo     map[string]any     `json:"model_info,omitempty"`
+	ModelInfo     map[string]any     `json:"model_info"`
 	ProjectorInfo map[string]any     `json:"projector_info,omitempty"`
 	Tensors       []Tensor           `json:"tensors,omitempty"`
 	Capabilities  []model.Capability `json:"capabilities,omitempty"`
--- a/app/README.md
+++ b/app/README.md
@@ -75,9 +75,9 @@ The `-dev` flag enables:
 CI builds with Xcode 14.1 for OS compatibility prior to v13.  If you want to manually build v11+ support, you can download the older Xcode [here](https://developer.apple.com/services-account/download?path=/Developer_Tools/Xcode_14.1/Xcode_14.1.xip), extract, then `mv ./Xcode.app /Applications/Xcode_14.1.0.app` then activate with:

 ```
-export CGO_CFLAGS=-mmacosx-version-min=12.0
-export CGO_CXXFLAGS=-mmacosx-version-min=12.0
-export CGO_LDFLAGS=-mmacosx-version-min=12.0
+export CGO_CFLAGS="-O3 -mmacosx-version-min=12.0"
+export CGO_CXXFLAGS="-O3 -mmacosx-version-min=12.0"
+export CGO_LDFLAGS="-mmacosx-version-min=12.0"
 export SDKROOT=/Applications/Xcode_14.1.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
 export DEVELOPER_DIR=/Applications/Xcode_14.1.0.app/Contents/Developer
 ```
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -35,6 +35,7 @@ import (
 	"golang.org/x/term"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/parser"
@@ -1018,8 +1019,10 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
 		}

 		if resp.ModelInfo != nil {
-			arch := resp.ModelInfo["general.architecture"].(string)
-			rows = append(rows, []string{"", "architecture", arch})
+			arch, _ := resp.ModelInfo["general.architecture"].(string)
+			if arch != "" {
+				rows = append(rows, []string{"", "architecture", arch})
+			}

 			var paramStr string
 			if resp.Details.ParameterSize != "" {
@@ -1029,7 +1032,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
 					paramStr = format.HumanNumber(uint64(f))
 				}
 			}
-			rows = append(rows, []string{"", "parameters", paramStr})
+			if paramStr != "" {
+				rows = append(rows, []string{"", "parameters", paramStr})
+			}

 			if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok {
 				if f, ok := v.(float64); ok {
@@ -1414,10 +1419,10 @@ func thinkingOutputClosingText(plainText bool) string {
 	return readline.ColorGrey + readline.ColorBold + text + readline.ColorDefault
 }

-func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
+func chat(cmd *cobra.Command, opts runOptions) (*api.Message, *api.Metrics, error) {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}

 	p := progress.NewProgress(os.Stderr)
@@ -1510,7 +1515,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {

 	if err := client.Chat(cancelCtx, req, fn); err != nil {
 		if errors.Is(err, context.Canceled) {
-			return nil, nil
+			return nil, nil, nil
 		}

 		// this error should ideally be wrapped properly by the client
@@ -1518,9 +1523,9 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 			p.StopAndClear()
 			fmt.Println("An error occurred while processing your message. Please try again.")
 			fmt.Println()
-			return nil, nil
+			return nil, nil, nil
 		}
-		return nil, err
+		return nil, nil, err
 	}

 	if len(opts.Messages) > 0 {
@@ -1530,14 +1535,14 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {

 	verbose, err := cmd.Flags().GetBool("verbose")
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}

 	if verbose {
 		latest.Summary()
 	}

-	return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, nil
+	return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, &latest.Metrics, nil
 }

 func generate(cmd *cobra.Command, opts runOptions) error {
@@ -2026,6 +2031,7 @@ func NewCLI() *cobra.Command {
 		copyCmd,
 		deleteCmd,
 		runnerCmd,
+		config.LaunchCmd(checkServerHeartbeat),
 	)

 	return rootCmd
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -0,0 +1,60 @@
+package config
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+// Claude is the Runner implementation for the Claude Code integration.
+type Claude struct{}
+
+// String returns the integration's display name.
+func (c *Claude) String() string {
+	return "Claude Code"
+}
+
+// args builds the command-line arguments for claude: a --model flag when a
+// model is given, and nil otherwise.
+func (c *Claude) args(model string) []string {
+	if model == "" {
+		return nil
+	}
+	return []string{"--model", model}
+}
+
+// findPath locates the claude executable. It prefers whatever is on PATH and
+// otherwise falls back to ~/.claude/local/claude (claude.exe on Windows),
+// returning an error when neither exists.
+func (c *Claude) findPath() (string, error) {
+	onPath, lookErr := exec.LookPath("claude")
+	if lookErr == nil {
+		return onPath, nil
+	}
+
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return "", err
+	}
+
+	binary := "claude"
+	if runtime.GOOS == "windows" {
+		binary = "claude.exe"
+	}
+
+	candidate := filepath.Join(homeDir, ".claude", "local", binary)
+	if _, statErr := os.Stat(candidate); statErr != nil {
+		return "", statErr
+	}
+	return candidate, nil
+}
+
+// Run starts Claude Code attached to the current terminal. The child's
+// environment is the current one plus ANTHROPIC_BASE_URL set to
+// envconfig.Host(), an empty ANTHROPIC_API_KEY, and ANTHROPIC_AUTH_TOKEN set
+// to "ollama". It returns an install hint when the binary cannot be found,
+// otherwise whatever the claude process returns.
+func (c *Claude) Run(model string) error {
+	binPath, err := c.findPath()
+	if err != nil {
+		return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
+	}
+
+	proc := exec.Command(binPath, c.args(model)...)
+	proc.Stdin, proc.Stdout, proc.Stderr = os.Stdin, os.Stdout, os.Stderr
+
+	overrides := []string{
+		"ANTHROPIC_BASE_URL=" + envconfig.Host().String(),
+		"ANTHROPIC_API_KEY=",
+		"ANTHROPIC_AUTH_TOKEN=ollama",
+	}
+	proc.Env = append(os.Environ(), overrides...)
+
+	return proc.Run()
+}
--- a/cmd/config/claude_test.go
+++ b/cmd/config/claude_test.go
@@ -0,0 +1,101 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"slices"
+	"testing"
+)
+
+// TestClaudeIntegration checks Claude's display name and that *Claude
+// satisfies the Runner interface.
+func TestClaudeIntegration(t *testing.T) {
+	claude := &Claude{}
+
+	t.Run("String", func(t *testing.T) {
+		const want = "Claude Code"
+		got := claude.String()
+		if got != want {
+			t.Errorf("String() = %q, want %q", got, want)
+		}
+	})
+
+	t.Run("implements Runner", func(t *testing.T) {
+		// Compile-time check only; there is nothing to assert at run time.
+		var _ Runner = claude
+	})
+}
+
+// TestClaudeFindPath covers the three outcomes of Claude.findPath: a binary
+// found on PATH, the ~/.claude/local fallback, and neither being present.
+// Fixture writes are checked so that a broken setup is reported as such
+// rather than as a findPath failure.
+func TestClaudeFindPath(t *testing.T) {
+	c := &Claude{}
+
+	// The executable carries an .exe suffix on Windows.
+	name := "claude"
+	if runtime.GOOS == "windows" {
+		name = "claude.exe"
+	}
+
+	t.Run("finds claude in PATH", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		fakeBin := filepath.Join(tmpDir, name)
+		if err := os.WriteFile(fakeBin, []byte("#!/bin/sh\n"), 0o755); err != nil {
+			t.Fatalf("writing fake binary: %v", err)
+		}
+		t.Setenv("PATH", tmpDir)
+
+		got, err := c.findPath()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if got != fakeBin {
+			t.Errorf("findPath() = %q, want %q", got, fakeBin)
+		}
+	})
+
+	t.Run("falls back to ~/.claude/local/claude", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
+
+		fallback := filepath.Join(tmpDir, ".claude", "local", name)
+		if err := os.MkdirAll(filepath.Dir(fallback), 0o755); err != nil {
+			t.Fatalf("creating fallback directory: %v", err)
+		}
+		if err := os.WriteFile(fallback, []byte("#!/bin/sh\n"), 0o755); err != nil {
+			t.Fatalf("writing fallback binary: %v", err)
+		}
+
+		got, err := c.findPath()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if got != fallback {
+			t.Errorf("findPath() = %q, want %q", got, fallback)
+		}
+	})
+
+	t.Run("returns error when neither PATH nor fallback exists", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
+
+		_, err := c.findPath()
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+	})
+}
+
+// TestClaudeArgs verifies the argument list Claude.args produces with and
+// without a model name.
+func TestClaudeArgs(t *testing.T) {
+	claude := &Claude{}
+
+	cases := []struct {
+		name  string
+		model string
+		want  []string
+	}{
+		{name: "with model", model: "llama3.2", want: []string{"--model", "llama3.2"}},
+		{name: "empty model", model: "", want: nil},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := claude.args(tc.model); !slices.Equal(got, tc.want) {
+				t.Errorf("args(%q) = %v, want %v", tc.model, got, tc.want)
+			}
+		})
+	}
+}
--- a/cmd/config/clawdbot.go
+++ b/cmd/config/clawdbot.go
@@ -0,0 +1,195 @@
+package config
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+// Clawdbot is the integration for the clawdbot CLI.
+type Clawdbot struct{}
+
+// String returns the integration's display name.
+func (c *Clawdbot) String() string {
+	return "Clawdbot"
+}
+
+// ansiGreen is the ANSI escape sequence that switches terminal text to green.
+const ansiGreen = "\033[32m"
+
+// Run writes the Ollama provider settings into clawdbot's config via Edit and
+// then starts `clawdbot gateway`. Models saved for the "clawdbot" integration
+// take precedence over the model argument. A gateway failure whose output
+// contains "Gateway already running" is reported as success.
+func (c *Clawdbot) Run(model string) error {
+	if _, lookErr := exec.LookPath("clawdbot"); lookErr != nil {
+		return fmt.Errorf("clawdbot is not installed, install from https://docs.clawd.bot")
+	}
+
+	selected := []string{model}
+	saved, loadErr := loadIntegration("clawdbot")
+	if loadErr == nil && len(saved.Models) > 0 {
+		selected = saved.Models
+	}
+	if editErr := c.Edit(selected); editErr != nil {
+		return fmt.Errorf("setup failed: %w", editErr)
+	}
+
+	// Tee both output streams into a buffer so the "already running" message
+	// can be detected once the process has exited.
+	var captured bytes.Buffer
+	gateway := exec.Command("clawdbot", "gateway")
+	gateway.Stdin = os.Stdin
+	gateway.Stdout = io.MultiWriter(os.Stdout, &captured)
+	gateway.Stderr = io.MultiWriter(os.Stderr, &captured)
+
+	runErr := gateway.Run()
+	if runErr == nil || !strings.Contains(captured.String(), "Gateway already running") {
+		return runErr
+	}
+
+	fmt.Fprintf(os.Stderr, "%sClawdbot has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
+	return nil
+}
+
+// Paths returns the clawdbot config file path when that file exists. It
+// returns nil when the file is missing or the home directory cannot be
+// determined.
+func (c *Clawdbot) Paths() []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		// Without a home directory the joined path would be relative to the
+		// working directory, so report no config instead of probing there.
+		return nil
+	}
+	p := filepath.Join(home, ".clawdbot", "clawdbot.json")
+	if _, err := os.Stat(p); err != nil {
+		return nil
+	}
+	return []string{p}
+}
+
+// Edit rewrites ~/.clawdbot/clawdbot.json so that clawdbot uses Ollama with
+// the given models, creating the directory and file when needed.
+//
+// It sets models.providers.ollama (baseUrl, apiKey, api, models) and
+// agents.defaults.model.primary, with models[0] as the primary model. All
+// other keys in the file are kept, and extra fields on an existing model
+// entry with the same id are carried over. The ollama model list is replaced,
+// not merged. An empty models slice is a no-op. A missing or unparsable file
+// is treated as an empty config. The result is written via writeWithBackup.
+func (c *Clawdbot) Edit(models []string) error {
+	if len(models) == 0 {
+		return nil
+	}
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	configPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
+	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+		return err
+	}
+
+	// Read into map[string]any to preserve unknown fields. Read and parse
+	// errors are deliberately ignored: such a file is treated as empty.
+	config := make(map[string]any)
+	if data, err := os.ReadFile(configPath); err == nil {
+		_ = json.Unmarshal(data, &config)
+	}
+	// A file holding the JSON literal null decodes to a nil map, and writing
+	// to a nil map panics, so fall back to an empty config.
+	if config == nil {
+		config = make(map[string]any)
+	}
+
+	// child returns parent[key] as a map, or a fresh map when the key is
+	// absent or holds some other type.
+	child := func(parent map[string]any, key string) map[string]any {
+		if m, _ := parent[key].(map[string]any); m != nil {
+			return m
+		}
+		return make(map[string]any)
+	}
+
+	// Navigate/create: models.providers.ollama (preserving other providers)
+	modelsSection := child(config, "models")
+	providers := child(modelsSection, "providers")
+	ollama := child(providers, "ollama")
+
+	ollama["baseUrl"] = envconfig.Host().String() + "/v1"
+	// needed to register provider
+	ollama["apiKey"] = "ollama-local"
+	// TODO(parthsareen): potentially move to responses
+	ollama["api"] = "openai-completions"
+
+	// Build map of existing models to preserve user customizations
+	existingModels, _ := ollama["models"].([]any)
+	existingByID := make(map[string]map[string]any)
+	for _, m := range existingModels {
+		if entry, ok := m.(map[string]any); ok {
+			if id, ok := entry["id"].(string); ok {
+				existingByID[id] = entry
+			}
+		}
+	}
+
+	newModels := make([]any, 0, len(models))
+	for _, model := range models {
+		entry := map[string]any{
+			"id":        model,
+			"name":      model,
+			"reasoning": false,
+			"input":     []any{"text"},
+			"cost": map[string]any{
+				"input":      0,
+				"output":     0,
+				"cacheRead":  0,
+				"cacheWrite": 0,
+			},
+			// TODO(parthsareen): get these values from API
+			"contextWindow": 131072,
+			"maxTokens":     16384,
+		}
+		// Merge existing fields (user customizations). Keys set above win;
+		// only keys the defaults do not define are carried over.
+		for k, v := range existingByID[model] {
+			if _, isNew := entry[k]; !isNew {
+				entry[k] = v
+			}
+		}
+		newModels = append(newModels, entry)
+	}
+	ollama["models"] = newModels
+
+	providers["ollama"] = ollama
+	modelsSection["providers"] = providers
+	config["models"] = modelsSection
+
+	// Update agents.defaults.model.primary (preserving other agent settings)
+	agents := child(config, "agents")
+	defaults := child(agents, "defaults")
+	modelConfig := child(defaults, "model")
+	modelConfig["primary"] = "ollama/" + models[0]
+	defaults["model"] = modelConfig
+	agents["defaults"] = defaults
+	config["agents"] = agents
+
+	data, err := json.MarshalIndent(config, "", "  ")
+	if err != nil {
+		return err
+	}
+	return writeWithBackup(configPath, data)
+}
+
+// Models lists the ids of the entries under models.providers.ollama.models in
+// the clawdbot config. It returns nil when the home directory cannot be
+// determined or readJSONFile fails. Entries without a string id are skipped.
+func (c *Clawdbot) Models() []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return nil
+	}
+
+	cfg, err := readJSONFile(filepath.Join(home, ".clawdbot", "clawdbot.json"))
+	if err != nil {
+		return nil
+	}
+
+	// A failed type assertion yields a nil map, which is safe to index, so a
+	// malformed section simply ends in an empty list.
+	section, _ := cfg["models"].(map[string]any)
+	providers, _ := section["providers"].(map[string]any)
+	provider, _ := providers["ollama"].(map[string]any)
+	entries, _ := provider["models"].([]any)
+
+	var ids []string
+	for _, raw := range entries {
+		entry, ok := raw.(map[string]any)
+		if !ok {
+			continue
+		}
+		if id, ok := entry["id"].(string); ok {
+			ids = append(ids, id)
+		}
+	}
+	return ids
+}
--- a/cmd/config/clawdbot_test.go
+++ b/cmd/config/clawdbot_test.go
@@ -0,0 +1,625 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestClawdbotIntegration checks Clawdbot's display name and that *Clawdbot
+// satisfies both the Runner and Editor interfaces.
+func TestClawdbotIntegration(t *testing.T) {
+	bot := &Clawdbot{}
+
+	t.Run("String", func(t *testing.T) {
+		const want = "Clawdbot"
+		got := bot.String()
+		if got != want {
+			t.Errorf("String() = %q, want %q", got, want)
+		}
+	})
+
+	// The two subtests below are compile-time interface checks.
+	t.Run("implements Runner", func(t *testing.T) {
+		var _ Runner = bot
+	})
+
+	t.Run("implements Editor", func(t *testing.T) {
+		var _ Editor = bot
+	})
+}
+
+// TestClawdbotEdit exercises Clawdbot.Edit against fresh, pre-populated and
+// malformed config files, checking that the Ollama provider is written and
+// that unrelated settings survive. Every Edit call and fixture write is
+// checked so a failed step cannot make a preservation check pass vacuously.
+func TestClawdbotEdit(t *testing.T) {
+	c := &Clawdbot{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	configDir := filepath.Join(tmpDir, ".clawdbot")
+	configPath := filepath.Join(configDir, "clawdbot.json")
+
+	cleanup := func() { os.RemoveAll(configDir) }
+
+	// seed starts from a clean directory and writes content as the config.
+	seed := func(t *testing.T, content string) {
+		t.Helper()
+		cleanup()
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(configPath, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// mustEdit runs Edit and fails the test on error.
+	mustEdit := func(t *testing.T, models ...string) {
+		t.Helper()
+		if err := c.Edit(models); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// load reads and parses the config file.
+	load := func(t *testing.T) map[string]any {
+		t.Helper()
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var cfg map[string]any
+		if err := json.Unmarshal(data, &cfg); err != nil {
+			t.Fatal(err)
+		}
+		return cfg
+	}
+
+	// firstOllamaModel returns models.providers.ollama.models[0] from cfg.
+	firstOllamaModel := func(cfg map[string]any) map[string]any {
+		models := cfg["models"].(map[string]any)
+		providers := models["providers"].(map[string]any)
+		ollama := providers["ollama"].(map[string]any)
+		modelList := ollama["models"].([]any)
+		return modelList[0].(map[string]any)
+	}
+
+	t.Run("fresh install", func(t *testing.T) {
+		cleanup()
+		mustEdit(t, "llama3.2")
+		assertClawdbotModelExists(t, configPath, "llama3.2")
+		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2")
+	})
+
+	t.Run("multiple models - first is primary", func(t *testing.T) {
+		cleanup()
+		mustEdit(t, "llama3.2", "mistral")
+		assertClawdbotModelExists(t, configPath, "llama3.2")
+		assertClawdbotModelExists(t, configPath, "mistral")
+		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2")
+	})
+
+	t.Run("preserve other providers", func(t *testing.T) {
+		seed(t, `{"models":{"providers":{"anthropic":{"apiKey":"xxx"}}}}`)
+		mustEdit(t, "llama3.2")
+		cfg := load(t)
+		models := cfg["models"].(map[string]any)
+		providers := models["providers"].(map[string]any)
+		if providers["anthropic"] == nil {
+			t.Error("anthropic provider was removed")
+		}
+	})
+
+	t.Run("preserve top-level keys", func(t *testing.T) {
+		seed(t, `{"theme":"dark","mcp":{"servers":{}}}`)
+		mustEdit(t, "llama3.2")
+		cfg := load(t)
+		if cfg["theme"] != "dark" {
+			t.Error("theme was removed")
+		}
+		if cfg["mcp"] == nil {
+			t.Error("mcp was removed")
+		}
+	})
+
+	t.Run("preserve user customizations on models", func(t *testing.T) {
+		cleanup()
+		mustEdit(t, "llama3.2")
+
+		// User adds custom field
+		cfg := load(t)
+		firstOllamaModel(cfg)["customField"] = "user-value"
+		configData, err := json.MarshalIndent(cfg, "", "  ")
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(configPath, configData, 0o644); err != nil {
+			t.Fatal(err)
+		}
+
+		// Re-run Edit
+		mustEdit(t, "llama3.2")
+
+		if firstOllamaModel(load(t))["customField"] != "user-value" {
+			t.Error("custom field was lost")
+		}
+	})
+
+	t.Run("edit replaces models list", func(t *testing.T) {
+		cleanup()
+		mustEdit(t, "llama3.2", "mistral")
+		mustEdit(t, "llama3.2")
+
+		assertClawdbotModelExists(t, configPath, "llama3.2")
+		assertClawdbotModelNotExists(t, configPath, "mistral")
+	})
+
+	t.Run("empty models is no-op", func(t *testing.T) {
+		original := `{"existing":"data"}`
+		seed(t, original)
+
+		if err := c.Edit([]string{}); err != nil {
+			t.Fatal(err)
+		}
+
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if string(data) != original {
+			t.Error("empty models should not modify file")
+		}
+	})
+
+	t.Run("corrupted JSON treated as empty", func(t *testing.T) {
+		seed(t, `{corrupted`)
+		mustEdit(t, "llama3.2")
+
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var cfg map[string]any
+		if err := json.Unmarshal(data, &cfg); err != nil {
+			t.Error("result should be valid JSON")
+		}
+	})
+
+	t.Run("wrong type models section", func(t *testing.T) {
+		seed(t, `{"models":"not a map"}`)
+		mustEdit(t, "llama3.2")
+		assertClawdbotModelExists(t, configPath, "llama3.2")
+	})
+}
+
+// TestClawdbotModels checks that Clawdbot.Models returns nothing without a
+// config file and returns the configured ollama model ids, in file order,
+// when one exists.
+func TestClawdbotModels(t *testing.T) {
+	c := &Clawdbot{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	t.Run("no config returns nil", func(t *testing.T) {
+		if models := c.Models(); len(models) > 0 {
+			t.Errorf("expected nil/empty, got %v", models)
+		}
+	})
+
+	t.Run("returns all ollama models", func(t *testing.T) {
+		configDir := filepath.Join(tmpDir, ".clawdbot")
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		err := os.WriteFile(filepath.Join(configDir, "clawdbot.json"), []byte(`{
+			"models":{"providers":{"ollama":{"models":[
+				{"id":"llama3.2"},
+				{"id":"mistral"}
+			]}}}
+		}`), 0o644)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		models := c.Models()
+		if len(models) != 2 {
+			t.Fatalf("expected 2 models, got %v", models)
+		}
+		// Check the ids too, not just the count.
+		if models[0] != "llama3.2" || models[1] != "mistral" {
+			t.Errorf("models = %v, want [llama3.2 mistral]", models)
+		}
+	})
+}
+
+// Helper functions
+
+// assertClawdbotModelExists fails the test unless the config at path lists
+// model under models.providers.ollama.models. An unreadable or unparsable
+// file aborts the test with the underlying error.
+func assertClawdbotModelExists(t *testing.T, path, model string) {
+	t.Helper()
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("reading config: %v", err)
+	}
+	var cfg map[string]any
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		t.Fatalf("parsing config: %v", err)
+	}
+	// Comma-ok assertions: a missing or mistyped section yields a nil list
+	// and an ordinary test failure below instead of a panic.
+	models, _ := cfg["models"].(map[string]any)
+	providers, _ := models["providers"].(map[string]any)
+	ollama, _ := providers["ollama"].(map[string]any)
+	modelList, _ := ollama["models"].([]any)
+	for _, m := range modelList {
+		if entry, ok := m.(map[string]any); ok {
+			if entry["id"] == model {
+				return
+			}
+		}
+	}
+	t.Errorf("model %s not found", model)
+}
+
+// assertClawdbotModelNotExists reports a test error for every entry under
+// models.providers.ollama.models in the config at path whose id equals model.
+// Read and parse errors are ignored, so an unreadable file reports nothing.
+func assertClawdbotModelNotExists(t *testing.T, path, model string) {
+	t.Helper()
+
+	raw, _ := os.ReadFile(path)
+	var parsed map[string]any
+	json.Unmarshal(raw, &parsed)
+
+	section, _ := parsed["models"].(map[string]any)
+	providers, _ := section["providers"].(map[string]any)
+	provider, _ := providers["ollama"].(map[string]any)
+	entries, _ := provider["models"].([]any)
+
+	for _, item := range entries {
+		entry, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		if entry["id"] == model {
+			t.Errorf("model %s should not exist", model)
+		}
+	}
+}
+
+// assertClawdbotPrimaryModel fails the test unless agents.defaults.model.primary
+// in the config at path equals expected. An unreadable or unparsable file
+// aborts the test with the underlying error.
+func assertClawdbotPrimaryModel(t *testing.T, path, expected string) {
+	t.Helper()
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("reading config: %v", err)
+	}
+	var cfg map[string]any
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		t.Fatalf("parsing config: %v", err)
+	}
+	// Comma-ok assertions: a missing section leaves model nil, so the
+	// comparison below fails normally instead of panicking.
+	agents, _ := cfg["agents"].(map[string]any)
+	defaults, _ := agents["defaults"].(map[string]any)
+	model, _ := defaults["model"].(map[string]any)
+	if model["primary"] != expected {
+		t.Errorf("primary model = %v, want %v", model["primary"], expected)
+	}
+}
+
+// TestClawdbotPaths checks that Clawdbot.Paths returns one path when the
+// config file exists and nil when it does not.
+func TestClawdbotPaths(t *testing.T) {
+	c := &Clawdbot{}
+
+	t.Run("returns path when config exists", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		configDir := filepath.Join(tmpDir, ".clawdbot")
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(filepath.Join(configDir, "clawdbot.json"), []byte(`{}`), 0o644); err != nil {
+			t.Fatal(err)
+		}
+
+		paths := c.Paths()
+		if len(paths) != 1 {
+			t.Errorf("expected 1 path, got %d", len(paths))
+		}
+	})
+
+	t.Run("returns nil when config missing", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		if paths := c.Paths(); paths != nil {
+			t.Errorf("expected nil, got %v", paths)
+		}
+	})
+}
+
+// TestClawdbotModelsEdgeCases feeds Clawdbot.Models malformed or incomplete
+// config files and expects no models back in every case.
+func TestClawdbotModelsEdgeCases(t *testing.T) {
+	c := &Clawdbot{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+	configDir := filepath.Join(tmpDir, ".clawdbot")
+	configPath := filepath.Join(configDir, "clawdbot.json")
+
+	// seed replaces the config with content and fails the test if the fixture
+	// cannot be written. The expected result in every case is "no models", so
+	// a silently missing fixture could otherwise let a case pass without
+	// exercising its input.
+	seed := func(t *testing.T, content string) {
+		t.Helper()
+		if err := os.RemoveAll(configDir); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(configPath, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Inputs for which Models must return exactly nil.
+	nilCases := []struct {
+		name    string
+		content string
+	}{
+		{"corrupted JSON returns nil", `{corrupted`},
+		{"wrong type at models level", `{"models":"string"}`},
+		{"wrong type at providers level", `{"models":{"providers":"string"}}`},
+		{"wrong type at ollama level", `{"models":{"providers":{"ollama":"string"}}}`},
+	}
+	for _, tc := range nilCases {
+		t.Run(tc.name, func(t *testing.T) {
+			seed(t, tc.content)
+			if models := c.Models(); models != nil {
+				t.Errorf("expected nil, got %v", models)
+			}
+		})
+	}
+
+	t.Run("model entry missing id", func(t *testing.T) {
+		seed(t, `{"models":{"providers":{"ollama":{"models":[{"name":"test"}]}}}}`)
+		if len(c.Models()) != 0 {
+			t.Error("expected empty for missing id")
+		}
+	})
+
+	t.Run("model id is not string", func(t *testing.T) {
+		seed(t, `{"models":{"providers":{"ollama":{"models":[{"id":123}]}}}}`)
+		if len(c.Models()) != 0 {
+			t.Error("expected empty for non-string id")
+		}
+	})
+}
+
+// TestClawdbotEditSchemaFields checks that a model entry written by Edit
+// carries the reasoning, input, contextWindow, maxTokens and cost fields.
+func TestClawdbotEditSchemaFields(t *testing.T) {
+	bot := &Clawdbot{}
+	home := t.TempDir()
+	setTestHome(t, home)
+	file := filepath.Join(home, ".clawdbot", "clawdbot.json")
+
+	if err := bot.Edit([]string{"llama3.2"}); err != nil {
+		t.Fatal(err)
+	}
+
+	raw, _ := os.ReadFile(file)
+	var parsed map[string]any
+	json.Unmarshal(raw, &parsed)
+
+	// Walk down to models.providers.ollama.models[0].
+	section := parsed["models"].(map[string]any)
+	providers := section["providers"].(map[string]any)
+	provider := providers["ollama"].(map[string]any)
+	entries := provider["models"].([]any)
+	entry := entries[0].(map[string]any)
+
+	// Verify required schema fields
+	if entry["reasoning"] != false {
+		t.Error("reasoning should be false")
+	}
+	for _, key := range []string{"input", "contextWindow", "maxTokens"} {
+		if entry[key] == nil {
+			t.Error(key + " should be set")
+		}
+	}
+
+	cost := entry["cost"].(map[string]any)
+	for _, key := range []string{"cacheRead", "cacheWrite"} {
+		if cost[key] == nil {
+			t.Error("cost." + key + " should be set")
+		}
+	}
+}
+
+// TestClawdbotEditModelNames checks that model names containing a colon tag,
+// a slash or a hyphen are written to the config unchanged.
+func TestClawdbotEditModelNames(t *testing.T) {
+	bot := &Clawdbot{}
+	home := t.TempDir()
+	setTestHome(t, home)
+	file := filepath.Join(home, ".clawdbot", "clawdbot.json")
+	reset := func() { os.RemoveAll(filepath.Join(home, ".clawdbot")) }
+
+	cases := []struct {
+		name    string
+		model   string
+		primary string // expected primary model; empty means not checked
+	}{
+		{name: "model with colon tag", model: "llama3.2:70b", primary: "ollama/llama3.2:70b"},
+		{name: "model with slash", model: "library/model:tag", primary: "ollama/library/model:tag"},
+		{name: "model with hyphen", model: "test-model"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			reset()
+			if err := bot.Edit([]string{tc.model}); err != nil {
+				t.Fatal(err)
+			}
+			assertClawdbotModelExists(t, file, tc.model)
+			if tc.primary != "" {
+				assertClawdbotPrimaryModel(t, file, tc.primary)
+			}
+		})
+	}
+}
+
+// TestClawdbotEditAgentsPreservation checks that Edit keeps agent settings it
+// does not own: sibling keys under agents.defaults and other agents. The Edit
+// result is checked because a failed Edit would leave the seeded file in
+// place and the preservation checks would pass without testing anything.
+func TestClawdbotEditAgentsPreservation(t *testing.T) {
+	c := &Clawdbot{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+	configDir := filepath.Join(tmpDir, ".clawdbot")
+	configPath := filepath.Join(configDir, "clawdbot.json")
+
+	// editSeeded writes content as the config, runs Edit, and returns the
+	// "agents" section of the rewritten file.
+	editSeeded := func(t *testing.T, content string) map[string]any {
+		t.Helper()
+		if err := os.RemoveAll(configDir); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(configPath, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+		if err := c.Edit([]string{"llama3.2"}); err != nil {
+			t.Fatal(err)
+		}
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var cfg map[string]any
+		if err := json.Unmarshal(data, &cfg); err != nil {
+			t.Fatal(err)
+		}
+		agents, ok := cfg["agents"].(map[string]any)
+		if !ok {
+			t.Fatalf("agents section missing or not an object: %v", cfg["agents"])
+		}
+		return agents
+	}
+
+	t.Run("preserve other agent defaults", func(t *testing.T) {
+		agents := editSeeded(t, `{"agents":{"defaults":{"model":{"primary":"old"},"temperature":0.7}}}`)
+		defaults, _ := agents["defaults"].(map[string]any)
+		if defaults["temperature"] != 0.7 {
+			t.Error("temperature setting was lost")
+		}
+	})
+
+	t.Run("preserve other agents besides defaults", func(t *testing.T) {
+		agents := editSeeded(t, `{"agents":{"defaults":{},"custom-agent":{"foo":"bar"}}}`)
+		if agents["custom-agent"] == nil {
+			t.Error("custom-agent was lost")
+		}
+	})
+}
+
+// testClawdbotFixture is a clawdbot.json with unrelated top-level keys, a
+// second provider, an existing ollama model entry, and extra agent settings.
+// The Edit tests below seed it to check what survives a rewrite.
+const testClawdbotFixture = `{
+  "theme": "dark",
+  "mcp": {"servers": {"custom": {"enabled": true}}},
+  "models": {
+    "providers": {
+      "anthropic": {"apiKey": "xxx"},
+      "ollama": {
+        "baseUrl": "http://127.0.0.1:11434/v1",
+        "models": [{"id": "old-model", "customField": "preserved"}]
+      }
+    }
+  },
+  "agents": {
+    "defaults": {"model": {"primary": "old"}, "temperature": 0.7},
+    "custom-agent": {"foo": "bar"}
+  }
+}`
+
+// TestClawdbotEdit_RoundTrip seeds the shared fixture, runs Edit once, and
+// checks that the top-level keys, the anthropic provider, the custom agent
+// and the temperature default are all still present.
+func TestClawdbotEdit_RoundTrip(t *testing.T) {
+	bot := &Clawdbot{}
+	home := t.TempDir()
+	setTestHome(t, home)
+	dir := filepath.Join(home, ".clawdbot")
+	file := filepath.Join(dir, "clawdbot.json")
+
+	os.MkdirAll(dir, 0o755)
+	os.WriteFile(file, []byte(testClawdbotFixture), 0o644)
+
+	if err := bot.Edit([]string{"llama3.2", "mistral"}); err != nil {
+		t.Fatal(err)
+	}
+
+	raw, _ := os.ReadFile(file)
+	var got map[string]any
+	json.Unmarshal(raw, &got)
+
+	// Top-level keys unrelated to models/agents.
+	if got["theme"] != "dark" {
+		t.Error("theme not preserved")
+	}
+	mcpServers := got["mcp"].(map[string]any)["servers"].(map[string]any)
+	if mcpServers["custom"] == nil {
+		t.Error("mcp.servers.custom not preserved")
+	}
+
+	// Providers other than ollama.
+	providers := got["models"].(map[string]any)["providers"].(map[string]any)
+	if providers["anthropic"] == nil {
+		t.Error("anthropic provider not preserved")
+	}
+
+	// Agent settings other than the primary model.
+	agents := got["agents"].(map[string]any)
+	if agents["custom-agent"] == nil {
+		t.Error("custom-agent not preserved")
+	}
+	agentDefaults := agents["defaults"].(map[string]any)
+	if agentDefaults["temperature"] != 0.7 {
+		t.Error("temperature not preserved")
+	}
+}
+
+// TestClawdbotEdit_Idempotent checks that running Edit twice with the same
+// models yields byte-identical files. Both Edit calls are checked: if they
+// failed silently the file would stay at the fixture content and the
+// comparison would pass without testing anything.
+func TestClawdbotEdit_Idempotent(t *testing.T) {
+	c := &Clawdbot{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+	configDir := filepath.Join(tmpDir, ".clawdbot")
+	configPath := filepath.Join(configDir, "clawdbot.json")
+
+	if err := os.MkdirAll(configDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	// editAndRead runs Edit and returns the resulting file content.
+	editAndRead := func() string {
+		t.Helper()
+		if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
+			t.Fatal(err)
+		}
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		return string(data)
+	}
+
+	firstData := editAndRead()
+	secondData := editAndRead()
+
+	if firstData != secondData {
+		t.Error("repeated edits with same models produced different results")
+	}
+}
+
+// TestClawdbotEdit_MultipleConsecutiveEdits alternates between two model
+// lists over ten Edit calls and checks the file is still valid JSON with the
+// fixture's theme key intact.
+func TestClawdbotEdit_MultipleConsecutiveEdits(t *testing.T) {
+	bot := &Clawdbot{}
+	home := t.TempDir()
+	setTestHome(t, home)
+	dir := filepath.Join(home, ".clawdbot")
+	file := filepath.Join(dir, "clawdbot.json")
+
+	os.MkdirAll(dir, 0o755)
+	os.WriteFile(file, []byte(testClawdbotFixture), 0o644)
+
+	for round := range 10 {
+		var models []string
+		if round%2 == 0 {
+			models = []string{"model-x", "model-y", "model-z"}
+		} else {
+			models = []string{"model-a", "model-b"}
+		}
+		if err := bot.Edit(models); err != nil {
+			t.Fatalf("edit %d failed: %v", round, err)
+		}
+	}
+
+	raw, _ := os.ReadFile(file)
+	var parsed map[string]any
+	if err := json.Unmarshal(raw, &parsed); err != nil {
+		t.Fatalf("file is not valid JSON after multiple edits: %v", err)
+	}
+
+	if parsed["theme"] != "dark" {
+		t.Error("theme lost after multiple edits")
+	}
+}
+
+// TestClawdbotEdit_BackupCreated checks that Edit leaves a copy of the
+// previous config among the clawdbot.json.* files in the ollama-backups
+// directory under os.TempDir(). The original content embeds the process id so
+// it can be told apart from backups left by other runs.
+func TestClawdbotEdit_BackupCreated(t *testing.T) {
+	bot := &Clawdbot{}
+	home := t.TempDir()
+	setTestHome(t, home)
+	dir := filepath.Join(home, ".clawdbot")
+	file := filepath.Join(dir, "clawdbot.json")
+	backupPattern := filepath.Join(os.TempDir(), "ollama-backups", "clawdbot.json.*")
+
+	os.MkdirAll(dir, 0o755)
+	marker := fmt.Sprintf("test-marker-%d", os.Getpid())
+	original := fmt.Sprintf(`{"theme": "%s"}`, marker)
+	os.WriteFile(file, []byte(original), 0o644)
+
+	if err := bot.Edit([]string{"model-a"}); err != nil {
+		t.Fatal(err)
+	}
+
+	candidates, _ := filepath.Glob(backupPattern)
+	for _, candidate := range candidates {
+		content, _ := os.ReadFile(candidate)
+		if string(content) == original {
+			// Found a backup holding the pre-edit content.
+			return
+		}
+	}
+
+	t.Error("backup with original content not found")
+}
+
+// TestClawdbotEdit_CreatesDirectoryIfMissing checks that Edit creates the
+// .clawdbot directory when it does not exist yet.
+func TestClawdbotEdit_CreatesDirectoryIfMissing(t *testing.T) {
+	bot := &Clawdbot{}
+	home := t.TempDir()
+	setTestHome(t, home)
+	dir := filepath.Join(home, ".clawdbot")
+
+	_, before := os.Stat(dir)
+	if !os.IsNotExist(before) {
+		t.Fatal("directory should not exist before test")
+	}
+
+	if err := bot.Edit([]string{"model-a"}); err != nil {
+		t.Fatal(err)
+	}
+
+	_, after := os.Stat(dir)
+	if os.IsNotExist(after) {
+		t.Fatal("directory was not created")
+	}
+}
--- a/cmd/config/codex.go
+++ b/cmd/config/codex.go
@@ -0,0 +1,61 @@
+package config
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+
+	"golang.org/x/mod/semver"
+)
+
+// Codex implements Runner for Codex integration.
+// It holds no state; Run shells out to the "codex" executable.
+type Codex struct{}
+
+// String returns the fixed name "Codex".
+func (c *Codex) String() string {
+	return "Codex"
+}
+
+// args builds the codex command line: always "--oss", followed by
+// "-m <model>" when a model name is given.
+func (c *Codex) args(model string) []string {
+	if model == "" {
+		return []string{"--oss"}
+	}
+	return []string{"--oss", "-m", model}
+}
+
+// Run checks the installed codex version and then executes codex with the
+// arguments from args, wired to this process's stdin, stdout and stderr.
+// It returns the version-check error or the error from running the command.
+func (c *Codex) Run(model string) error {
+	err := checkCodexVersion()
+	if err != nil {
+		return err
+	}
+
+	codex := exec.Command("codex", c.args(model)...)
+	codex.Stdin, codex.Stdout, codex.Stderr = os.Stdin, os.Stdout, os.Stderr
+	return codex.Run()
+}
+
+func checkCodexVersion() error {
+	if _, err := exec.LookPath("codex"); err != nil {
+		return fmt.Errorf("codex is not installed, install with: npm install -g @openai/codex")
+	}
+
+	out, err := exec.Command("codex", "--version").Output()
+	if err != nil {
+		return fmt.Errorf("failed to get codex version: %w", err)
+	}
+
+	// Parse output like "codex-cli 0.87.0"
+	fields := strings.Fields(strings.TrimSpace(string(out)))
+	if len(fields) < 2 {
+		return fmt.Errorf("unexpected codex version output: %s", string(out))
+	}
+
+	version := "v" + fields[len(fields)-1]
+	minVersion := "v0.81.0"
+
+	if semver.Compare(version, minVersion) < 0 {
+		return fmt.Errorf("codex version %s is too old, minimum required is %s, update with: npm update -g @openai/codex", fields[len(fields)-1], "0.81.0")
+	}
+
+	return nil
+}
--- a/cmd/config/codex_test.go
+++ b/cmd/config/codex_test.go
@@ -0,0 +1,28 @@
+package config
+
+import (
+	"slices"
+	"testing"
+)
+
+// TestCodexArgs checks the argument list produced with and without a model.
+func TestCodexArgs(t *testing.T) {
+	codex := &Codex{}
+
+	for _, tc := range []struct {
+		name  string
+		model string
+		want  []string
+	}{
+		{"with model", "llama3.2", []string{"--oss", "-m", "llama3.2"}},
+		{"empty model", "", []string{"--oss"}},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := codex.args(tc.model); !slices.Equal(got, tc.want) {
+				t.Errorf("args(%q) = %v, want %v", tc.model, got, tc.want)
+			}
+		})
+	}
+}
--- a/cmd/config/config.go
+++ b/cmd/config/config.go
@@ -0,0 +1,115 @@
+// Package config provides integration configuration for external coding tools
+// (Claude Code, Codex, Droid, OpenCode) to use Ollama models.
+package config
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// integration is the per-application entry stored in the config file.
+type integration struct {
+	// Models is the list of model names saved for the application.
+	Models []string `json:"models"`
+}
+
+// config is the top-level shape of config.json.
+type config struct {
+	// Integrations maps an application name to its entry. saveIntegration and
+	// loadIntegration lowercase the name before using it as the key.
+	Integrations map[string]*integration `json:"integrations"`
+}
+
+// configPath returns the location of config.json under the user's home
+// directory (<home>/.ollama/config/config.json), or the error from
+// os.UserHomeDir when the home directory cannot be determined.
+func configPath() (string, error) {
+	homeDir, err := os.UserHomeDir()
+	if err == nil {
+		return filepath.Join(homeDir, ".ollama", "config", "config.json"), nil
+	}
+	return "", err
+}
+
+// load reads and decodes the config file at configPath.
+//
+// A missing file is not an error: an empty config with an initialized
+// Integrations map is returned. Any other read error is returned as-is, and
+// invalid JSON yields an error that includes the file path. The returned
+// config always has a non-nil Integrations map without nil entries.
+func load() (*config, error) {
+	path, err := configPath()
+	if err != nil {
+		return nil, err
+	}
+
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return &config{Integrations: make(map[string]*integration)}, nil
+		}
+		return nil, err
+	}
+
+	var cfg config
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		return nil, fmt.Errorf("failed to parse config: %w, at: %s", err, path)
+	}
+	if cfg.Integrations == nil {
+		cfg.Integrations = make(map[string]*integration)
+	}
+	// An entry written as `"name": null` decodes to a nil pointer. Drop such
+	// entries so callers can dereference every value in the map.
+	for name, ic := range cfg.Integrations {
+		if ic == nil {
+			delete(cfg.Integrations, name)
+		}
+	}
+	return &cfg, nil
+}
+
+// save writes cfg as indented JSON to configPath, creating the parent
+// directory first. The write goes through writeWithBackup.
+func save(cfg *config) error {
+	path, err := configPath()
+	if err != nil {
+		return err
+	}
+
+	if err = os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		return err
+	}
+
+	encoded, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return err
+	}
+	return writeWithBackup(path, encoded)
+}
+
+// saveIntegration stores models under the lowercased appName, replacing any
+// existing entry, and writes the config back to disk.
+// It returns an error for an empty appName or when loading or saving fails.
+func saveIntegration(appName string, models []string) error {
+	if len(appName) == 0 {
+		return errors.New("app name cannot be empty")
+	}
+
+	cfg, err := load()
+	if err != nil {
+		return err
+	}
+
+	key := strings.ToLower(appName)
+	cfg.Integrations[key] = &integration{Models: models}
+
+	return save(cfg)
+}
+
+// loadIntegration returns the saved entry for appName (matched
+// case-insensitively by lowercasing the name).
+//
+// It returns os.ErrNotExist when there is no usable entry, and the error from
+// load when the config cannot be read. A non-nil entry is always returned
+// together with a nil error.
+func loadIntegration(appName string) (*integration, error) {
+	cfg, err := load()
+	if err != nil {
+		return nil, err
+	}
+
+	// A key whose JSON value is null is present in the map but holds a nil
+	// pointer; treat it as missing so callers never receive (nil, nil).
+	ic, ok := cfg.Integrations[strings.ToLower(appName)]
+	if !ok || ic == nil {
+		return nil, os.ErrNotExist
+	}
+
+	return ic, nil
+}
+
+// listIntegrations returns a copy of every saved integration entry.
+// The order of the result follows map iteration and is therefore unspecified.
+func listIntegrations() ([]integration, error) {
+	cfg, err := load()
+	if err != nil {
+		return nil, err
+	}
+
+	result := make([]integration, 0, len(cfg.Integrations))
+	for _, ic := range cfg.Integrations {
+		// A null JSON value decodes to a nil pointer; skip it instead of
+		// panicking on the dereference below.
+		if ic == nil {
+			continue
+		}
+		result = append(result, *ic)
+	}
+
+	return result, nil
+}
--- a/cmd/config/config_test.go
+++ b/cmd/config/config_test.go
@@ -0,0 +1,373 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// setTestHome points the home-directory environment variables at dir for the
+// duration of the test: USERPROFILE (Windows) and HOME (Unix).
+func setTestHome(t *testing.T, dir string) {
+	t.Setenv("USERPROFILE", dir)
+	t.Setenv("HOME", dir)
+}
+
+// editorPaths is a test helper that returns r.Paths() when the runner also
+// implements Editor, and nil otherwise.
+func editorPaths(r Runner) []string {
+	editor, ok := r.(Editor)
+	if !ok {
+		return nil
+	}
+	return editor.Paths()
+}
+
+// TestIntegrationConfig exercises saveIntegration and loadIntegration against
+// a config file in a temporary home directory shared by all subtests.
+func TestIntegrationConfig(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// mustSave and mustLoad abort the calling subtest on failure so later
+	// assertions never dereference a nil result.
+	mustSave := func(t *testing.T, app string, models []string) {
+		t.Helper()
+		if err := saveIntegration(app, models); err != nil {
+			t.Fatalf("saveIntegration(%q): %v", app, err)
+		}
+	}
+	mustLoad := func(t *testing.T, app string) *integration {
+		t.Helper()
+		ic, err := loadIntegration(app)
+		if err != nil {
+			t.Fatalf("loadIntegration(%q): %v", app, err)
+		}
+		return ic
+	}
+	// defaultModel returns the first saved model, or "" when there is none.
+	defaultModel := func(ic *integration) string {
+		if len(ic.Models) > 0 {
+			return ic.Models[0]
+		}
+		return ""
+	}
+
+	t.Run("save and load round-trip", func(t *testing.T) {
+		models := []string{"llama3.2", "mistral", "qwen2.5"}
+		mustSave(t, "claude", models)
+
+		config := mustLoad(t, "claude")
+		// Fatal here: indexing below would panic on a shorter slice.
+		if len(config.Models) != len(models) {
+			t.Fatalf("expected %d models, got %d", len(models), len(config.Models))
+		}
+		for i, m := range models {
+			if config.Models[i] != m {
+				t.Errorf("model %d: expected %s, got %s", i, m, config.Models[i])
+			}
+		}
+	})
+
+	t.Run("defaultModel returns first model", func(t *testing.T) {
+		mustSave(t, "codex", []string{"model-a", "model-b"})
+
+		if got := defaultModel(mustLoad(t, "codex")); got != "model-a" {
+			t.Errorf("expected model-a, got %s", got)
+		}
+	})
+
+	t.Run("defaultModel returns empty for no models", func(t *testing.T) {
+		if got := defaultModel(&integration{Models: []string{}}); got != "" {
+			t.Errorf("expected empty string, got %s", got)
+		}
+	})
+
+	t.Run("app name is case-insensitive", func(t *testing.T) {
+		mustSave(t, "Claude", []string{"model-x"})
+
+		if got := defaultModel(mustLoad(t, "claude")); got != "model-x" {
+			t.Errorf("expected model-x, got %s", got)
+		}
+	})
+
+	t.Run("multiple integrations in single file", func(t *testing.T) {
+		mustSave(t, "app1", []string{"model-1"})
+		mustSave(t, "app2", []string{"model-2"})
+
+		if got := defaultModel(mustLoad(t, "app1")); got != "model-1" {
+			t.Errorf("expected model-1, got %s", got)
+		}
+		if got := defaultModel(mustLoad(t, "app2")); got != "model-2" {
+			t.Errorf("expected model-2, got %s", got)
+		}
+	})
+}
+
+// TestListIntegrations checks listIntegrations on an empty config and after
+// two integrations have been saved.
+func TestListIntegrations(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	t.Run("returns empty when no integrations", func(t *testing.T) {
+		configs, err := listIntegrations()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if len(configs) != 0 {
+			t.Errorf("expected 0 integrations, got %d", len(configs))
+		}
+	})
+
+	t.Run("returns all saved integrations", func(t *testing.T) {
+		// A failed save would otherwise surface only as a confusing count.
+		if err := saveIntegration("claude", []string{"model-1"}); err != nil {
+			t.Fatal(err)
+		}
+		if err := saveIntegration("droid", []string{"model-2"}); err != nil {
+			t.Fatal(err)
+		}
+
+		configs, err := listIntegrations()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if len(configs) != 2 {
+			t.Errorf("expected 2 integrations, got %d", len(configs))
+		}
+	})
+}
+
+// TestEditorPaths checks which config paths each registered integration
+// reports, before and after the corresponding files exist.
+func TestEditorPaths(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// writeEmptyJSON creates path (and its parent directories) containing
+	// "{}" and fails the subtest if the fixture cannot be written.
+	writeEmptyJSON := func(t *testing.T, path string) {
+		t.Helper()
+		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(path, []byte(`{}`), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	t.Run("returns empty for claude (no Editor)", func(t *testing.T) {
+		r := integrations["claude"]
+		paths := editorPaths(r)
+		if len(paths) != 0 {
+			t.Errorf("expected no paths for claude, got %v", paths)
+		}
+	})
+
+	t.Run("returns empty for codex (no Editor)", func(t *testing.T) {
+		r := integrations["codex"]
+		paths := editorPaths(r)
+		if len(paths) != 0 {
+			t.Errorf("expected no paths for codex, got %v", paths)
+		}
+	})
+
+	t.Run("returns empty for droid when no config exists", func(t *testing.T) {
+		r := integrations["droid"]
+		paths := editorPaths(r)
+		if len(paths) != 0 {
+			t.Errorf("expected no paths, got %v", paths)
+		}
+	})
+
+	t.Run("returns path for droid when config exists", func(t *testing.T) {
+		home, err := os.UserHomeDir()
+		if err != nil {
+			t.Fatal(err)
+		}
+		writeEmptyJSON(t, filepath.Join(home, ".factory", "settings.json"))
+
+		r := integrations["droid"]
+		paths := editorPaths(r)
+		if len(paths) != 1 {
+			t.Errorf("expected 1 path, got %d", len(paths))
+		}
+	})
+
+	t.Run("returns paths for opencode when configs exist", func(t *testing.T) {
+		home, err := os.UserHomeDir()
+		if err != nil {
+			t.Fatal(err)
+		}
+		writeEmptyJSON(t, filepath.Join(home, ".config", "opencode", "opencode.json"))
+		writeEmptyJSON(t, filepath.Join(home, ".local", "state", "opencode", "model.json"))
+
+		r := integrations["opencode"]
+		paths := editorPaths(r)
+		if len(paths) != 2 {
+			t.Errorf("expected 2 paths, got %d: %v", len(paths), paths)
+		}
+	})
+}
+
+// TestLoadIntegration_CorruptedJSON verifies that loadIntegration reports an
+// error when config.json is not valid JSON.
+func TestLoadIntegration_CorruptedJSON(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// Create corrupted config.json file
+	dir := filepath.Join(tmpDir, ".ollama", "config")
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{corrupted json`), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	// load fails on the malformed file, so the error must be the parse
+	// failure rather than the "integration not found" sentinel.
+	_, err := loadIntegration("test")
+	if err == nil {
+		t.Fatal("expected error for corrupted config file, got nil")
+	}
+	if os.IsNotExist(err) {
+		t.Errorf("expected a parse error, got not-exist error: %v", err)
+	}
+}
+
+// TestSaveIntegration_NilModels verifies that saving a nil model list
+// succeeds and loads back as an empty (nil or zero-length) list.
+func TestSaveIntegration_NilModels(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	if err := saveIntegration("test", nil); err != nil {
+		t.Fatalf("saveIntegration with nil models failed: %v", err)
+	}
+
+	config, err := loadIntegration("test")
+	if err != nil {
+		t.Fatalf("loadIntegration failed: %v", err)
+	}
+
+	// len is 0 for both a nil and an empty slice, so one check covers both.
+	if len(config.Models) != 0 {
+		t.Errorf("expected empty or nil models, got %v", config.Models)
+	}
+}
+
+// TestSaveIntegration_EmptyAppName verifies that an empty app name is
+// rejected with the expected message.
+func TestSaveIntegration_EmptyAppName(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	err := saveIntegration("", []string{"model"})
+	switch {
+	case err == nil:
+		t.Error("expected error for empty app name, got nil")
+	case !strings.Contains(err.Error(), "app name cannot be empty"):
+		t.Errorf("expected 'app name cannot be empty' error, got: %v", err)
+	}
+}
+
+// TestLoadIntegration_NonexistentIntegration verifies that looking up an
+// integration that was never saved yields os.ErrNotExist.
+func TestLoadIntegration_NonexistentIntegration(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	_, err := loadIntegration("nonexistent")
+	if err == nil {
+		t.Fatal("expected error for nonexistent integration, got nil")
+	}
+	// Callers distinguish "not configured yet" from real failures by this
+	// sentinel, so the error kind is asserted rather than merely logged.
+	if !os.IsNotExist(err) {
+		t.Errorf("expected os.ErrNotExist, got: %v", err)
+	}
+}
+
+// TestConfigPath verifies that configPath resolves under the test home.
+func TestConfigPath(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	want := filepath.Join(home, ".ollama", "config", "config.json")
+
+	got, err := configPath()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got != want {
+		t.Errorf("expected %s, got %s", want, got)
+	}
+}
+
+// TestLoad covers load for a missing file, a valid file and malformed JSON.
+// The subtests share one temporary home directory and run in order.
+func TestLoad(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// writeConfig replaces config.json with content, failing the subtest if
+	// the fixture cannot be written.
+	writeConfig := func(t *testing.T, content string) {
+		t.Helper()
+		path, err := configPath()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	t.Run("returns empty config when file does not exist", func(t *testing.T) {
+		cfg, err := load()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if cfg == nil {
+			t.Fatal("expected non-nil config")
+		}
+		if cfg.Integrations == nil {
+			t.Error("expected non-nil Integrations map")
+		}
+		if len(cfg.Integrations) != 0 {
+			t.Errorf("expected empty Integrations, got %d", len(cfg.Integrations))
+		}
+	})
+
+	t.Run("loads existing config", func(t *testing.T) {
+		writeConfig(t, `{"integrations":{"test":{"models":["model-a"]}}}`)
+
+		cfg, err := load()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if cfg.Integrations["test"] == nil {
+			t.Fatal("expected test integration")
+		}
+		if len(cfg.Integrations["test"].Models) != 1 {
+			t.Errorf("expected 1 model, got %d", len(cfg.Integrations["test"].Models))
+		}
+	})
+
+	t.Run("returns error for corrupted JSON", func(t *testing.T) {
+		writeConfig(t, `{corrupted`)
+
+		_, err := load()
+		if err == nil {
+			t.Error("expected error for corrupted JSON")
+		}
+	})
+}
+
+// TestSave verifies that save creates the config file and that a saved
+// config loads back with the same integrations.
+func TestSave(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	t.Run("creates config file", func(t *testing.T) {
+		cfg := &config{
+			Integrations: map[string]*integration{
+				"test": {Models: []string{"model-a", "model-b"}},
+			},
+		}
+
+		if err := save(cfg); err != nil {
+			t.Fatal(err)
+		}
+
+		path, err := configPath()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if _, err := os.Stat(path); os.IsNotExist(err) {
+			t.Error("config file was not created")
+		}
+	})
+
+	t.Run("round-trip preserves data", func(t *testing.T) {
+		cfg := &config{
+			Integrations: map[string]*integration{
+				"claude": {Models: []string{"llama3.2", "mistral"}},
+				"codex":  {Models: []string{"qwen2.5"}},
+			},
+		}
+
+		if err := save(cfg); err != nil {
+			t.Fatal(err)
+		}
+
+		loaded, err := load()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if len(loaded.Integrations) != 2 {
+			t.Errorf("expected 2 integrations, got %d", len(loaded.Integrations))
+		}
+		// Fatal: the Models check below dereferences this entry.
+		claude := loaded.Integrations["claude"]
+		if claude == nil {
+			t.Fatal("missing claude integration")
+		}
+		if len(claude.Models) != 2 {
+			t.Errorf("expected 2 models for claude, got %d", len(claude.Models))
+		}
+	})
+}
--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -0,0 +1,186 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"slices"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+// Droid implements Runner and Editor for Droid integration.
+// It holds no state; its methods read and write .factory/settings.json under
+// the user's home directory.
+type Droid struct{}
+
+// droidSettings represents the Droid settings.json file (only fields we use).
+// Edit decodes the file into this struct in addition to a generic map, and
+// writes the map back, so fields not listed here are preserved.
+type droidSettings struct {
+	CustomModels           []modelEntry    `json:"customModels"`
+	SessionDefaultSettings sessionSettings `json:"sessionDefaultSettings"`
+}
+
+// sessionSettings is the "sessionDefaultSettings" object of settings.json.
+type sessionSettings struct {
+	// Model is set by Edit to the ID of the first configured model.
+	Model string `json:"model"`
+	// ReasoningEffort is reset to "none" by Edit when it is not one of
+	// validReasoningEfforts.
+	ReasoningEffort string `json:"reasoningEffort"`
+}
+
+// modelEntry is one element of the "customModels" array in settings.json.
+type modelEntry struct {
+	Model       string `json:"model"`
+	DisplayName string `json:"displayName"`
+	BaseURL     string `json:"baseUrl"`
+	// APIKey doubles as an ownership marker: Edit and Models treat entries
+	// whose key is "ollama" as the ones managed by this package.
+	APIKey          string `json:"apiKey"`
+	Provider        string `json:"provider"`
+	MaxOutputTokens int    `json:"maxOutputTokens"`
+	SupportsImages  bool   `json:"supportsImages"`
+	// ID is written by Edit as "custom:<model>-<index>".
+	ID string `json:"id"`
+	// Index is the entry's position among the models written by Edit.
+	Index int `json:"index"`
+}
+
+// String returns the fixed name "Droid".
+func (d *Droid) String() string {
+	return "Droid"
+}
+
+// Run refreshes the Droid settings file and then executes droid attached to
+// this process's stdin, stdout and stderr.
+//
+// The model list passed to Edit is the saved "droid" integration when one
+// with at least one model exists; otherwise it is just model.
+func (d *Droid) Run(model string) error {
+	_, lookErr := exec.LookPath("droid")
+	if lookErr != nil {
+		return fmt.Errorf("droid is not installed, install from https://docs.factory.ai/cli/getting-started/quickstart")
+	}
+
+	// Bring the config up to date before launching.
+	selected := []string{model}
+	saved, err := loadIntegration("droid")
+	if err == nil && len(saved.Models) > 0 {
+		selected = saved.Models
+	}
+	if err := d.Edit(selected); err != nil {
+		return fmt.Errorf("setup failed: %w", err)
+	}
+
+	droid := exec.Command("droid")
+	droid.Stdin, droid.Stdout, droid.Stderr = os.Stdin, os.Stdout, os.Stderr
+	return droid.Run()
+}
+
+// Paths returns the settings.json path when that file can be stat'ed, and
+// nil when it cannot or when the home directory is unknown.
+func (d *Droid) Paths() []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return nil
+	}
+
+	settingsFile := filepath.Join(home, ".factory", "settings.json")
+	if _, statErr := os.Stat(settingsFile); statErr != nil {
+		return nil
+	}
+	return []string{settingsFile}
+}
+
+// Edit rewrites the Droid settings file so that its Ollama model entries are
+// exactly models, in order, with the first one as the session default.
+//
+// Entries in "customModels" whose apiKey is not "ollama" are kept as they
+// are, after the rebuilt Ollama entries. Unknown top-level fields and unknown
+// fields inside "sessionDefaultSettings" are preserved. An empty models list
+// is a no-op. The file is written through writeWithBackup.
+//
+// It returns an error when the home directory is unknown, when the settings
+// file exists but cannot be read or parsed, or when writing fails.
+func (d *Droid) Edit(models []string) error {
+	if len(models) == 0 {
+		return nil
+	}
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	settingsPath := filepath.Join(home, ".factory", "settings.json")
+	if err := os.MkdirAll(filepath.Dir(settingsPath), 0o755); err != nil {
+		return err
+	}
+
+	// Read file once, unmarshal twice:
+	// map preserves unknown fields for writing back (including extra fields in model entries)
+	settingsMap := make(map[string]any)
+	var settings droidSettings
+	existing, err := os.ReadFile(settingsPath)
+	switch {
+	case err == nil:
+		if err := json.Unmarshal(existing, &settingsMap); err != nil {
+			return fmt.Errorf("failed to parse settings file: %w, at: %s", err, settingsPath)
+		}
+		// A file containing the JSON literal null decodes successfully but
+		// leaves the map nil; assigning into it below would panic.
+		if settingsMap == nil {
+			settingsMap = make(map[string]any)
+		}
+		// The typed view is best-effort: on error the zero values are fine.
+		_ = json.Unmarshal(existing, &settings)
+	case !os.IsNotExist(err):
+		// Do not treat an unreadable file as empty settings.
+		return fmt.Errorf("failed to read settings file: %w, at: %s", err, settingsPath)
+	}
+
+	// Keep only non-Ollama models from the raw map (preserves extra fields)
+	// Rebuild Ollama models
+	var nonOllamaModels []any
+	if rawModels, ok := settingsMap["customModels"].([]any); ok {
+		for _, raw := range rawModels {
+			if m, ok := raw.(map[string]any); ok {
+				if m["apiKey"] != "ollama" {
+					nonOllamaModels = append(nonOllamaModels, raw)
+				}
+			}
+		}
+	}
+
+	// The base URL is the same for every entry, so resolve it once.
+	baseURL := envconfig.Host().String() + "/v1"
+
+	// Build new Ollama model entries with sequential indices (0, 1, 2, ...)
+	newModels := make([]any, 0, len(models)+len(nonOllamaModels))
+	var defaultModelID string
+	for i, model := range models {
+		modelID := fmt.Sprintf("custom:%s-%d", model, i)
+		newModels = append(newModels, modelEntry{
+			Model:           model,
+			DisplayName:     model,
+			BaseURL:         baseURL,
+			APIKey:          "ollama",
+			Provider:        "generic-chat-completion-api",
+			MaxOutputTokens: 64000,
+			SupportsImages:  false,
+			ID:              modelID,
+			Index:           i,
+		})
+		if i == 0 {
+			defaultModelID = modelID
+		}
+	}
+
+	settingsMap["customModels"] = append(newModels, nonOllamaModels...)
+
+	// Update session default settings (preserve unknown fields in the nested object)
+	session, ok := settingsMap["sessionDefaultSettings"].(map[string]any)
+	if !ok {
+		session = make(map[string]any)
+	}
+	session["model"] = defaultModelID
+
+	if !isValidReasoningEffort(settings.SessionDefaultSettings.ReasoningEffort) {
+		session["reasoningEffort"] = "none"
+	}
+
+	settingsMap["sessionDefaultSettings"] = session
+
+	out, err := json.MarshalIndent(settingsMap, "", "  ")
+	if err != nil {
+		return err
+	}
+	return writeWithBackup(settingsPath, out)
+}
+
+// Models returns the model names of the "customModels" entries in
+// settings.json whose apiKey is "ollama", in file order.
+// It returns nil when the file cannot be read or decoded.
+func (d *Droid) Models() []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return nil
+	}
+
+	raw, err := os.ReadFile(filepath.Join(home, ".factory", "settings.json"))
+	if err != nil {
+		return nil
+	}
+
+	var parsed droidSettings
+	if json.Unmarshal(raw, &parsed) != nil {
+		return nil
+	}
+
+	var names []string
+	for i := range parsed.CustomModels {
+		if entry := &parsed.CustomModels[i]; entry.APIKey == "ollama" {
+			names = append(names, entry.Model)
+		}
+	}
+	return names
+}
+
+// validReasoningEfforts lists the accepted reasoningEffort values.
+var validReasoningEfforts = []string{"high", "medium", "low", "none"}
+
+// isValidReasoningEffort reports whether effort is exactly one of
+// validReasoningEfforts (the comparison is case-sensitive).
+func isValidReasoningEffort(effort string) bool {
+	return slices.Index(validReasoningEfforts, effort) >= 0
+}
--- a/cmd/config/droid_test.go
+++ b/cmd/config/droid_test.go
--- a/cmd/config/files.go
+++ b/cmd/config/files.go
@@ -0,0 +1,99 @@
+package config
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+// readJSONFile reads the file at path and decodes it as a JSON object.
+// It returns a nil map together with the read or decode error on failure.
+func readJSONFile(path string) (map[string]any, error) {
+	raw, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return nil, readErr
+	}
+
+	var parsed map[string]any
+	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
+		return nil, decodeErr
+	}
+	return parsed, nil
+}
+
+// copyFile writes the full content of src to dst, passing src's permission
+// bits as the mode for os.WriteFile.
+func copyFile(src, dst string) error {
+	srcInfo, statErr := os.Stat(src)
+	if statErr != nil {
+		return statErr
+	}
+
+	content, readErr := os.ReadFile(src)
+	if readErr != nil {
+		return readErr
+	}
+
+	perm := srcInfo.Mode().Perm()
+	return os.WriteFile(dst, content, perm)
+}
+
+// backupDir returns the "ollama-backups" directory under os.TempDir().
+func backupDir() string {
+	tempRoot := os.TempDir()
+	return filepath.Join(tempRoot, "ollama-backups")
+}
+
+// backupToTmp copies srcPath into backupDir as "<base name>.<unix seconds>"
+// and returns the path of the copy. The backup directory is created if
+// needed. On failure it returns an empty path and the error.
+func backupToTmp(srcPath string) (string, error) {
+	destDir := backupDir()
+	if mkErr := os.MkdirAll(destDir, 0o755); mkErr != nil {
+		return "", mkErr
+	}
+
+	name := fmt.Sprintf("%s.%d", filepath.Base(srcPath), time.Now().Unix())
+	dest := filepath.Join(destDir, name)
+	if copyErr := copyFile(srcPath, dest); copyErr != nil {
+		return "", copyErr
+	}
+	return dest, nil
+}
+
+// writeWithBackup writes data to path via temp file + rename, backing up any
+// existing file first.
+//
+// When path already exists and its content differs from data, a copy is made
+// with backupToTmp before anything is written; if that backup fails, nothing
+// is written. A missing file is written without a backup. When path is a
+// symlink to an existing file, the link's target is replaced and the link
+// itself is kept. The permission bits of an existing file are carried over to
+// the replacement; a new file keeps the mode os.CreateTemp gives it.
+//
+// The parent directory must already exist. Errors are wrapped with the step
+// that failed (backup, read, create temp, write, sync, close, chmod, rename).
+func writeWithBackup(path string, data []byte) error {
+	var (
+		backupPath string
+		mode       os.FileMode
+		keepMode   bool
+	)
+	// backup must be created before any writes to the target file
+	existingContent, err := os.ReadFile(path)
+	switch {
+	case err == nil:
+		if info, statErr := os.Stat(path); statErr == nil {
+			mode, keepMode = info.Mode().Perm(), true
+		}
+		if !bytes.Equal(existingContent, data) {
+			backupPath, err = backupToTmp(path)
+			if err != nil {
+				return fmt.Errorf("backup failed: %w", err)
+			}
+		}
+	case !os.IsNotExist(err):
+		return fmt.Errorf("read existing file: %w", err)
+	}
+
+	// os.Rename replaces a symlink itself rather than the file it points to,
+	// which would silently turn a linked config into a regular file. Write to
+	// the resolved location instead. Resolution fails for a path that does
+	// not exist yet, in which case path is used as given.
+	target := path
+	if resolved, err := filepath.EvalSymlinks(path); err == nil {
+		target = resolved
+	}
+
+	// The temp file lives next to the target so the rename stays on one
+	// filesystem.
+	tmp, err := os.CreateTemp(filepath.Dir(target), ".tmp-*")
+	if err != nil {
+		return fmt.Errorf("create temp failed: %w", err)
+	}
+	tmpPath := tmp.Name()
+	discard := func() { _ = os.Remove(tmpPath) }
+
+	if _, err := tmp.Write(data); err != nil {
+		_ = tmp.Close()
+		discard()
+		return fmt.Errorf("write failed: %w", err)
+	}
+	if err := tmp.Sync(); err != nil {
+		_ = tmp.Close()
+		discard()
+		return fmt.Errorf("sync failed: %w", err)
+	}
+	if err := tmp.Close(); err != nil {
+		discard()
+		return fmt.Errorf("close failed: %w", err)
+	}
+
+	// CreateTemp always creates the file with mode 0600; restore the mode the
+	// replaced file had so the rewrite does not change its permissions.
+	if keepMode {
+		if err := os.Chmod(tmpPath, mode); err != nil {
+			discard()
+			return fmt.Errorf("chmod failed: %w", err)
+		}
+	}
+
+	if err := os.Rename(tmpPath, target); err != nil {
+		discard()
+		if backupPath != "" {
+			// Best effort: put the original content back if the target was
+			// disturbed; the rename error is what gets reported.
+			_ = copyFile(backupPath, target)
+		}
+		return fmt.Errorf("rename failed: %w", err)
+	}
+
+	return nil
+}
--- a/cmd/config/files_test.go
+++ b/cmd/config/files_test.go
@@ -0,0 +1,502 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+	"testing"
+)
+
+// mustMarshal encodes v as JSON indented with two spaces and fails the test
+// immediately if encoding fails.
+func mustMarshal(t *testing.T, v any) []byte {
+	t.Helper()
+	encoded, err := json.MarshalIndent(v, "", "  ")
+	if err == nil {
+		return encoded
+	}
+	t.Fatal(err)
+	return nil
+}
+
+// TestWriteWithBackup covers the basic contract of writeWithBackup: the file
+// is written, a backup of differing existing content lands in backupDir, and
+// no backup is made for new files or unchanged content.
+func TestWriteWithBackup(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// backupsFor returns the paths in backupDir named "<base>.<suffix>".
+	// The backup directory is shared across tests, so lookups are always
+	// scoped to a file name used by a single subtest.
+	backupsFor := func(t *testing.T, base string) []string {
+		t.Helper()
+		matches, err := filepath.Glob(filepath.Join(backupDir(), base+".*"))
+		if err != nil {
+			t.Fatalf("glob backups for %s: %v", base, err)
+		}
+		return matches
+	}
+
+	t.Run("creates file", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "new.json")
+		data := mustMarshal(t, map[string]string{"key": "value"})
+
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatal(err)
+		}
+
+		content, err := os.ReadFile(path)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		var result map[string]string
+		if err := json.Unmarshal(content, &result); err != nil {
+			t.Fatal(err)
+		}
+		if result["key"] != "value" {
+			t.Errorf("expected value, got %s", result["key"])
+		}
+	})
+
+	t.Run("creates backup in /tmp/ollama-backups", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "backup.json")
+
+		if err := os.WriteFile(path, []byte(`{"original": true}`), 0o644); err != nil {
+			t.Fatal(err)
+		}
+
+		data := mustMarshal(t, map[string]bool{"updated": true})
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatal(err)
+		}
+
+		var foundBackup bool
+		for _, backupPath := range backupsFor(t, "backup.json") {
+			backup, err := os.ReadFile(backupPath)
+			if err != nil {
+				continue
+			}
+			var backupData map[string]bool
+			if json.Unmarshal(backup, &backupData) == nil && backupData["original"] {
+				foundBackup = true
+				os.Remove(backupPath)
+				break
+			}
+		}
+
+		if !foundBackup {
+			t.Error("backup file not created in /tmp/ollama-backups")
+		}
+
+		current, err := os.ReadFile(path)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var currentData map[string]bool
+		if err := json.Unmarshal(current, &currentData); err != nil {
+			t.Fatal(err)
+		}
+		if !currentData["updated"] {
+			t.Error("file doesn't contain updated data")
+		}
+	})
+
+	t.Run("no backup for new file", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "nobak.json")
+
+		data := mustMarshal(t, map[string]string{"new": "file"})
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatal(err)
+		}
+
+		if len(backupsFor(t, "nobak.json")) != 0 {
+			t.Error("backup should not exist for new file")
+		}
+	})
+
+	t.Run("no backup when content unchanged", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "unchanged.json")
+
+		data := mustMarshal(t, map[string]string{"key": "value"})
+
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatal(err)
+		}
+		countBefore := len(backupsFor(t, "unchanged.json"))
+
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatal(err)
+		}
+		countAfter := len(backupsFor(t, "unchanged.json"))
+
+		if countAfter != countBefore {
+			t.Errorf("backup was created when content unchanged (before=%d, after=%d)", countBefore, countAfter)
+		}
+	})
+
+	t.Run("backup filename contains unix timestamp", func(t *testing.T) {
+		path := filepath.Join(tmpDir, "timestamped.json")
+
+		if err := os.WriteFile(path, []byte(`{"v": 1}`), 0o644); err != nil {
+			t.Fatal(err)
+		}
+		data := mustMarshal(t, map[string]int{"v": 2})
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatal(err)
+		}
+
+		var found bool
+		for _, backupPath := range backupsFor(t, "timestamped.json") {
+			name := filepath.Base(backupPath)
+			timestamp := name[len("timestamped.json."):]
+			if timestamp == "" {
+				continue
+			}
+			for _, c := range timestamp {
+				if c < '0' || c > '9' {
+					t.Errorf("backup filename timestamp contains non-numeric character: %s", name)
+				}
+			}
+			found = true
+			os.Remove(backupPath)
+			break
+		}
+		if !found {
+			t.Error("backup file with timestamp not found")
+		}
+	})
+}
+
+// Edge case tests for files.go
+
+// TestWriteWithBackup_FailsIfBackupFails documents critical behavior: if backup fails, we must not proceed.
+// User could lose their config with no way to recover.
+func TestWriteWithBackup_FailsIfBackupFails(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission tests unreliable on Windows")
+	}
+	if os.Geteuid() == 0 {
+		t.Skip("root is not restricted by directory permissions")
+	}
+
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "config.json")
+
+	// Create original file
+	originalContent := []byte(`{"original": true}`)
+	if err := os.WriteFile(path, originalContent, 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Make backup directory read-only to force backup failure
+	dir := backupDir()
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(dir, 0o444); err != nil {
+		// The directory is shared and may belong to another user.
+		t.Skipf("cannot make backup directory read-only: %v", err)
+	}
+	t.Cleanup(func() { os.Chmod(dir, 0o755) })
+
+	newContent := []byte(`{"updated": true}`)
+	err := writeWithBackup(path, newContent)
+
+	// Should fail because backup couldn't be created
+	if err == nil {
+		t.Error("expected error when backup fails, got nil")
+	}
+
+	// Original file should be preserved
+	current, readErr := os.ReadFile(path)
+	if readErr != nil {
+		t.Fatalf("read original file: %v", readErr)
+	}
+	if string(current) != string(originalContent) {
+		t.Errorf("original file was modified despite backup failure: got %s", string(current))
+	}
+}
+
+// TestWriteWithBackup_PermissionDenied verifies that writing into a
+// directory without write permission returns an error.
+// Common issue when config owned by root or wrong perms.
+func TestWriteWithBackup_PermissionDenied(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission tests unreliable on Windows")
+	}
+	if os.Geteuid() == 0 {
+		t.Skip("root is not restricted by directory permissions")
+	}
+
+	tmpDir := t.TempDir()
+
+	// Create a read-only directory
+	readOnlyDir := filepath.Join(tmpDir, "readonly")
+	if err := os.MkdirAll(readOnlyDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Chmod(readOnlyDir, 0o444); err != nil {
+		t.Fatal(err)
+	}
+	// Restore write access so t.TempDir can remove the directory afterwards.
+	t.Cleanup(func() { os.Chmod(readOnlyDir, 0o755) })
+
+	path := filepath.Join(readOnlyDir, "config.json")
+	err := writeWithBackup(path, []byte(`{"test": true}`))
+
+	if err == nil {
+		t.Error("expected permission error, got nil")
+	}
+}
+
+// TestWriteWithBackup_DirectoryDoesNotExist verifies that writeWithBackup
+// returns an error when the parent directory is missing; creating it is the
+// caller's job.
+func TestWriteWithBackup_DirectoryDoesNotExist(t *testing.T) {
+	missing := filepath.Join(t.TempDir(), "nonexistent", "subdir", "config.json")
+
+	// Should fail because directory doesn't exist
+	if err := writeWithBackup(missing, []byte(`{"test": true}`)); err == nil {
+		t.Error("expected error for nonexistent directory, got nil")
+	}
+}
+
+// TestWriteWithBackup_SymlinkTarget documents behavior when target is a symlink.
+// Documents what happens if user symlinks their config file.
+func TestWriteWithBackup_SymlinkTarget(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("symlink tests may require admin on Windows")
+	}
+
+	tmpDir := t.TempDir()
+	realFile := filepath.Join(tmpDir, "real.json")
+	symlink := filepath.Join(tmpDir, "link.json")
+
+	// Create real file and symlink
+	if err := os.WriteFile(realFile, []byte(`{"v": 1}`), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Symlink(realFile, symlink); err != nil {
+		t.Fatalf("create symlink: %v", err)
+	}
+
+	// Write through symlink
+	err := writeWithBackup(symlink, []byte(`{"v": 2}`))
+	if err != nil {
+		t.Fatalf("writeWithBackup through symlink failed: %v", err)
+	}
+
+	// Reading back through the link path must yield the new content. This
+	// check does not tell apart "target rewritten" from "link replaced by a
+	// regular file"; it only pins what a reader of the path observes.
+	content, err := os.ReadFile(symlink)
+	if err != nil {
+		t.Fatalf("read back through symlink path: %v", err)
+	}
+	if string(content) != `{"v": 2}` {
+		t.Errorf("symlink target not updated correctly: got %s", string(content))
+	}
+}
+
+// TestBackupToTmp_SpecialCharsInFilename verifies backup works with special characters.
+// User may have config files with unusual names.
+func TestBackupToTmp_SpecialCharsInFilename(t *testing.T) {
+	tmpDir := t.TempDir()
+
+	// File with spaces and special chars
+	path := filepath.Join(tmpDir, "my config (backup).json")
+	os.WriteFile(path, []byte(`{"test": true}`), 0o644)
+
+	backupPath, err := backupToTmp(path)
+	if err != nil {
+		t.Fatalf("backupToTmp with special chars failed: %v", err)
+	}
+
+	// Verify backup exists and has correct content
+	content, err := os.ReadFile(backupPath)
+	if err != nil {
+		t.Fatalf("could not read backup: %v", err)
+	}
+	if string(content) != `{"test": true}` {
+		t.Errorf("backup content mismatch: got %s", string(content))
+	}
+
+	os.Remove(backupPath)
+}
+
+// TestCopyFile_PreservesPermissions verifies that a copy made to a new
+// destination has the same permission bits as the source.
+func TestCopyFile_PreservesPermissions(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission preservation tests unreliable on Windows")
+	}
+
+	tmpDir := t.TempDir()
+	src := filepath.Join(tmpDir, "src.json")
+	dst := filepath.Join(tmpDir, "dst.json")
+
+	// Create source with specific permissions
+	if err := os.WriteFile(src, []byte(`{"test": true}`), 0o600); err != nil {
+		t.Fatal(err)
+	}
+
+	err := copyFile(src, dst)
+	if err != nil {
+		t.Fatalf("copyFile failed: %v", err)
+	}
+
+	// A failed Stat returns a nil FileInfo; check before calling Mode on it.
+	srcInfo, err := os.Stat(src)
+	if err != nil {
+		t.Fatalf("stat source: %v", err)
+	}
+	dstInfo, err := os.Stat(dst)
+	if err != nil {
+		t.Fatalf("stat destination: %v", err)
+	}
+
+	if srcInfo.Mode().Perm() != dstInfo.Mode().Perm() {
+		t.Errorf("permissions not preserved: src=%v, dst=%v", srcInfo.Mode().Perm(), dstInfo.Mode().Perm())
+	}
+}
+
+// TestCopyFile_SourceNotFound verifies that copying a missing source fails.
+func TestCopyFile_SourceNotFound(t *testing.T) {
+	dir := t.TempDir()
+	missing := filepath.Join(dir, "nonexistent.json")
+	target := filepath.Join(dir, "dst.json")
+
+	if err := copyFile(missing, target); err == nil {
+		t.Error("expected error for nonexistent source, got nil")
+	}
+}
+
+// TestWriteWithBackup_TargetIsDirectory verifies error when path points to a directory.
+func TestWriteWithBackup_TargetIsDirectory(t *testing.T) {
+	tmpDir := t.TempDir()
+	dirPath := filepath.Join(tmpDir, "actualdir")
+	// The test is only meaningful if the directory really exists.
+	if err := os.MkdirAll(dirPath, 0o755); err != nil {
+		t.Fatalf("could not create directory: %v", err)
+	}
+
+	err := writeWithBackup(dirPath, []byte(`{"test": true}`))
+	if err == nil {
+		t.Error("expected error when target is a directory, got nil")
+	}
+}
+
+// TestWriteWithBackup_EmptyData checks that writeWithBackup accepts a
+// zero-length payload and leaves a zero-length file behind.
+func TestWriteWithBackup_EmptyData(t *testing.T) {
+	target := filepath.Join(t.TempDir(), "empty.json")
+
+	if err := writeWithBackup(target, []byte{}); err != nil {
+		t.Fatalf("writeWithBackup with empty data failed: %v", err)
+	}
+
+	got, readErr := os.ReadFile(target)
+	if readErr != nil {
+		t.Fatalf("could not read file: %v", readErr)
+	}
+	if n := len(got); n != 0 {
+		t.Errorf("expected empty file, got %d bytes", n)
+	}
+}
+
+// TestWriteWithBackup_FileUnreadableButDirWritable verifies behavior when existing file
+// cannot be read (for backup comparison) but directory is writable.
+//
+// The test is skipped on Windows and when running as root: root is not
+// restricted by file mode bits, so a 0o000 file would still be readable and
+// the expected failure would never occur.
+func TestWriteWithBackup_FileUnreadableButDirWritable(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission tests unreliable on Windows")
+	}
+	if os.Geteuid() == 0 {
+		t.Skip("file mode bits do not restrict root")
+	}
+
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "unreadable.json")
+
+	// Create file and make it unreadable
+	if err := os.WriteFile(path, []byte(`{"original": true}`), 0o644); err != nil {
+		t.Fatalf("could not create file: %v", err)
+	}
+	if err := os.Chmod(path, 0o000); err != nil {
+		t.Fatalf("could not make file unreadable: %v", err)
+	}
+	// Restore the mode once the test is done.
+	t.Cleanup(func() { os.Chmod(path, 0o644) })
+
+	// Should fail because we can't read the file to compare/backup
+	err := writeWithBackup(path, []byte(`{"updated": true}`))
+	if err == nil {
+		t.Error("expected error when file is unreadable, got nil")
+	}
+}
+
+// TestWriteWithBackup_RapidSuccessiveWrites verifies backup works with multiple writes
+// within the same second (timestamp collision scenario).
+//
+// NOTE(review): the backup count is taken over everything in backupDir(), so
+// leftovers from earlier runs named "rapid.json.*" would also satisfy the
+// final check.
+func TestWriteWithBackup_RapidSuccessiveWrites(t *testing.T) {
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "rapid.json")
+
+	// Create initial file
+	if err := os.WriteFile(path, []byte(`{"v": 0}`), 0o644); err != nil {
+		t.Fatalf("could not create initial file: %v", err)
+	}
+
+	// Rapid successive writes
+	for i := 1; i <= 3; i++ {
+		data := []byte(fmt.Sprintf(`{"v": %d}`, i))
+		if err := writeWithBackup(path, data); err != nil {
+			t.Fatalf("write %d failed: %v", i, err)
+		}
+	}
+
+	// Verify final content
+	content, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("could not read final file: %v", err)
+	}
+	if string(content) != `{"v": 3}` {
+		t.Errorf("expected final content {\"v\": 3}, got %s", string(content))
+	}
+
+	// Verify at least one backup exists
+	entries, err := os.ReadDir(backupDir())
+	if err != nil {
+		t.Fatalf("could not list backup directory: %v", err)
+	}
+	const prefix = "rapid.json."
+	var backupCount int
+	for _, e := range entries {
+		name := e.Name()
+		if len(name) > len(prefix) && name[:len(prefix)] == prefix {
+			backupCount++
+		}
+	}
+	if backupCount == 0 {
+		t.Error("expected at least one backup file from rapid writes")
+	}
+}
+
+// TestWriteWithBackup_BackupDirIsFile verifies error when backup directory path is a file.
+//
+// NOTE(review): this test removes whatever currently lives at backupDir() and
+// replaces it with a regular file for the duration of the test. Confirm that
+// backupDir() cannot point at a location holding real user backups.
+func TestWriteWithBackup_BackupDirIsFile(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("test modifies system temp directory")
+	}
+
+	// Create a file at the backup directory path
+	backupPath := backupDir()
+	// Clean up any existing directory first
+	if err := os.RemoveAll(backupPath); err != nil {
+		t.Fatalf("could not clear backup path: %v", err)
+	}
+	// Put the directory back once the test is done, whatever happens below.
+	t.Cleanup(func() {
+		os.Remove(backupPath)
+		os.MkdirAll(backupPath, 0o755)
+	})
+	// Create a file instead of directory
+	if err := os.WriteFile(backupPath, []byte("not a directory"), 0o644); err != nil {
+		t.Fatalf("could not create file at backup path: %v", err)
+	}
+
+	tmpDir := t.TempDir()
+	path := filepath.Join(tmpDir, "test.json")
+	if err := os.WriteFile(path, []byte(`{"original": true}`), 0o644); err != nil {
+		t.Fatalf("could not create target file: %v", err)
+	}
+
+	err := writeWithBackup(path, []byte(`{"updated": true}`))
+	if err == nil {
+		t.Error("expected error when backup dir is a file, got nil")
+	}
+}
+
+// TestWriteWithBackup_NoOrphanTempFiles verifies temp files are cleaned up on failure.
+//
+// The failure is forced by pointing writeWithBackup at a directory. The test
+// then checks that no entry whose name starts with ".tmp" was left in the
+// directory containing that target.
+//
+// NOTE(review): nothing here changes permissions any more, so the Windows
+// skip may be unnecessary; it is kept to avoid changing where the test runs.
+func TestWriteWithBackup_NoOrphanTempFiles(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("permission tests unreliable on Windows")
+	}
+
+	tmpDir := t.TempDir()
+
+	// Count existing temp files
+	countTempFiles := func() int {
+		entries, _ := os.ReadDir(tmpDir)
+		count := 0
+		for _, e := range entries {
+			if len(e.Name()) > 4 && e.Name()[:4] == ".tmp" {
+				count++
+			}
+		}
+		return count
+	}
+
+	before := countTempFiles()
+
+	// Force a failure by making the target a directory
+	badPath := filepath.Join(tmpDir, "isdir")
+	if err := os.MkdirAll(badPath, 0o755); err != nil {
+		t.Fatalf("could not create directory: %v", err)
+	}
+
+	// The returned error is irrelevant here; only the leftovers matter.
+	_ = writeWithBackup(badPath, []byte(`{"test": true}`))
+
+	after := countTempFiles()
+	if after > before {
+		t.Errorf("orphan temp files left behind: before=%d, after=%d", before, after)
+	}
+}
--- a/cmd/config/integrations.go
+++ b/cmd/config/integrations.go
@@ -0,0 +1,355 @@
+package config
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"maps"
+	"os"
+	"os/exec"
+	"runtime"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/ollama/ollama/api"
+	"github.com/spf13/cobra"
+)
+
+// Integrations are described by two composable interfaces:
+// Runners execute the launching of a model with the integration - claude, codex
+// Editors can edit config files (supports multi-model selection) - opencode, droid
+// In some cases an editor is also a runner - opencode, droid
+
+// Runner can run an integration with a model.
+type Runner interface {
+	// Run launches the integration using the given model
+	Run(model string) error
+	// String returns the human-readable name of the integration
+	String() string
+}
+
+// Editor can edit config files (supports multi-model selection).
+// Callers in this file discover it with a type assertion on a Runner, and
+// only Editors are offered the multi-select model prompt.
+type Editor interface {
+	// Paths returns the paths to the config files for the integration
+	Paths() []string
+	// Edit updates the config files for the integration with the given models
+	Edit(models []string) error
+	// Models returns the models currently configured for the integration
+	// TODO(parthsareen): add error return to Models()
+	Models() []string
+}
+
+// integrations is the registry of available integrations.
+// Keys are the lowercase names accepted by `ollama launch`; values that also
+// implement Editor get their config files rewritten before launch.
+var integrations = map[string]Runner{
+	"claude":   &Claude{},
+	"clawdbot": &Clawdbot{},
+	"codex":    &Codex{},
+	"droid":    &Droid{},
+	"opencode": &OpenCode{},
+}
+
+// selectIntegration prompts the user to pick one of the registered
+// integrations and returns its registry key. Entries are listed in sorted key
+// order; when a saved configuration with at least one model exists, the first
+// saved model is shown next to the integration's display name.
+func selectIntegration() (string, error) {
+	if len(integrations) == 0 {
+		return "", fmt.Errorf("no integrations available")
+	}
+
+	var choices []selectItem
+	for _, key := range slices.Sorted(maps.Keys(integrations)) {
+		runner := integrations[key]
+		label := runner.String()
+		saved, err := loadIntegration(key)
+		if err == nil && len(saved.Models) > 0 {
+			label = fmt.Sprintf("%s (%s)", runner.String(), saved.Models[0])
+		}
+		choices = append(choices, selectItem{Name: key, Description: label})
+	}
+
+	return selectPrompt("Select integration:", choices)
+}
+
+// selectModels lets the user select models for an integration.
+//
+// It lists the models known to the server, pre-checks the ones already
+// configured (the saved launch config takes precedence over what the
+// integration's own config reports), and prompts: Editors get a multi-select
+// prompt, other integrations a single-select one. current may be a short name
+// such as "llama3.2"; it is resolved against the listed models and, when it is
+// among the configured models, moved to the front of the pre-checked list.
+//
+// If any selected model has a RemoteModel set, the user must be signed in.
+// When they are not and the server supplies a sign-in URL, the function asks
+// for confirmation, tries to open the URL in a browser, and polls Whoami until
+// sign-in completes or ctx is cancelled.
+//
+// A nil slice is only ever returned together with a non-nil error.
+func selectModels(ctx context.Context, name, current string) ([]string, error) {
+	r, ok := integrations[name]
+	if !ok {
+		return nil, fmt.Errorf("unknown integration: %s", name)
+	}
+
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return nil, err
+	}
+
+	models, err := client.List(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(models.Models) == 0 {
+		return nil, fmt.Errorf("no models available, run 'ollama pull <model>' first")
+	}
+
+	// Every listed model becomes an item, so items is non-empty from here on.
+	items := make([]selectItem, 0, len(models.Models))
+	cloudModels := make(map[string]bool)
+	for _, m := range models.Models {
+		if m.RemoteModel != "" {
+			cloudModels[m.Name] = true
+		}
+		items = append(items, selectItem{Name: m.Name})
+	}
+
+	// Get previously configured models (saved config takes precedence)
+	var preChecked []string
+	if saved, err := loadIntegration(name); err == nil {
+		preChecked = saved.Models
+	} else if editor, ok := r.(Editor); ok {
+		preChecked = editor.Models()
+	}
+	checked := make(map[string]bool, len(preChecked))
+	for _, n := range preChecked {
+		checked[n] = true
+	}
+
+	// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
+	for _, item := range items {
+		if item.Name == current || strings.HasPrefix(item.Name, current+":") {
+			current = item.Name
+			break
+		}
+	}
+
+	// If current model is configured, move to front of preChecked
+	if checked[current] {
+		preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
+	}
+
+	// Sort: checked first, then alphabetical
+	slices.SortFunc(items, func(a, b selectItem) int {
+		ac, bc := checked[a.Name], checked[b.Name]
+		if ac != bc {
+			if ac {
+				return -1
+			}
+			return 1
+		}
+		return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
+	})
+
+	var selected []string
+	// only editors support multi-model selection
+	if _, ok := r.(Editor); ok {
+		selected, err = multiSelectPrompt(fmt.Sprintf("Select models for %s:", r), items, preChecked)
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		model, err := selectPrompt(fmt.Sprintf("Select model for %s:", r), items)
+		if err != nil {
+			return nil, err
+		}
+		selected = []string{model}
+	}
+
+	// if any model in selected is a cloud model, ensure signed in
+	var selectedCloudModels []string
+	for _, m := range selected {
+		if cloudModels[m] {
+			selectedCloudModels = append(selectedCloudModels, m)
+		}
+	}
+	if len(selectedCloudModels) == 0 {
+		return selected, nil
+	}
+
+	// ensure user is signed in
+	user, err := client.Whoami(ctx)
+	if err == nil && user != nil && user.Name != "" {
+		return selected, nil
+	}
+
+	modelList := strings.Join(selectedCloudModels, ", ")
+
+	var aErr api.AuthorizationError
+	if !errors.As(err, &aErr) || aErr.SigninURL == "" {
+		if err == nil {
+			// Whoami succeeded but returned no usable user. Returning the nil
+			// error here would hand the caller a nil selection with no error.
+			err = fmt.Errorf("could not verify sign in for %s", modelList)
+		}
+		return nil, err
+	}
+
+	yes, err := confirmPrompt(fmt.Sprintf("sign in to use %s?", modelList))
+	if err != nil || !yes {
+		return nil, fmt.Errorf("%s requires sign in", modelList)
+	}
+
+	fmt.Fprintf(os.Stderr, "\nTo sign in, navigate to:\n    %s\n\n", aErr.SigninURL)
+
+	// TODO(parthsareen): extract into auth package for cmd
+	// Auto-open browser (best effort, fail silently)
+	switch runtime.GOOS {
+	case "darwin":
+		_ = exec.Command("open", aErr.SigninURL).Start()
+	case "linux":
+		_ = exec.Command("xdg-open", aErr.SigninURL).Start()
+	case "windows":
+		_ = exec.Command("rundll32", "url.dll,FileProtocolHandler", aErr.SigninURL).Start()
+	}
+
+	spinnerFrames := []string{"|", "/", "-", "\\"}
+	frame := 0
+
+	fmt.Fprintf(os.Stderr, "\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[0])
+
+	ticker := time.NewTicker(200 * time.Millisecond)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			fmt.Fprintf(os.Stderr, "\r\033[K")
+			return nil, ctx.Err()
+		case <-ticker.C:
+			frame++
+			fmt.Fprintf(os.Stderr, "\r\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
+
+			// poll every 10th frame (~2 seconds)
+			if frame%10 == 0 {
+				// Poll errors are expected until sign-in completes; keep waiting.
+				u, err := client.Whoami(ctx)
+				if err == nil && u != nil && u.Name != "" {
+					fmt.Fprintf(os.Stderr, "\r\033[K\033[A\r\033[K\033[1msigned in:\033[0m %s\n", u.Name)
+					return selected, nil
+				}
+			}
+		}
+	}
+}
+
+// runIntegration looks up name in the integrations registry, announces the
+// launch on stderr, and runs the integration with modelName. The lookup is
+// exact: name must already match a registry key.
+func runIntegration(name, modelName string) error {
+	runner, found := integrations[name]
+	if found {
+		fmt.Fprintf(os.Stderr, "\nLaunching %s with %s...\n", runner, modelName)
+		return runner.Run(modelName)
+	}
+	return fmt.Errorf("unknown integration: %s", name)
+}
+
+// LaunchCmd returns the cobra command for launching integrations.
+//
+// checkServerHeartbeat is installed as the command's PreRunE hook.
+//
+// The command resolves the integration (from the argument or an interactive
+// prompt), then picks models: the saved configuration when neither --model nor
+// --config is given, the --model value merged in front of any saved models, or
+// an interactive selection. For Editors it asks for confirmation before
+// touching existing config files, saves the selection, and rewrites the
+// integration's config. With --config it then offers to launch; otherwise it
+// launches with the first model.
+func LaunchCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) error) *cobra.Command {
+	var modelFlag string
+	var configFlag bool
+
+	cmd := &cobra.Command{
+		Use:   "launch [INTEGRATION]",
+		Short: "Launch an integration with Ollama",
+		Long: `Launch an integration configured with Ollama models.
+
+Supported integrations:
+  claude    Claude Code
+  clawdbot  Clawdbot
+  codex     Codex
+  droid     Droid
+  opencode  OpenCode
+
+Examples:
+  ollama launch
+  ollama launch claude
+  ollama launch claude --model <model>
+  ollama launch droid --config (does not auto-launch)`,
+		Args:    cobra.MaximumNArgs(1),
+		PreRunE: checkServerHeartbeat,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			var name string
+			if len(args) > 0 {
+				name = args[0]
+			} else {
+				var err error
+				name, err = selectIntegration()
+				if errors.Is(err, errCancelled) {
+					return nil
+				}
+				if err != nil {
+					return err
+				}
+			}
+
+			key := strings.ToLower(name)
+			r, ok := integrations[key]
+			if !ok {
+				return fmt.Errorf("unknown integration: %s", name)
+			}
+			// Use the registry key from here on: runIntegration does an exact
+			// lookup, so a mixed-case argument would otherwise pass the check
+			// above and then fail at launch.
+			name = key
+
+			// If launching without --model, use saved config if available
+			if !configFlag && modelFlag == "" {
+				if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
+					return runIntegration(name, config.Models[0])
+				}
+			}
+
+			var models []string
+			if modelFlag != "" {
+				// When --model is specified, merge with existing models (new model becomes default)
+				models = []string{modelFlag}
+				if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
+					for _, m := range existing.Models {
+						if m != modelFlag {
+							models = append(models, m)
+						}
+					}
+				}
+			} else {
+				var err error
+				models, err = selectModels(cmd.Context(), name, "")
+				if errors.Is(err, errCancelled) {
+					return nil
+				}
+				if err != nil {
+					return err
+				}
+			}
+
+			// Everything below indexes models[0]; refuse an empty selection
+			// instead of panicking.
+			if len(models) == 0 {
+				return errors.New("no models selected")
+			}
+
+			editor, isEditor := r.(Editor)
+
+			if isEditor {
+				if paths := editor.Paths(); len(paths) > 0 {
+					fmt.Fprintf(os.Stderr, "This will modify your %s configuration:\n", r)
+					for _, p := range paths {
+						fmt.Fprintf(os.Stderr, "  %s\n", p)
+					}
+					fmt.Fprintf(os.Stderr, "Backups will be saved to %s/\n\n", backupDir())
+
+					// A prompt error is treated like a "no" answer.
+					if proceed, _ := confirmPrompt("Proceed?"); !proceed {
+						return nil
+					}
+				}
+			}
+
+			if err := saveIntegration(name, models); err != nil {
+				return fmt.Errorf("failed to save: %w", err)
+			}
+
+			if isEditor {
+				if err := editor.Edit(models); err != nil {
+					return fmt.Errorf("setup failed: %w", err)
+				}
+
+				if len(models) == 1 {
+					fmt.Fprintf(os.Stderr, "Added %s to %s\n", models[0], r)
+				} else {
+					fmt.Fprintf(os.Stderr, "Added %d models to %s (default: %s)\n", len(models), r, models[0])
+				}
+			}
+
+			if configFlag {
+				// A prompt error is treated like a "no" answer.
+				if launch, _ := confirmPrompt(fmt.Sprintf("\nLaunch %s now?", r)); launch {
+					return runIntegration(name, models[0])
+				}
+				fmt.Fprintf(os.Stderr, "Run 'ollama launch %s' to start with %s\n", name, models[0])
+				return nil
+			}
+
+			return runIntegration(name, models[0])
+		},
+	}
+
+	cmd.Flags().StringVar(&modelFlag, "model", "", "Model to use")
+	cmd.Flags().BoolVar(&configFlag, "config", false, "Configure without launching")
+	return cmd
+}
--- a/cmd/config/integrations_test.go
+++ b/cmd/config/integrations_test.go
@@ -0,0 +1,188 @@
+package config
+
+import (
+	"slices"
+	"strings"
+	"testing"
+
+	"github.com/spf13/cobra"
+)
+
+// TestIntegrationLookup checks that lower-casing the user's input finds the
+// expected registry entry and display name, and that unknown or empty names
+// are not found.
+func TestIntegrationLookup(t *testing.T) {
+	type lookupCase struct {
+		name      string
+		input     string
+		wantFound bool
+		wantName  string
+	}
+	cases := []lookupCase{
+		{"claude lowercase", "claude", true, "Claude Code"},
+		{"claude uppercase", "CLAUDE", true, "Claude Code"},
+		{"claude mixed case", "Claude", true, "Claude Code"},
+		{"codex", "codex", true, "Codex"},
+		{"droid", "droid", true, "Droid"},
+		{"opencode", "opencode", true, "OpenCode"},
+		{"unknown integration", "unknown", false, ""},
+		{"empty string", "", false, ""},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			key := strings.ToLower(tc.input)
+			runner, found := integrations[key]
+			if found != tc.wantFound {
+				t.Errorf("integrations[%q] found = %v, want %v", tc.input, found, tc.wantFound)
+			}
+			if !found {
+				return
+			}
+			if runner.String() != tc.wantName {
+				t.Errorf("integrations[%q].String() = %q, want %q", tc.input, runner.String(), tc.wantName)
+			}
+		})
+	}
+}
+
+// TestIntegrationRegistry checks that every integration the launch command
+// advertises is present in the registry and has a non-empty display name.
+func TestIntegrationRegistry(t *testing.T) {
+	// Keep in sync with the keys of the integrations map.
+	expectedIntegrations := []string{"claude", "clawdbot", "codex", "droid", "opencode"}
+
+	for _, name := range expectedIntegrations {
+		t.Run(name, func(t *testing.T) {
+			r, ok := integrations[name]
+			if !ok {
+				t.Fatalf("integration %q not found in registry", name)
+			}
+			if r.String() == "" {
+				t.Error("integration.String() should not be empty")
+			}
+		})
+	}
+}
+
+// TestHasLocalModel exercises the "name does not contain cloud" heuristic on
+// several model lists. The predicate is defined inline in this test; no
+// package-level function is called.
+func TestHasLocalModel(t *testing.T) {
+	// A model counts as local when its name has no "cloud" substring.
+	isLocal := func(m string) bool {
+		return !strings.Contains(m, "cloud")
+	}
+
+	cases := []struct {
+		name   string
+		models []string
+		want   bool
+	}{
+		{"empty list", []string{}, false},
+		{"single local model", []string{"llama3.2"}, true},
+		{"single cloud model", []string{"cloud-model"}, false},
+		{"mixed models", []string{"cloud-model", "llama3.2"}, true},
+		{"multiple local models", []string{"llama3.2", "qwen2.5"}, true},
+		{"multiple cloud models", []string{"cloud-a", "cloud-b"}, false},
+		{"local model first", []string{"llama3.2", "cloud-model"}, true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := slices.ContainsFunc(tc.models, isLocal); got != tc.want {
+				t.Errorf("hasLocalModel(%v) = %v, want %v", tc.models, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestLaunchCmd checks the static shape of the command returned by LaunchCmd:
+// usage string, descriptions, flags, and the PreRunE hook.
+func TestLaunchCmd(t *testing.T) {
+	// Stand-in for checkServerHeartbeat that always succeeds
+	alwaysOK := func(cmd *cobra.Command, args []string) error { return nil }
+
+	cmd := LaunchCmd(alwaysOK)
+
+	t.Run("command structure", func(t *testing.T) {
+		const wantUse = "launch [INTEGRATION]"
+		if cmd.Use != wantUse {
+			t.Errorf("Use = %q, want %q", cmd.Use, wantUse)
+		}
+		if cmd.Short == "" {
+			t.Error("Short description should not be empty")
+		}
+		if cmd.Long == "" {
+			t.Error("Long description should not be empty")
+		}
+	})
+
+	t.Run("flags exist", func(t *testing.T) {
+		flags := cmd.Flags()
+		if flags.Lookup("model") == nil {
+			t.Error("--model flag should exist")
+		}
+
+		if flags.Lookup("config") == nil {
+			t.Error("--config flag should exist")
+		}
+	})
+
+	t.Run("PreRunE is set", func(t *testing.T) {
+		if cmd.PreRunE == nil {
+			t.Error("PreRunE should be set to checkServerHeartbeat")
+		}
+	})
+}
+
+// TestRunIntegration_UnknownIntegration checks that runIntegration rejects a
+// name that is not in the registry with an "unknown integration" error.
+func TestRunIntegration_UnknownIntegration(t *testing.T) {
+	err := runIntegration("unknown-integration", "model")
+	if err == nil {
+		// Fatal, not Error: the check below calls err.Error() and would
+		// panic on a nil error.
+		t.Fatal("expected error for unknown integration, got nil")
+	}
+	if !strings.Contains(err.Error(), "unknown integration") {
+		t.Errorf("error should mention 'unknown integration', got: %v", err)
+	}
+}
+
+// TestHasLocalModel_DocumentsHeuristic records the edge cases of the
+// substring heuristic: any name containing "cloud" is treated as a cloud
+// model, including names such as "cloudless-model". The predicate is defined
+// inline in this test.
+func TestHasLocalModel_DocumentsHeuristic(t *testing.T) {
+	type heuristicCase struct {
+		name   string
+		models []string
+		want   bool
+		reason string
+	}
+	cases := []heuristicCase{
+		{"empty list", []string{}, false, "empty list has no local models"},
+		{"contains-cloud-substring", []string{"deepseek-r1:cloud"}, false, "model with 'cloud' substring is considered cloud"},
+		{"cloud-in-name", []string{"my-cloud-model"}, false, "'cloud' anywhere in name = cloud model"},
+		{"cloudless", []string{"cloudless-model"}, false, "'cloudless' still contains 'cloud'"},
+		{"local-model", []string{"llama3.2"}, true, "no 'cloud' = local"},
+		{"mixed", []string{"cloud-model", "llama3.2"}, true, "one local model = hasLocalModel true"},
+		{"all-cloud", []string{"cloud-a", "cloud-b"}, false, "all contain 'cloud'"},
+	}
+
+	notCloud := func(m string) bool { return !strings.Contains(m, "cloud") }
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := slices.ContainsFunc(tc.models, notCloud)
+			if got == tc.want {
+				return
+			}
+			t.Errorf("hasLocalModel(%v) = %v, want %v (%s)", tc.models, got, tc.want, tc.reason)
+		})
+	}
+}
+
+// TestLaunchCmd_NilHeartbeat checks that building the command with a nil
+// heartbeat hook neither panics nor returns nil. A non-nil PreRunE is only
+// logged, not treated as a failure.
+func TestLaunchCmd_NilHeartbeat(t *testing.T) {
+	cmd := LaunchCmd(nil)
+	if cmd == nil {
+		t.Fatal("LaunchCmd returned nil")
+	}
+
+	// Informational only: either outcome is accepted.
+	if hook := cmd.PreRunE; hook != nil {
+		t.Log("Note: PreRunE is set even when nil is passed (acceptable)")
+	}
+}
+
+// TestAllIntegrations_HaveRequiredMethods walks the whole registry and checks
+// that each integration reports a non-empty display name. Run is only
+// referenced, never invoked, since invoking it would launch the integration.
+func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
+	for key, runner := range integrations {
+		t.Run(key, func(t *testing.T) {
+			if runner.String() == "" {
+				t.Error("String() should not return empty")
+			}
+
+			// Compile-time style check that Run has the expected signature.
+			var _ func(string) error = runner.Run
+		})
+	}
+}
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -0,0 +1,226 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"maps"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"slices"
+	"strings"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+// OpenCode implements Runner and Editor for OpenCode integration
+type OpenCode struct{}
+
+// String returns the human-readable name of the integration.
+func (o *OpenCode) String() string {
+	const displayName = "OpenCode"
+	return displayName
+}
+
+// Run launches the opencode binary attached to the current terminal.
+// Before launching it rewrites the OpenCode config through Edit, using the
+// saved model list for "opencode" when one exists and falling back to model
+// alone otherwise. It fails early when opencode is not on PATH.
+func (o *OpenCode) Run(model string) error {
+	_, lookErr := exec.LookPath("opencode")
+	if lookErr != nil {
+		return fmt.Errorf("opencode is not installed, install from https://opencode.ai")
+	}
+
+	// Call Edit() to ensure config is up-to-date before launch
+	models := []string{model}
+	saved, loadErr := loadIntegration("opencode")
+	if loadErr == nil && len(saved.Models) > 0 {
+		models = saved.Models
+	}
+	if err := o.Edit(models); err != nil {
+		return fmt.Errorf("setup failed: %w", err)
+	}
+
+	proc := exec.Command("opencode")
+	proc.Stdin, proc.Stdout, proc.Stderr = os.Stdin, os.Stdout, os.Stderr
+	return proc.Run()
+}
+
+// Paths returns the OpenCode config and state files that currently exist
+// under the user's home directory, config file first. It returns nil when the
+// home directory cannot be determined or neither file exists.
+func (o *OpenCode) Paths() []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return nil
+	}
+
+	candidates := []string{
+		filepath.Join(home, ".config", "opencode", "opencode.json"),
+		filepath.Join(home, ".local", "state", "opencode", "model.json"),
+	}
+
+	var existing []string
+	for _, candidate := range candidates {
+		if _, statErr := os.Stat(candidate); statErr != nil {
+			continue
+		}
+		existing = append(existing, candidate)
+	}
+	return existing
+}
+
+// Edit updates the OpenCode config and state files so that modelList is the
+// set of launch-managed Ollama models. The first entry ends up first in
+// OpenCode's recent-model list. An empty modelList is a no-op.
+//
+// In ~/.config/opencode/opencode.json it ensures an "ollama" provider exists,
+// removes launch-managed models that are no longer selected, and adds or
+// migrates the selected ones; other providers and unmanaged models are left
+// alone. In ~/.local/state/opencode/model.json it moves the selected models to
+// the front of "recent" and caps that list at 10 entries. Both files are
+// written through writeWithBackup.
+//
+// Unreadable or unparsable files are treated as empty, as is a file whose
+// content is the JSON value null.
+func (o *OpenCode) Edit(modelList []string) error {
+	if len(modelList) == 0 {
+		return nil
+	}
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	configPath := filepath.Join(home, ".config", "opencode", "opencode.json")
+	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+		return err
+	}
+
+	config := make(map[string]any)
+	if data, err := os.ReadFile(configPath); err == nil {
+		_ = json.Unmarshal(data, &config) // Ignore parse errors; treat missing/corrupt files as empty
+	}
+	if config == nil {
+		// Unmarshalling JSON null sets the map to nil; writing to it would panic.
+		config = make(map[string]any)
+	}
+
+	config["$schema"] = "https://opencode.ai/config.json"
+
+	provider, ok := config["provider"].(map[string]any)
+	if !ok {
+		provider = make(map[string]any)
+	}
+
+	ollama, ok := provider["ollama"].(map[string]any)
+	if !ok {
+		ollama = map[string]any{
+			"npm":  "@ai-sdk/openai-compatible",
+			"name": "Ollama (local)",
+			"options": map[string]any{
+				"baseURL": envconfig.Host().String() + "/v1",
+			},
+		}
+	}
+
+	models, ok := ollama["models"].(map[string]any)
+	if !ok {
+		models = make(map[string]any)
+	}
+
+	// Used for both the config pruning below and the recent-list filter.
+	selectedSet := make(map[string]bool, len(modelList))
+	for _, m := range modelList {
+		selectedSet[m] = true
+	}
+
+	// Drop managed models that are no longer selected; unmanaged entries stay.
+	for name, cfg := range models {
+		if cfgMap, ok := cfg.(map[string]any); ok {
+			if isOllamaModel(cfgMap) && !selectedSet[name] {
+				delete(models, name)
+			}
+		}
+	}
+
+	for _, model := range modelList {
+		if existing, ok := models[model].(map[string]any); ok {
+			// migrate existing models without _launch marker
+			if isOllamaModel(existing) {
+				existing["_launch"] = true
+				if name, ok := existing["name"].(string); ok {
+					existing["name"] = strings.TrimSuffix(name, " [Ollama]")
+				}
+			}
+			continue
+		}
+		models[model] = map[string]any{
+			"name":    model,
+			"_launch": true,
+		}
+	}
+
+	ollama["models"] = models
+	provider["ollama"] = ollama
+	config["provider"] = provider
+
+	configData, err := json.MarshalIndent(config, "", "  ")
+	if err != nil {
+		return err
+	}
+	if err := writeWithBackup(configPath, configData); err != nil {
+		return err
+	}
+
+	statePath := filepath.Join(home, ".local", "state", "opencode", "model.json")
+	if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil {
+		return err
+	}
+
+	defaultState := func() map[string]any {
+		return map[string]any{
+			"recent":   []any{},
+			"favorite": []any{},
+			"variant":  map[string]any{},
+		}
+	}
+	state := defaultState()
+	if data, err := os.ReadFile(statePath); err == nil {
+		_ = json.Unmarshal(data, &state) // Ignore parse errors; use defaults
+	}
+	if state == nil {
+		// Same JSON null hazard as for the config file above.
+		state = defaultState()
+	}
+
+	recent, _ := state["recent"].([]any)
+
+	// Filter out existing Ollama models we're about to re-add
+	newRecent := slices.DeleteFunc(slices.Clone(recent), func(entry any) bool {
+		e, ok := entry.(map[string]any)
+		if !ok || e["providerID"] != "ollama" {
+			return false
+		}
+		modelID, _ := e["modelID"].(string)
+		return selectedSet[modelID]
+	})
+
+	// Prepend models in reverse order so first model ends up first
+	for _, model := range slices.Backward(modelList) {
+		newRecent = slices.Insert(newRecent, 0, any(map[string]any{
+			"providerID": "ollama",
+			"modelID":    model,
+		}))
+	}
+
+	const maxRecentModels = 10
+	newRecent = newRecent[:min(len(newRecent), maxRecentModels)]
+
+	state["recent"] = newRecent
+
+	stateData, err := json.MarshalIndent(state, "", "  ")
+	if err != nil {
+		return err
+	}
+	return writeWithBackup(statePath, stateData)
+}
+
+// Models returns the sorted names of the models listed under the "ollama"
+// provider in the OpenCode config file. It returns nil when the home
+// directory or config file cannot be read, or when no models are listed.
+func (o *OpenCode) Models() []string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return nil
+	}
+
+	cfgPath := filepath.Join(home, ".config", "opencode", "opencode.json")
+	cfg, err := readJSONFile(cfgPath)
+	if err != nil {
+		return nil
+	}
+
+	// Each failed assertion yields a nil map, which is safe to index below.
+	providers, _ := cfg["provider"].(map[string]any)
+	ollamaCfg, _ := providers["ollama"].(map[string]any)
+	entries, _ := ollamaCfg["models"].(map[string]any)
+	if len(entries) == 0 {
+		return nil
+	}
+	return slices.Sorted(maps.Keys(entries))
+}
+
+// isOllamaModel reports whether a model config entry is managed by us.
+// An entry is managed when its "_launch" field is the boolean true, or when
+// its "name" is a string ending in "[Ollama]", the marker used previously.
+func isOllamaModel(cfg map[string]any) bool {
+	if launched, _ := cfg["_launch"].(bool); launched {
+		return true
+	}
+	// previously used [Ollama] as a suffix for the model managed by ollama launch
+	name, isString := cfg["name"].(string)
+	return isString && strings.HasSuffix(name, "[Ollama]")
+}
--- a/cmd/config/opencode_test.go
+++ b/cmd/config/opencode_test.go
@@ -0,0 +1,507 @@
+package config
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestOpenCodeIntegration checks the display name of the OpenCode integration
+// and that it satisfies the Runner and Editor interfaces.
+func TestOpenCodeIntegration(t *testing.T) {
+	integration := &OpenCode{}
+
+	t.Run("String", func(t *testing.T) {
+		got := integration.String()
+		if got != "OpenCode" {
+			t.Errorf("String() = %q, want %q", got, "OpenCode")
+		}
+	})
+
+	// The two subtests below are compile-time checks: the assignments only
+	// build if *OpenCode implements the interface.
+	t.Run("implements Runner", func(t *testing.T) {
+		var _ Runner = integration
+	})
+
+	t.Run("implements Editor", func(t *testing.T) {
+		var _ Editor = integration
+	})
+}
+
+// TestOpenCodeEdit exercises OpenCode.Edit against a temporary home
+// directory, covering both the opencode.json config file and the model.json
+// state file. Each subtest starts from a clean slate via cleanup.
+func TestOpenCodeEdit(t *testing.T) {
+	o := &OpenCode{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	configDir := filepath.Join(tmpDir, ".config", "opencode")
+	configPath := filepath.Join(configDir, "opencode.json")
+	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
+	statePath := filepath.Join(stateDir, "model.json")
+
+	cleanup := func() {
+		os.RemoveAll(configDir)
+		os.RemoveAll(stateDir)
+	}
+
+	// edit runs o.Edit and fails the current subtest on error, so a broken
+	// setup step can never be mistaken for a passing assertion later on.
+	edit := func(t *testing.T, models ...string) {
+		t.Helper()
+		if err := o.Edit(models); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// seed writes content to path, creating the parent directory first.
+	seed := func(t *testing.T, path, content string) {
+		t.Helper()
+		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// load reads path and decodes it as a JSON object.
+	load := func(t *testing.T, path string) map[string]any {
+		t.Helper()
+		data, err := os.ReadFile(path)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var decoded map[string]any
+		if err := json.Unmarshal(data, &decoded); err != nil {
+			t.Fatal(err)
+		}
+		return decoded
+	}
+
+	// entryFor returns provider.ollama.models[model] from a decoded config,
+	// failing the subtest (instead of panicking) when the entry is missing.
+	entryFor := func(t *testing.T, cfg map[string]any, model string) map[string]any {
+		t.Helper()
+		provider, _ := cfg["provider"].(map[string]any)
+		ollama, _ := provider["ollama"].(map[string]any)
+		models, _ := ollama["models"].(map[string]any)
+		entry, ok := models[model].(map[string]any)
+		if !ok {
+			t.Fatalf("model %s entry not found or not an object", model)
+		}
+		return entry
+	}
+
+	t.Run("fresh install", func(t *testing.T) {
+		cleanup()
+		edit(t, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
+	})
+
+	t.Run("preserve other providers", func(t *testing.T) {
+		cleanup()
+		seed(t, configPath, `{"provider":{"anthropic":{"apiKey":"xxx"}}}`)
+		edit(t, "llama3.2")
+		cfg := load(t, configPath)
+		provider, _ := cfg["provider"].(map[string]any)
+		if provider["anthropic"] == nil {
+			t.Error("anthropic provider was removed")
+		}
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+	})
+
+	t.Run("preserve other models", func(t *testing.T) {
+		cleanup()
+		seed(t, configPath, `{"provider":{"ollama":{"models":{"mistral":{"name":"Mistral"}}}}}`)
+		edit(t, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "mistral")
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+	})
+
+	t.Run("update existing model", func(t *testing.T) {
+		cleanup()
+		edit(t, "llama3.2")
+		edit(t, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+	})
+
+	t.Run("preserve top-level keys", func(t *testing.T) {
+		cleanup()
+		seed(t, configPath, `{"theme":"dark","keybindings":{}}`)
+		edit(t, "llama3.2")
+		cfg := load(t, configPath)
+		if cfg["theme"] != "dark" {
+			t.Error("theme was removed")
+		}
+		if cfg["keybindings"] == nil {
+			t.Error("keybindings was removed")
+		}
+	})
+
+	t.Run("model state - insert at index 0", func(t *testing.T) {
+		cleanup()
+		seed(t, statePath, `{"recent":[{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`)
+		edit(t, "llama3.2")
+		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
+		assertOpenCodeRecentModel(t, statePath, 1, "anthropic", "claude")
+	})
+
+	t.Run("model state - preserve favorites and variants", func(t *testing.T) {
+		cleanup()
+		seed(t, statePath, `{"recent":[],"favorite":[{"providerID":"x","modelID":"y"}],"variant":{"a":"b"}}`)
+		edit(t, "llama3.2")
+		state := load(t, statePath)
+		favorite, _ := state["favorite"].([]any)
+		if len(favorite) != 1 {
+			t.Error("favorite was modified")
+		}
+		variant, _ := state["variant"].(map[string]any)
+		if variant["a"] != "b" {
+			t.Error("variant was modified")
+		}
+	})
+
+	t.Run("model state - deduplicate on re-add", func(t *testing.T) {
+		cleanup()
+		seed(t, statePath, `{"recent":[{"providerID":"ollama","modelID":"llama3.2"},{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`)
+		edit(t, "llama3.2")
+		state := load(t, statePath)
+		recent, _ := state["recent"].([]any)
+		if len(recent) != 2 {
+			t.Errorf("expected 2 recent entries, got %d", len(recent))
+		}
+		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
+	})
+
+	t.Run("remove model", func(t *testing.T) {
+		cleanup()
+		// First add two models
+		edit(t, "llama3.2", "mistral")
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "mistral")
+
+		// Then remove one by only selecting the other
+		edit(t, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+		assertOpenCodeModelNotExists(t, configPath, "mistral")
+	})
+
+	t.Run("preserve user customizations on managed models", func(t *testing.T) {
+		cleanup()
+		edit(t, "llama3.2")
+
+		// Add custom fields to the model entry (simulating user edits).
+		// entryFor returns the map stored inside cfg, so mutating it here
+		// changes what gets marshalled below.
+		cfg := load(t, configPath)
+		entry := entryFor(t, cfg, "llama3.2")
+		entry["_myPref"] = "custom-value"
+		entry["_myNum"] = 42
+		configData, err := json.MarshalIndent(cfg, "", "  ")
+		if err != nil {
+			t.Fatal(err)
+		}
+		seed(t, configPath, string(configData))
+
+		// Re-run Edit — should preserve custom fields
+		edit(t, "llama3.2")
+
+		entry = entryFor(t, load(t, configPath), "llama3.2")
+
+		if entry["_myPref"] != "custom-value" {
+			t.Errorf("_myPref was lost: got %v", entry["_myPref"])
+		}
+		// JSON numbers decode into float64 when the target is any.
+		if entry["_myNum"] != float64(42) {
+			t.Errorf("_myNum was lost: got %v", entry["_myNum"])
+		}
+		if v, ok := entry["_launch"].(bool); !ok || !v {
+			t.Errorf("_launch marker missing or false: got %v", entry["_launch"])
+		}
+	})
+
+	t.Run("migrate legacy [Ollama] suffix entries", func(t *testing.T) {
+		cleanup()
+		// Write a config with a legacy entry (has [Ollama] suffix but no _launch marker)
+		seed(t, configPath, `{"provider":{"ollama":{"models":{"llama3.2":{"name":"llama3.2 [Ollama]"}}}}}`)
+
+		edit(t, "llama3.2")
+
+		entry := entryFor(t, load(t, configPath), "llama3.2")
+
+		// _launch marker should be added
+		if v, ok := entry["_launch"].(bool); !ok || !v {
+			t.Errorf("_launch marker not added during migration: got %v", entry["_launch"])
+		}
+		// [Ollama] suffix should be stripped
+		if name, ok := entry["name"].(string); !ok || name != "llama3.2" {
+			t.Errorf("name suffix not stripped: got %v", entry["name"])
+		}
+	})
+
+	t.Run("remove model preserves non-ollama models", func(t *testing.T) {
+		cleanup()
+		// Add a non-Ollama model manually
+		seed(t, configPath, `{"provider":{"ollama":{"models":{"external":{"name":"External Model"}}}}}`)
+
+		edit(t, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "llama3.2")
+		assertOpenCodeModelExists(t, configPath, "external") // Should be preserved
+	})
+}
+
+// assertOpenCodeModelExists fails the test unless the JSON config at path
+// contains a non-null entry for model under provider.ollama.models. A
+// missing file, invalid JSON, or a missing intermediate object is fatal.
+func assertOpenCodeModelExists(t *testing.T, path, model string) {
+	t.Helper()
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var root map[string]any
+	if err = json.Unmarshal(raw, &root); err != nil {
+		t.Fatal(err)
+	}
+
+	providers, found := root["provider"].(map[string]any)
+	if !found {
+		t.Fatal("provider not found")
+	}
+
+	ollamaCfg, found := providers["ollama"].(map[string]any)
+	if !found {
+		t.Fatal("ollama provider not found")
+	}
+
+	entries, found := ollamaCfg["models"].(map[string]any)
+	if !found {
+		t.Fatal("models not found")
+	}
+
+	if entry := entries[model]; entry == nil {
+		t.Errorf("model %s not found", model)
+	}
+}
+
+// assertOpenCodeModelNotExists reports a test error when the JSON config at
+// path has a non-null entry for model under provider.ollama.models. A missing
+// file or invalid JSON is fatal; a missing intermediate object counts as
+// "model absent".
+func assertOpenCodeModelNotExists(t *testing.T, path, model string) {
+	t.Helper()
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var root map[string]any
+	if err = json.Unmarshal(raw, &root); err != nil {
+		t.Fatal(err)
+	}
+
+	// Each failed assertion yields a nil map; indexing a nil map returns nil,
+	// so any missing level ends up as "no entry" without extra branching.
+	providers, _ := root["provider"].(map[string]any)
+	ollamaCfg, _ := providers["ollama"].(map[string]any)
+	entries, _ := ollamaCfg["models"].(map[string]any)
+
+	if entries[model] != nil {
+		t.Errorf("model %s should not exist but was found", model)
+	}
+}
+
+// assertOpenCodeRecentModel checks that entry number index of the "recent"
+// array in the JSON state file at path has the given providerID and modelID.
+// Read/parse failures, a missing "recent" array, an out-of-range index, or a
+// non-object entry are fatal; field mismatches are reported as errors.
+func assertOpenCodeRecentModel(t *testing.T, path string, index int, providerID, modelID string) {
+	t.Helper()
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var root map[string]any
+	if err = json.Unmarshal(raw, &root); err != nil {
+		t.Fatal(err)
+	}
+
+	list, isList := root["recent"].([]any)
+	if !isList {
+		t.Fatal("recent not found")
+	}
+	if count := len(list); index >= count {
+		t.Fatalf("index %d out of range (len=%d)", index, count)
+	}
+
+	got, isMap := list[index].(map[string]any)
+	if !isMap {
+		t.Fatal("entry is not a map")
+	}
+
+	if gotProvider := got["providerID"]; gotProvider != providerID {
+		t.Errorf("expected providerID %s, got %s", providerID, gotProvider)
+	}
+	if gotModel := got["modelID"]; gotModel != modelID {
+		t.Errorf("expected modelID %s, got %s", modelID, gotModel)
+	}
+}
+
+// Edge case tests for opencode.go
+
+// TestOpenCodeEdit_CorruptedConfigJSON verifies that Edit succeeds when
+// opencode.json holds invalid JSON and that the file is valid JSON afterwards.
+func TestOpenCodeEdit_CorruptedConfigJSON(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	cfgDir := filepath.Join(home, ".config", "opencode")
+	cfgFile := filepath.Join(cfgDir, "opencode.json")
+
+	os.MkdirAll(cfgDir, 0o755)
+	os.WriteFile(cfgFile, []byte(`{corrupted json content`), 0o644)
+
+	// Should not panic - corrupted JSON should be treated as empty
+	integration := &OpenCode{}
+	if err := integration.Edit([]string{"llama3.2"}); err != nil {
+		t.Fatalf("Edit failed with corrupted config: %v", err)
+	}
+
+	// Verify valid JSON was created
+	written, _ := os.ReadFile(cfgFile)
+	var parsed map[string]any
+	if err := json.Unmarshal(written, &parsed); err != nil {
+		t.Errorf("resulting config is not valid JSON: %v", err)
+	}
+}
+
+// TestOpenCodeEdit_CorruptedStateJSON verifies that Edit succeeds when
+// model.json holds invalid JSON and that the file is valid JSON afterwards.
+func TestOpenCodeEdit_CorruptedStateJSON(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	stDir := filepath.Join(home, ".local", "state", "opencode")
+	stFile := filepath.Join(stDir, "model.json")
+
+	os.MkdirAll(stDir, 0o755)
+	os.WriteFile(stFile, []byte(`{corrupted state`), 0o644)
+
+	integration := &OpenCode{}
+	if err := integration.Edit([]string{"llama3.2"}); err != nil {
+		t.Fatalf("Edit failed with corrupted state: %v", err)
+	}
+
+	// Verify valid state was created
+	written, _ := os.ReadFile(stFile)
+	var parsed map[string]any
+	if err := json.Unmarshal(written, &parsed); err != nil {
+		t.Errorf("resulting state is not valid JSON: %v", err)
+	}
+}
+
+// TestOpenCodeEdit_WrongTypeProvider verifies that Edit succeeds when the
+// config's "provider" value is a string, and that afterwards "provider" is an
+// object containing an "ollama" entry.
+func TestOpenCodeEdit_WrongTypeProvider(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	cfgDir := filepath.Join(home, ".config", "opencode")
+	cfgFile := filepath.Join(cfgDir, "opencode.json")
+
+	os.MkdirAll(cfgDir, 0o755)
+	os.WriteFile(cfgFile, []byte(`{"provider": "not a map"}`), 0o644)
+
+	integration := &OpenCode{}
+	if err := integration.Edit([]string{"llama3.2"}); err != nil {
+		t.Fatalf("Edit with wrong type provider failed: %v", err)
+	}
+
+	// Verify provider is now correct type
+	written, _ := os.ReadFile(cfgFile)
+	var parsed map[string]any
+	json.Unmarshal(written, &parsed)
+
+	providers, isMap := parsed["provider"].(map[string]any)
+	if !isMap {
+		t.Fatalf("provider should be map after setup, got %T", parsed["provider"])
+	}
+	if _, isNil := providers["ollama"].(any); !isNil {
+		t.Error("ollama provider should be created")
+	}
+}
+
+// TestOpenCodeEdit_WrongTypeRecent verifies that Edit succeeds when the
+// state file's "recent" value is a string instead of an array. The resulting
+// shape of "recent" is only logged, not asserted.
+func TestOpenCodeEdit_WrongTypeRecent(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	stDir := filepath.Join(home, ".local", "state", "opencode")
+	stFile := filepath.Join(stDir, "model.json")
+
+	os.MkdirAll(stDir, 0o755)
+	os.WriteFile(stFile, []byte(`{"recent": "not an array", "favorite": [], "variant": {}}`), 0o644)
+
+	integration := &OpenCode{}
+	if err := integration.Edit([]string{"llama3.2"}); err != nil {
+		t.Fatalf("Edit with wrong type recent failed: %v", err)
+	}
+
+	// The function should handle this gracefully
+	written, _ := os.ReadFile(stFile)
+	var parsed map[string]any
+	json.Unmarshal(written, &parsed)
+
+	// recent should be properly set after setup
+	list, isList := parsed["recent"].([]any)
+	switch {
+	case !isList:
+		t.Logf("Note: recent type after setup is %T (documenting behavior)", parsed["recent"])
+	case len(list) == 0:
+		t.Logf("Note: recent is empty (documenting behavior)")
+	}
+}
+
+// TestOpenCodeEdit_EmptyModels verifies that Edit with an empty model list
+// returns no error and leaves an existing opencode.json byte-for-byte intact.
+func TestOpenCodeEdit_EmptyModels(t *testing.T) {
+	const originalContent = `{"provider":{"ollama":{"models":{"existing":{}}}}}`
+
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	cfgDir := filepath.Join(home, ".config", "opencode")
+	cfgFile := filepath.Join(cfgDir, "opencode.json")
+
+	os.MkdirAll(cfgDir, 0o755)
+	os.WriteFile(cfgFile, []byte(originalContent), 0o644)
+
+	// Empty models should be no-op
+	integration := &OpenCode{}
+	if err := integration.Edit([]string{}); err != nil {
+		t.Fatalf("Edit with empty models failed: %v", err)
+	}
+
+	// Original content should be preserved (file not modified)
+	after, _ := os.ReadFile(cfgFile)
+	if string(after) != originalContent {
+		t.Errorf("empty models should not modify file, but content changed")
+	}
+}
+
+// TestOpenCodeEdit_SpecialCharsInModelName verifies that a model name
+// containing double quotes is written as valid JSON and can be looked up
+// again under provider.ollama.models.
+func TestOpenCodeEdit_SpecialCharsInModelName(t *testing.T) {
+	// Model name with special characters (though unusual)
+	const specialModel = `model-with-"quotes"`
+
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	integration := &OpenCode{}
+	if err := integration.Edit([]string{specialModel}); err != nil {
+		t.Fatalf("Edit with special chars failed: %v", err)
+	}
+
+	// Verify it was stored correctly
+	cfgFile := filepath.Join(home, ".config", "opencode", "opencode.json")
+	written, _ := os.ReadFile(cfgFile)
+
+	var parsed map[string]any
+	if err := json.Unmarshal(written, &parsed); err != nil {
+		t.Fatalf("resulting config is invalid JSON: %v", err)
+	}
+
+	// Model should be accessible
+	providers, _ := parsed["provider"].(map[string]any)
+	ollamaCfg, _ := providers["ollama"].(map[string]any)
+	entries, _ := ollamaCfg["models"].(map[string]any)
+
+	if entry := entries[specialModel]; entry == nil {
+		t.Errorf("model with special chars not found in config")
+	}
+}
+
+// TestOpenCodeModels_NoConfig verifies that Models returns no entries when
+// the home directory contains no opencode config file.
+func TestOpenCodeModels_NoConfig(t *testing.T) {
+	setTestHome(t, t.TempDir())
+
+	integration := &OpenCode{}
+	if got := integration.Models(); len(got) != 0 {
+		t.Errorf("expected nil/empty for missing config, got %v", got)
+	}
+}
--- a/cmd/config/selector.go
+++ b/cmd/config/selector.go
@@ -0,0 +1,499 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"golang.org/x/term"
+)
+
+// ANSI escape sequences for terminal formatting.
+const (
+	// Hide / show the text cursor (DEC private mode 25).
+	ansiHideCursor = "\033[?25l"
+	ansiShowCursor = "\033[?25h"
+	// SGR 1: bold text.
+	ansiBold       = "\033[1m"
+	// SGR 0: reset all text attributes.
+	ansiReset      = "\033[0m"
+	// SGR 37: the standard "white" foreground colour.
+	// NOTE(review): this is not the bright-black code (90) often used for
+	// gray; the rendered shade depends on the terminal palette — confirm.
+	ansiGray       = "\033[37m"
+	// Erase from the cursor to the end of the screen.
+	ansiClearDown  = "\033[J"
+)
+
+// maxDisplayedItems caps how many list entries are rendered at once; longer
+// lists scroll within a window of this size.
+const maxDisplayedItems = 10
+
+// errCancelled is returned by the prompts when the user presses Escape or
+// Ctrl-C instead of making a selection.
+var errCancelled = errors.New("cancelled")
+
+// selectItem is one entry offered by a selection prompt.
+type selectItem struct {
+	// Name is the value returned when the item is chosen; filtering matches
+	// against it.
+	Name        string
+	// Description is optional text that renderSelect shows after the name.
+	Description string
+}
+
+// inputEvent identifies a key press decoded by parseInput.
+type inputEvent int
+
+const (
+	// eventNone is reported for input that matches no known key; the state
+	// handlers have no case for it, so it changes nothing.
+	eventNone inputEvent = iota
+	// eventEnter is the Enter key (carriage return).
+	eventEnter
+	// eventEscape is the Escape key or Ctrl-C.
+	eventEscape
+	// eventUp and eventDown are the arrow keys.
+	eventUp
+	eventDown
+	// eventTab is the Tab key.
+	eventTab
+	// eventBackspace is the Backspace key (DEL, byte 127).
+	eventBackspace
+	// eventChar is a printable ASCII character; the byte itself is returned
+	// alongside the event.
+	eventChar
+)
+
+// selectState is the terminal-independent state of a single-choice prompt.
+type selectState struct {
+	// items is the full, unfiltered list of choices.
+	items        []selectItem
+	// filter is the text typed so far; it narrows the visible items.
+	filter       string
+	// selected is the cursor position, as an index into the filtered list.
+	selected     int
+	// scrollOffset is the index (into the filtered list) of the first item
+	// that is rendered.
+	scrollOffset int
+}
+
+// newSelectState returns a selectState over items with an empty filter and
+// the cursor on the first entry.
+func newSelectState(items []selectItem) *selectState {
+	state := new(selectState)
+	state.items = items
+	return state
+}
+
+// filtered returns the items that match the current filter text.
+func (s *selectState) filtered() []selectItem {
+	matches := filterItems(s.items, s.filter)
+	return matches
+}
+
+// handleInput applies one input event to the prompt state.
+//
+// It returns done=true when the prompt should close: with the selected
+// item's name on Enter (when the cursor is on a visible item), or with
+// errCancelled on Escape. Every other event only updates the filter, cursor
+// or scroll position and returns done=false. char is used for eventChar only.
+func (s *selectState) handleInput(event inputEvent, char byte) (done bool, result string, err error) {
+	// Editing the filter changes which items are visible, so both editing
+	// events send the cursor and the scroll window back to the top.
+	rewind := func() {
+		s.selected = 0
+		s.scrollOffset = 0
+	}
+
+	switch event {
+	case eventEscape:
+		return true, "", errCancelled
+
+	case eventEnter:
+		matches := s.filtered()
+		if len(matches) > 0 && s.selected < len(matches) {
+			return true, matches[s.selected].Name, nil
+		}
+
+	case eventChar:
+		s.filter += string(char)
+		rewind()
+
+	case eventBackspace:
+		if len(s.filter) > 0 {
+			s.filter = s.filter[:len(s.filter)-1]
+			rewind()
+		}
+
+	case eventUp:
+		if s.selected > 0 {
+			s.selected--
+			// Pull the window up when the cursor moves above its top edge.
+			s.scrollOffset = min(s.scrollOffset, s.selected)
+		}
+
+	case eventDown:
+		if last := len(s.filtered()) - 1; s.selected < last {
+			s.selected++
+			// Push the window down when the cursor moves past its bottom edge.
+			s.scrollOffset = max(s.scrollOffset, s.selected-maxDisplayedItems+1)
+		}
+	}
+
+	return false, "", nil
+}
+
+// multiSelectState is the terminal-independent state of a multi-choice prompt.
+type multiSelectState struct {
+	// items is the full, unfiltered list of choices.
+	items         []selectItem
+	// itemIndex maps an item name to its index in items.
+	itemIndex     map[string]int
+	// filter is the text typed so far; it narrows the visible items.
+	filter        string
+	// highlighted is the cursor position, as an index into the filtered list.
+	highlighted   int
+	// scrollOffset is the index (into the filtered list) of the first item
+	// that is rendered.
+	scrollOffset  int
+	// checked is the set of checked items, keyed by index into items.
+	checked       map[int]bool
+	// checkOrder lists the checked item indices in the order they were
+	// checked; the first one is rendered with a "(default)" tag.
+	checkOrder    []int
+	// focusOnButton is true while the Continue button, rather than the list,
+	// has keyboard focus.
+	focusOnButton bool
+}
+
+// newMultiSelectState returns a multiSelectState over items with the entries
+// named in preChecked already checked, in the order given.
+//
+// Names in preChecked that match no item are ignored. A name that appears
+// more than once is checked only once, so checkOrder never holds the same
+// index twice (which would inflate the selected count, duplicate the name in
+// the result, and leave a stale entry behind after a single untoggle).
+func newMultiSelectState(items []selectItem, preChecked []string) *multiSelectState {
+	s := &multiSelectState{
+		items:     items,
+		itemIndex: make(map[string]int, len(items)),
+		checked:   make(map[int]bool),
+	}
+
+	for i, item := range items {
+		s.itemIndex[item.Name] = i
+	}
+
+	for _, name := range preChecked {
+		idx, ok := s.itemIndex[name]
+		if !ok || s.checked[idx] {
+			// Unknown name, or already checked by an earlier duplicate.
+			continue
+		}
+		s.checked[idx] = true
+		s.checkOrder = append(s.checkOrder, idx)
+	}
+
+	return s
+}
+
+// filtered returns the items that match the current filter text.
+func (s *multiSelectState) filtered() []selectItem {
+	matches := filterItems(s.items, s.filter)
+	return matches
+}
+
+// toggleItem flips the checked state of the highlighted item. It does
+// nothing when no visible item is highlighted. Checking appends the item to
+// checkOrder; unchecking removes its first occurrence from checkOrder.
+func (s *multiSelectState) toggleItem() {
+	visible := s.filtered()
+	if len(visible) == 0 || s.highlighted >= len(visible) {
+		return
+	}
+
+	// The highlight indexes the filtered list; translate it back to the
+	// item's position in the full list.
+	target := s.itemIndex[visible[s.highlighted].Name]
+
+	if !s.checked[target] {
+		s.checked[target] = true
+		s.checkOrder = append(s.checkOrder, target)
+		return
+	}
+
+	delete(s.checked, target)
+	for pos := range s.checkOrder {
+		if s.checkOrder[pos] != target {
+			continue
+		}
+		// Close the gap in place, preserving the order of the rest.
+		copy(s.checkOrder[pos:], s.checkOrder[pos+1:])
+		s.checkOrder = s.checkOrder[:len(s.checkOrder)-1]
+		break
+	}
+}
+
+// handleInput applies one input event to the prompt state.
+//
+// It returns done=true when the prompt should close: on Enter while the
+// Continue button has focus and at least one item is checked (result holds
+// the checked names in check order), or on Escape (err is errCancelled).
+// Enter on the list toggles the highlighted item. Tab moves focus between the
+// list and the button, but only while something is checked. Every other
+// event updates the filter, highlight or scroll position. char is used for
+// eventChar only.
+func (s *multiSelectState) handleInput(event inputEvent, char byte) (done bool, result []string, err error) {
+	// Editing the filter changes which items are visible, so both editing
+	// events reset the highlight and scroll window and return focus to the list.
+	resetList := func() {
+		s.highlighted = 0
+		s.scrollOffset = 0
+		s.focusOnButton = false
+	}
+
+	switch event {
+	case eventEscape:
+		return true, nil, errCancelled
+
+	case eventEnter:
+		if !s.focusOnButton {
+			s.toggleItem()
+			break
+		}
+		if len(s.checkOrder) > 0 {
+			names := make([]string, 0, len(s.checkOrder))
+			for _, idx := range s.checkOrder {
+				names = append(names, s.items[idx].Name)
+			}
+			return true, names, nil
+		}
+
+	case eventTab:
+		if len(s.checkOrder) > 0 {
+			s.focusOnButton = !s.focusOnButton
+		}
+
+	case eventChar:
+		s.filter += string(char)
+		resetList()
+
+	case eventBackspace:
+		if len(s.filter) > 0 {
+			s.filter = s.filter[:len(s.filter)-1]
+			resetList()
+		}
+
+	case eventUp:
+		switch {
+		case s.focusOnButton:
+			// An arrow key on the button just hands focus back to the list.
+			s.focusOnButton = false
+		case s.highlighted > 0:
+			s.highlighted--
+			s.scrollOffset = min(s.scrollOffset, s.highlighted)
+		}
+
+	case eventDown:
+		switch {
+		case s.focusOnButton:
+			s.focusOnButton = false
+		case s.highlighted < len(s.filtered())-1:
+			s.highlighted++
+			s.scrollOffset = max(s.scrollOffset, s.highlighted-maxDisplayedItems+1)
+		}
+	}
+
+	return false, nil, nil
+}
+
+// selectedCount returns the number of entries in checkOrder, i.e. how many
+// items are currently checked.
+func (s *multiSelectState) selectedCount() int {
+	count := len(s.checkOrder)
+	return count
+}
+
+// Terminal I/O handling
+
+// terminalState remembers what is needed to undo enterRawMode.
+type terminalState struct {
+	// fd is the file descriptor that was switched to raw mode.
+	fd       int
+	// oldState is the terminal state returned by term.MakeRaw, passed back
+	// to term.Restore.
+	oldState *term.State
+}
+
+// enterRawMode puts stdin into raw mode and hides the cursor (via stderr).
+// The returned terminalState must be passed to restore to undo both. If
+// stdin cannot be switched to raw mode the error is returned and the cursor
+// is left untouched.
+func enterRawMode() (*terminalState, error) {
+	stdinFD := int(os.Stdin.Fd())
+
+	saved, err := term.MakeRaw(stdinFD)
+	if err != nil {
+		return nil, err
+	}
+
+	fmt.Fprint(os.Stderr, ansiHideCursor)
+
+	ts := &terminalState{
+		fd:       stdinFD,
+		oldState: saved,
+	}
+	return ts, nil
+}
+
+// restore shows the cursor again and puts the terminal back into the state
+// it had before enterRawMode. An error from term.Restore is ignored.
+func (t *terminalState) restore() {
+	fmt.Fprint(os.Stderr, ansiShowCursor)
+	_ = term.Restore(t.fd, t.oldState)
+}
+
+// clearLines moves the cursor up n lines on stderr and erases everything
+// from there to the end of the screen. It does nothing when n is not
+// positive.
+func clearLines(n int) {
+	if n <= 0 {
+		return
+	}
+	fmt.Fprintf(os.Stderr, "\033[%dA", n)
+	fmt.Fprint(os.Stderr, ansiClearDown)
+}
+
+// parseInput performs a single Read of up to three bytes from r and decodes
+// the result into an input event.
+//
+// A lone byte maps to Enter (CR), Escape (ESC or Ctrl-C), Tab, Backspace
+// (DEL) or, for printable ASCII, eventChar with the byte returned alongside.
+// The three-byte sequences ESC [ A and ESC [ B map to Up and Down. Anything
+// else yields eventNone. A read error is returned as-is.
+func parseInput(r io.Reader) (inputEvent, byte, error) {
+	var raw [3]byte
+	n, err := r.Read(raw[:])
+	if err != nil {
+		return 0, 0, err
+	}
+
+	switch n {
+	case 1:
+		switch b := raw[0]; {
+		case b == '\r':
+			return eventEnter, 0, nil
+		case b == 3 || b == 27: // Ctrl-C or ESC
+			return eventEscape, 0, nil
+		case b == '\t':
+			return eventTab, 0, nil
+		case b == 127: // DEL
+			return eventBackspace, 0, nil
+		case b >= ' ' && b < 127:
+			return eventChar, b, nil
+		}
+	case 3:
+		// Arrow keys arrive as ESC '[' followed by a letter.
+		if raw[0] == 27 && raw[1] == '[' {
+			switch raw[2] {
+			case 'A':
+				return eventUp, 0, nil
+			case 'B':
+				return eventDown, 0, nil
+			}
+		}
+	}
+
+	return eventNone, 0, nil
+}
+
+// Rendering
+
+// renderSelect writes the single-choice prompt to w: the prompt line with
+// the current filter, then up to maxDisplayedItems matching items starting at
+// the scroll offset (or a "(no matches)" line), then a "... and N more" line
+// when items remain below the window. Lines end in "\r\n".
+//
+// It returns the number of lines written so the caller can erase them
+// before the next redraw.
+func renderSelect(w io.Writer, prompt string, s *selectState) int {
+	matches := s.filtered()
+
+	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
+
+	if len(matches) == 0 {
+		fmt.Fprintf(w, "  %s(no matches)%s\r\n", ansiGray, ansiReset)
+		return 2
+	}
+
+	lines := 1
+	window := min(len(matches), maxDisplayedItems)
+	end := min(s.scrollOffset+window, len(matches))
+
+	for idx := s.scrollOffset; idx < end; idx++ {
+		item := matches[idx]
+
+		// The selected row gets a bold "> " marker instead of plain indent.
+		marker := "    "
+		if idx == s.selected {
+			marker = "  " + ansiBold + "> "
+		}
+
+		if item.Description == "" {
+			fmt.Fprintf(w, "%s%s%s\r\n", marker, item.Name, ansiReset)
+		} else {
+			fmt.Fprintf(w, "%s%s%s %s- %s%s\r\n", marker, item.Name, ansiReset, ansiGray, item.Description, ansiReset)
+		}
+		lines++
+	}
+
+	if hidden := len(matches) - s.scrollOffset - window; hidden > 0 {
+		fmt.Fprintf(w, "  %s... and %d more%s\r\n", ansiGray, hidden, ansiReset)
+		lines++
+	}
+
+	return lines
+}
+
+// renderMultiSelect writes the multi-choice prompt to w: the prompt line
+// with the current filter, up to maxDisplayedItems matching items with
+// checkboxes (or a "(no matches)" line), an optional "... and N more" line, a
+// blank line, and a footer showing either a hint or the Continue button.
+// The first checked item is tagged "(default)". Lines end in "\r\n".
+//
+// It returns the number of lines written so the caller can erase them
+// before the next redraw.
+func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
+	matches := s.filtered()
+
+	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
+	lines := 1
+
+	if len(matches) == 0 {
+		fmt.Fprintf(w, "  %s(no matches)%s\r\n", ansiGray, ansiReset)
+		lines++
+	} else {
+		window := min(len(matches), maxDisplayedItems)
+		end := min(s.scrollOffset+window, len(matches))
+
+		for idx := s.scrollOffset; idx < end; idx++ {
+			item := matches[idx]
+			origIdx := s.itemIndex[item.Name]
+
+			box := "[ ]"
+			if s.checked[origIdx] {
+				box = "[x]"
+			}
+
+			var tag string
+			if len(s.checkOrder) > 0 && s.checkOrder[0] == origIdx {
+				tag = " " + ansiGray + "(default)" + ansiReset
+			}
+
+			// The row under the cursor is bold with a "> " marker, unless
+			// focus has moved to the Continue button.
+			if idx == s.highlighted && !s.focusOnButton {
+				fmt.Fprintf(w, "  %s%s %s %s%s%s\r\n", ansiBold, "> ", box, item.Name, ansiReset, tag)
+			} else {
+				fmt.Fprintf(w, "  %s %s %s%s\r\n", "  ", box, item.Name, tag)
+			}
+			lines++
+		}
+
+		if hidden := len(matches) - s.scrollOffset - window; hidden > 0 {
+			fmt.Fprintf(w, "  %s... and %d more%s\r\n", ansiGray, hidden, ansiReset)
+			lines++
+		}
+	}
+
+	// Blank separator line followed by the one-line footer.
+	fmt.Fprintf(w, "\r\n")
+	lines++
+
+	count := s.selectedCount()
+	if count == 0 {
+		fmt.Fprintf(w, "  %sSelect at least one model.%s\r\n", ansiGray, ansiReset)
+	} else if s.focusOnButton {
+		fmt.Fprintf(w, "  %s> [ Continue ]%s %s(%d selected)%s\r\n", ansiBold, ansiReset, ansiGray, count, ansiReset)
+	} else {
+		fmt.Fprintf(w, "    %s[ Continue ] (%d selected) - press Tab%s\r\n", ansiGray, count, ansiReset)
+	}
+	lines++
+
+	return lines
+}
+
+// selectPrompt prompts the user to select a single item from a list.
+//
+// The list is drawn on stderr and keys are read from stdin in raw mode; the
+// terminal is restored before returning. It returns the chosen item's name,
+// errCancelled if the user cancels, or an error if items is empty, raw mode
+// cannot be entered, or reading input fails.
+func selectPrompt(prompt string, items []selectItem) (string, error) {
+	if len(items) == 0 {
+		return "", fmt.Errorf("no items to select from")
+	}
+
+	ts, err := enterRawMode()
+	if err != nil {
+		return "", err
+	}
+	defer ts.restore()
+
+	state := newSelectState(items)
+	drawn := 0 // lines currently on screen from the last redraw
+
+	for {
+		// Erase the previous frame and draw the current state.
+		clearLines(drawn)
+		drawn = renderSelect(os.Stderr, prompt, state)
+
+		event, char, err := parseInput(os.Stdin)
+		if err != nil {
+			return "", err
+		}
+
+		done, choice, err := state.handleInput(event, char)
+		if !done {
+			continue
+		}
+
+		clearLines(drawn)
+		if err != nil {
+			return "", err
+		}
+		return choice, nil
+	}
+}
+
+// multiSelectPrompt prompts the user to select multiple items from a list.
+//
+// Items named in preChecked start out checked. The list is drawn on stderr
+// and keys are read from stdin in raw mode; the terminal is restored before
+// returning. It returns the checked names in the order they were checked,
+// errCancelled if the user cancels, or an error if items is empty, raw mode
+// cannot be entered, or reading input fails.
+func multiSelectPrompt(prompt string, items []selectItem, preChecked []string) ([]string, error) {
+	if len(items) == 0 {
+		return nil, fmt.Errorf("no items to select from")
+	}
+
+	ts, err := enterRawMode()
+	if err != nil {
+		return nil, err
+	}
+	defer ts.restore()
+
+	state := newMultiSelectState(items, preChecked)
+	drawn := 0 // lines currently on screen from the last redraw
+
+	for {
+		// Erase the previous frame and draw the current state.
+		clearLines(drawn)
+		drawn = renderMultiSelect(os.Stderr, prompt, state)
+
+		event, char, err := parseInput(os.Stdin)
+		if err != nil {
+			return nil, err
+		}
+
+		done, chosen, err := state.handleInput(event, char)
+		if !done {
+			continue
+		}
+
+		clearLines(drawn)
+		if err != nil {
+			return nil, err
+		}
+		return chosen, nil
+	}
+}
+
+// confirmPrompt asks a yes/no question on stderr and reads single key
+// presses from stdin in raw mode until one is recognised.
+//
+// 'y', 'Y' and Enter answer yes; 'n', 'N', Escape and Ctrl-C answer no; any
+// other key is ignored. The answer is echoed as "yes" or "no". An error is
+// returned if raw mode cannot be entered or reading stdin fails.
+func confirmPrompt(prompt string) (bool, error) {
+	stdinFD := int(os.Stdin.Fd())
+	saved, err := term.MakeRaw(stdinFD)
+	if err != nil {
+		return false, err
+	}
+	defer term.Restore(stdinFD, saved)
+
+	fmt.Fprintf(os.Stderr, "%s (\033[1my\033[0m/n) ", prompt)
+
+	var key [1]byte
+	for {
+		_, err := os.Stdin.Read(key[:])
+		if err != nil {
+			return false, err
+		}
+
+		switch key[0] {
+		case 'y', 'Y', '\r':
+			fmt.Fprintf(os.Stderr, "yes\r\n")
+			return true, nil
+		case 'n', 'N', 27, 3: // 27 = ESC, 3 = Ctrl-C
+			fmt.Fprintf(os.Stderr, "no\r\n")
+			return false, nil
+		}
+	}
+}
+
+// filterItems returns the items whose Name contains filter, compared
+// case-insensitively and in their original order. An empty filter returns
+// items itself; a filter that matches nothing returns a nil slice.
+func filterItems(items []selectItem, filter string) []selectItem {
+	if len(filter) == 0 {
+		return items
+	}
+
+	needle := strings.ToLower(filter)
+
+	var matches []selectItem
+	for i := range items {
+		if name := strings.ToLower(items[i].Name); strings.Contains(name, needle) {
+			matches = append(matches, items[i])
+		}
+	}
+	return matches
+}
--- a/cmd/config/selector_test.go
+++ b/cmd/config/selector_test.go
@@ -0,0 +1,913 @@
+package config
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+)
+
+// TestFilterItems covers the basic matching rules of filterItems: empty
+// filter, case-insensitivity in both directions, partial matches and no match.
+func TestFilterItems(t *testing.T) {
+	models := []selectItem{
+		{Name: "llama3.2:latest"},
+		{Name: "qwen2.5:7b"},
+		{Name: "deepseek-v3:cloud"},
+		{Name: "GPT-OSS:20b"},
+	}
+
+	t.Run("EmptyFilter_ReturnsAllItems", func(t *testing.T) {
+		if got := filterItems(models, ""); len(got) != len(models) {
+			t.Errorf("expected %d items, got %d", len(models), len(got))
+		}
+	})
+
+	t.Run("CaseInsensitive_UppercaseFilterMatchesLowercase", func(t *testing.T) {
+		got := filterItems(models, "LLAMA")
+		if !(len(got) == 1 && got[0].Name == "llama3.2:latest") {
+			t.Errorf("expected llama3.2:latest, got %v", got)
+		}
+	})
+
+	t.Run("CaseInsensitive_LowercaseFilterMatchesUppercase", func(t *testing.T) {
+		got := filterItems(models, "gpt")
+		if !(len(got) == 1 && got[0].Name == "GPT-OSS:20b") {
+			t.Errorf("expected GPT-OSS:20b, got %v", got)
+		}
+	})
+
+	t.Run("PartialMatch", func(t *testing.T) {
+		got := filterItems(models, "deep")
+		if !(len(got) == 1 && got[0].Name == "deepseek-v3:cloud") {
+			t.Errorf("expected deepseek-v3:cloud, got %v", got)
+		}
+	})
+
+	t.Run("NoMatch_ReturnsEmpty", func(t *testing.T) {
+		if got := filterItems(models, "nonexistent"); len(got) != 0 {
+			t.Errorf("expected 0 items, got %d", len(got))
+		}
+	})
+}
+
+// TestSelectState drives the single-select state machine directly through
+// handleInput, without a terminal. It covers the initial state, enter/escape,
+// up/down navigation with clamping at both ends, filter editing via typed
+// characters and backspace, and viewport scrolling on lists longer than the
+// visible window.
+func TestSelectState(t *testing.T) {
+	items := []selectItem{
+		{Name: "item1"},
+		{Name: "item2"},
+		{Name: "item3"},
+	}
+
+	t.Run("InitialState", func(t *testing.T) {
+		s := newSelectState(items)
+		if s.selected != 0 {
+			t.Errorf("expected selected=0, got %d", s.selected)
+		}
+		if s.filter != "" {
+			t.Errorf("expected empty filter, got %q", s.filter)
+		}
+		if s.scrollOffset != 0 {
+			t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
+		}
+	})
+
+	t.Run("Enter_SelectsCurrentItem", func(t *testing.T) {
+		s := newSelectState(items)
+		done, result, err := s.handleInput(eventEnter, 0)
+		if !done || result != "item1" || err != nil {
+			t.Errorf("expected (true, item1, nil), got (%v, %v, %v)", done, result, err)
+		}
+	})
+
+	t.Run("Enter_WithFilter_SelectsFilteredItem", func(t *testing.T) {
+		s := newSelectState(items)
+		// The filter is set directly on the state rather than typed key by key.
+		s.filter = "item3"
+		done, result, err := s.handleInput(eventEnter, 0)
+		if !done || result != "item3" || err != nil {
+			t.Errorf("expected (true, item3, nil), got (%v, %v, %v)", done, result, err)
+		}
+	})
+
+	t.Run("Enter_EmptyFilteredList_DoesNothing", func(t *testing.T) {
+		s := newSelectState(items)
+		s.filter = "nonexistent"
+		done, result, err := s.handleInput(eventEnter, 0)
+		if done || result != "" || err != nil {
+			t.Errorf("expected (false, '', nil), got (%v, %v, %v)", done, result, err)
+		}
+	})
+
+	t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
+		s := newSelectState(items)
+		done, result, err := s.handleInput(eventEscape, 0)
+		if !done || result != "" || err != errCancelled {
+			t.Errorf("expected (true, '', errCancelled), got (%v, %v, %v)", done, result, err)
+		}
+	})
+
+	t.Run("Down_MovesSelection", func(t *testing.T) {
+		s := newSelectState(items)
+		s.handleInput(eventDown, 0)
+		if s.selected != 1 {
+			t.Errorf("expected selected=1, got %d", s.selected)
+		}
+	})
+
+	t.Run("Down_AtBottom_StaysAtBottom", func(t *testing.T) {
+		s := newSelectState(items)
+		s.selected = 2
+		s.handleInput(eventDown, 0)
+		if s.selected != 2 {
+			t.Errorf("expected selected=2 (stayed at bottom), got %d", s.selected)
+		}
+	})
+
+	t.Run("Up_MovesSelection", func(t *testing.T) {
+		s := newSelectState(items)
+		s.selected = 2
+		s.handleInput(eventUp, 0)
+		if s.selected != 1 {
+			t.Errorf("expected selected=1, got %d", s.selected)
+		}
+	})
+
+	t.Run("Up_AtTop_StaysAtTop", func(t *testing.T) {
+		s := newSelectState(items)
+		s.handleInput(eventUp, 0)
+		if s.selected != 0 {
+			t.Errorf("expected selected=0 (stayed at top), got %d", s.selected)
+		}
+	})
+
+	t.Run("Char_AppendsToFilter", func(t *testing.T) {
+		s := newSelectState(items)
+		// Type "item2" one character at a time.
+		s.handleInput(eventChar, 'i')
+		s.handleInput(eventChar, 't')
+		s.handleInput(eventChar, 'e')
+		s.handleInput(eventChar, 'm')
+		s.handleInput(eventChar, '2')
+		if s.filter != "item2" {
+			t.Errorf("expected filter='item2', got %q", s.filter)
+		}
+		filtered := s.filtered()
+		if len(filtered) != 1 || filtered[0].Name != "item2" {
+			t.Errorf("expected [item2], got %v", filtered)
+		}
+	})
+
+	t.Run("Char_ResetsSelectionToZero", func(t *testing.T) {
+		s := newSelectState(items)
+		s.selected = 2
+		s.handleInput(eventChar, 'x')
+		if s.selected != 0 {
+			t.Errorf("expected selected=0 after typing, got %d", s.selected)
+		}
+	})
+
+	t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
+		s := newSelectState(items)
+		s.filter = "test"
+		s.handleInput(eventBackspace, 0)
+		if s.filter != "tes" {
+			t.Errorf("expected filter='tes', got %q", s.filter)
+		}
+	})
+
+	t.Run("Backspace_EmptyFilter_DoesNothing", func(t *testing.T) {
+		s := newSelectState(items)
+		s.handleInput(eventBackspace, 0)
+		if s.filter != "" {
+			t.Errorf("expected filter='', got %q", s.filter)
+		}
+	})
+
+	t.Run("Backspace_ResetsSelectionToZero", func(t *testing.T) {
+		s := newSelectState(items)
+		s.filter = "test"
+		s.selected = 2
+		s.handleInput(eventBackspace, 0)
+		if s.selected != 0 {
+			t.Errorf("expected selected=0 after backspace, got %d", s.selected)
+		}
+	})
+
+	t.Run("Scroll_DownPastVisibleItems_ScrollsViewport", func(t *testing.T) {
+		// maxDisplayedItems is 10, so with 15 items we need to scroll
+		// Items are named "a" through "o".
+		manyItems := make([]selectItem, 15)
+		for i := range manyItems {
+			manyItems[i] = selectItem{Name: string(rune('a' + i))}
+		}
+		s := newSelectState(manyItems)
+
+		// move down 12 times (past the 10-item viewport)
+		for range 12 {
+			s.handleInput(eventDown, 0)
+		}
+
+		if s.selected != 12 {
+			t.Errorf("expected selected=12, got %d", s.selected)
+		}
+		if s.scrollOffset != 3 {
+			t.Errorf("expected scrollOffset=3 (12-10+1), got %d", s.scrollOffset)
+		}
+	})
+
+	t.Run("Scroll_UpPastScrollOffset_ScrollsViewport", func(t *testing.T) {
+		manyItems := make([]selectItem, 15)
+		for i := range manyItems {
+			manyItems[i] = selectItem{Name: string(rune('a' + i))}
+		}
+		s := newSelectState(manyItems)
+		// Start with the selection on the first visible row of a scrolled viewport.
+		s.selected = 5
+		s.scrollOffset = 5
+
+		s.handleInput(eventUp, 0)
+
+		if s.selected != 4 {
+			t.Errorf("expected selected=4, got %d", s.selected)
+		}
+		if s.scrollOffset != 4 {
+			t.Errorf("expected scrollOffset=4, got %d", s.scrollOffset)
+		}
+	})
+}
+
+// TestMultiSelectState drives the multi-select state machine directly through
+// handleInput and toggleItem, without a terminal. It covers pre-checked items
+// and their order, toggling, the confirm-button focus (tab / arrows /
+// backspace), enter on the list versus on the button, escape, highlight
+// movement, and filter editing.
+func TestMultiSelectState(t *testing.T) {
+	items := []selectItem{
+		{Name: "item1"},
+		{Name: "item2"},
+		{Name: "item3"},
+	}
+
+	t.Run("InitialState_NoPrechecked", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		if s.highlighted != 0 {
+			t.Errorf("expected highlighted=0, got %d", s.highlighted)
+		}
+		if s.selectedCount() != 0 {
+			t.Errorf("expected 0 selected, got %d", s.selectedCount())
+		}
+		if s.focusOnButton {
+			t.Error("expected focusOnButton=false initially")
+		}
+	})
+
+	t.Run("InitialState_WithPrechecked", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item2", "item3"})
+		if s.selectedCount() != 2 {
+			t.Errorf("expected 2 selected, got %d", s.selectedCount())
+		}
+		if !s.checked[1] || !s.checked[2] {
+			t.Error("expected item2 and item3 to be checked")
+		}
+	})
+
+	t.Run("Prechecked_PreservesSelectionOrder", func(t *testing.T) {
+		// order matters: first checked = default model
+		s := newMultiSelectState(items, []string{"item3", "item1"})
+		if len(s.checkOrder) != 2 {
+			t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
+		}
+		if s.checkOrder[0] != 2 || s.checkOrder[1] != 0 {
+			t.Errorf("expected checkOrder=[2,0] (item3 first), got %v", s.checkOrder)
+		}
+	})
+
+	t.Run("Prechecked_IgnoresInvalidNames", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1", "nonexistent"})
+		if s.selectedCount() != 1 {
+			t.Errorf("expected 1 selected (nonexistent ignored), got %d", s.selectedCount())
+		}
+	})
+
+	t.Run("Toggle_ChecksUncheckedItem", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.toggleItem()
+		if !s.checked[0] {
+			t.Error("expected item1 to be checked after toggle")
+		}
+	})
+
+	t.Run("Toggle_UnchecksCheckedItem", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1"})
+		s.toggleItem()
+		if s.checked[0] {
+			t.Error("expected item1 to be unchecked after toggle")
+		}
+	})
+
+	t.Run("Toggle_RemovesFromCheckOrder", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1", "item2", "item3"})
+		s.highlighted = 1 // toggle item2
+		s.toggleItem()
+
+		if len(s.checkOrder) != 2 {
+			t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
+		}
+		// should be [0, 2] (item1, item3) with item2 removed
+		if s.checkOrder[0] != 0 || s.checkOrder[1] != 2 {
+			t.Errorf("expected checkOrder=[0,2], got %v", s.checkOrder)
+		}
+	})
+
+	t.Run("Enter_TogglesWhenNotOnButton", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.handleInput(eventEnter, 0)
+		if !s.checked[0] {
+			t.Error("expected item1 to be checked after enter")
+		}
+	})
+
+	t.Run("Enter_OnButton_ReturnsSelection", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item2", "item1"})
+		s.focusOnButton = true
+
+		done, result, err := s.handleInput(eventEnter, 0)
+
+		if !done || err != nil {
+			t.Errorf("expected done=true, err=nil, got done=%v, err=%v", done, err)
+		}
+		// result should preserve selection order
+		if len(result) != 2 || result[0] != "item2" || result[1] != "item1" {
+			t.Errorf("expected [item2, item1], got %v", result)
+		}
+	})
+
+	t.Run("Enter_OnButton_EmptySelection_DoesNothing", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		// Focus is forced onto the button even though nothing is selected.
+		s.focusOnButton = true
+		done, result, err := s.handleInput(eventEnter, 0)
+		if done || result != nil || err != nil {
+			t.Errorf("expected (false, nil, nil), got (%v, %v, %v)", done, result, err)
+		}
+	})
+
+	t.Run("Tab_SwitchesToButton_WhenHasSelection", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1"})
+		s.handleInput(eventTab, 0)
+		if !s.focusOnButton {
+			t.Error("expected focus on button after tab")
+		}
+	})
+
+	t.Run("Tab_DoesNothing_WhenNoSelection", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.handleInput(eventTab, 0)
+		if s.focusOnButton {
+			t.Error("tab should not focus button when nothing selected")
+		}
+	})
+
+	t.Run("Tab_TogglesButtonFocus", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1"})
+		s.handleInput(eventTab, 0)
+		if !s.focusOnButton {
+			t.Error("expected focus on button after first tab")
+		}
+		s.handleInput(eventTab, 0)
+		if s.focusOnButton {
+			t.Error("expected focus back on list after second tab")
+		}
+	})
+
+	t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1"})
+		done, result, err := s.handleInput(eventEscape, 0)
+		if !done || result != nil || err != errCancelled {
+			t.Errorf("expected (true, nil, errCancelled), got (%v, %v, %v)", done, result, err)
+		}
+	})
+
+	// The two IsDefault subtests check the "default" rule inline: an item is
+	// the default when its index is the first entry of checkOrder.
+	t.Run("IsDefault_TrueForFirstChecked", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item2", "item1"})
+		if !(len(s.checkOrder) > 0 && s.checkOrder[0] == 1) {
+			t.Error("expected item2 (idx 1) to be default (first checked)")
+		}
+		if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
+			t.Error("expected item1 (idx 0) to NOT be default")
+		}
+	})
+
+	t.Run("IsDefault_FalseWhenNothingChecked", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
+			t.Error("expected isDefault=false when nothing checked")
+		}
+	})
+
+	t.Run("Down_MovesHighlight", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.handleInput(eventDown, 0)
+		if s.highlighted != 1 {
+			t.Errorf("expected highlighted=1, got %d", s.highlighted)
+		}
+	})
+
+	t.Run("Up_MovesHighlight", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.highlighted = 1
+		s.handleInput(eventUp, 0)
+		if s.highlighted != 0 {
+			t.Errorf("expected highlighted=0, got %d", s.highlighted)
+		}
+	})
+
+	t.Run("Arrow_ReturnsFocusFromButton", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1"})
+		s.focusOnButton = true
+		s.handleInput(eventDown, 0)
+		if s.focusOnButton {
+			t.Error("expected focus to return to list on arrow key")
+		}
+	})
+
+	t.Run("Char_AppendsToFilter", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.handleInput(eventChar, 'x')
+		if s.filter != "x" {
+			t.Errorf("expected filter='x', got %q", s.filter)
+		}
+	})
+
+	t.Run("Char_ResetsHighlightAndScroll", func(t *testing.T) {
+		// Items are named "a" through "o".
+		manyItems := make([]selectItem, 15)
+		for i := range manyItems {
+			manyItems[i] = selectItem{Name: string(rune('a' + i))}
+		}
+		s := newMultiSelectState(manyItems, nil)
+		s.highlighted = 10
+		s.scrollOffset = 5
+
+		s.handleInput(eventChar, 'x')
+
+		if s.highlighted != 0 {
+			t.Errorf("expected highlighted=0, got %d", s.highlighted)
+		}
+		if s.scrollOffset != 0 {
+			t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
+		}
+	})
+
+	t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
+		s := newMultiSelectState(items, nil)
+		s.filter = "test"
+		s.handleInput(eventBackspace, 0)
+		if s.filter != "tes" {
+			t.Errorf("expected filter='tes', got %q", s.filter)
+		}
+	})
+
+	t.Run("Backspace_RemovesFocusFromButton", func(t *testing.T) {
+		s := newMultiSelectState(items, []string{"item1"})
+		s.filter = "x"
+		s.focusOnButton = true
+		s.handleInput(eventBackspace, 0)
+		if s.focusOnButton {
+			t.Error("expected focusOnButton=false after backspace")
+		}
+	})
+}
+
+// TestParseInput feeds raw key bytes to parseInput and checks which event (and,
+// for printable keys, which character) comes back.
+func TestParseInput(t *testing.T) {
+	// keys builds a reader that yields exactly the given bytes, standing in for stdin.
+	keys := func(raw ...byte) *bytes.Reader {
+		return bytes.NewReader(raw)
+	}
+
+	t.Run("Enter", func(t *testing.T) {
+		event, char, err := parseInput(keys('\r')) // 13
+		if !(err == nil && event == eventEnter && char == 0) {
+			t.Errorf("expected (eventEnter, 0, nil), got (%v, %v, %v)", event, char, err)
+		}
+	})
+
+	t.Run("Escape", func(t *testing.T) {
+		event, _, err := parseInput(keys(0x1b)) // 27
+		if !(err == nil && event == eventEscape) {
+			t.Errorf("expected eventEscape, got %v", event)
+		}
+	})
+
+	t.Run("CtrlC_TreatedAsEscape", func(t *testing.T) {
+		event, _, err := parseInput(keys(0x03)) // 3
+		if !(err == nil && event == eventEscape) {
+			t.Errorf("expected eventEscape for Ctrl+C, got %v", event)
+		}
+	})
+
+	t.Run("Tab", func(t *testing.T) {
+		event, _, err := parseInput(keys('\t')) // 9
+		if !(err == nil && event == eventTab) {
+			t.Errorf("expected eventTab, got %v", event)
+		}
+	})
+
+	t.Run("Backspace", func(t *testing.T) {
+		event, _, err := parseInput(keys(0x7f)) // 127
+		if !(err == nil && event == eventBackspace) {
+			t.Errorf("expected eventBackspace, got %v", event)
+		}
+	})
+
+	t.Run("UpArrow", func(t *testing.T) {
+		event, _, err := parseInput(keys(0x1b, '[', 'A')) // 27, 91, 65
+		if !(err == nil && event == eventUp) {
+			t.Errorf("expected eventUp, got %v", event)
+		}
+	})
+
+	t.Run("DownArrow", func(t *testing.T) {
+		event, _, err := parseInput(keys(0x1b, '[', 'B')) // 27, 91, 66
+		if !(err == nil && event == eventDown) {
+			t.Errorf("expected eventDown, got %v", event)
+		}
+	})
+
+	t.Run("PrintableChars", func(t *testing.T) {
+		cases := []struct {
+			name string
+			char byte
+		}{
+			{name: "lowercase", char: 'a'},
+			{name: "uppercase", char: 'Z'},
+			{name: "digit", char: '5'},
+			{name: "space", char: ' '},
+			{name: "tilde", char: '~'},
+		}
+		for _, tc := range cases {
+			t.Run(tc.name, func(t *testing.T) {
+				event, got, err := parseInput(keys(tc.char))
+				if !(err == nil && event == eventChar && got == tc.char) {
+					t.Errorf("expected (eventChar, %q), got (%v, %q)", tc.char, event, got)
+				}
+			})
+		}
+	})
+}
+
+// TestRenderSelect checks what renderSelect writes for a short list, for a
+// filter that matches nothing, and for a list longer than the visible window.
+func TestRenderSelect(t *testing.T) {
+	items := []selectItem{
+		{Name: "item1", Description: "first item"},
+		{Name: "item2"},
+	}
+
+	t.Run("ShowsPromptAndItems", func(t *testing.T) {
+		var out bytes.Buffer
+		lines := renderSelect(&out, "Select:", newSelectState(items))
+
+		rendered := out.String()
+		checks := []struct{ text, failure string }{
+			{"Select:", "expected prompt in output"},
+			{"item1", "expected item1 in output"},
+			{"first item", "expected description in output"},
+			{"item2", "expected item2 in output"},
+		}
+		for _, c := range checks {
+			if !strings.Contains(rendered, c.text) {
+				t.Error(c.failure)
+			}
+		}
+		if lines != 3 { // 1 prompt + 2 items
+			t.Errorf("expected 3 lines, got %d", lines)
+		}
+	})
+
+	t.Run("EmptyFilteredList_ShowsNoMatches", func(t *testing.T) {
+		state := newSelectState(items)
+		state.filter = "xyz"
+
+		var out bytes.Buffer
+		renderSelect(&out, "Select:", state)
+
+		if rendered := out.String(); !strings.Contains(rendered, "no matches") {
+			t.Error("expected 'no matches' message")
+		}
+	})
+
+	t.Run("LongList_ShowsRemainingCount", func(t *testing.T) {
+		// Fifteen items named "a" through "o".
+		var many []selectItem
+		for ch := 'a'; ch < 'a'+15; ch++ {
+			many = append(many, selectItem{Name: string(ch)})
+		}
+
+		var out bytes.Buffer
+		renderSelect(&out, "Select:", newSelectState(many))
+
+		// 15 items - 10 displayed = 5 more
+		if rendered := out.String(); !strings.Contains(rendered, "5 more") {
+			t.Error("expected '5 more' indicator")
+		}
+	})
+}
+
+// TestRenderMultiSelect checks the text renderMultiSelect writes: checkbox
+// glyphs, the default marker, the selected count and the empty-selection hint.
+func TestRenderMultiSelect(t *testing.T) {
+	items := []selectItem{
+		{Name: "item1"},
+		{Name: "item2"},
+	}
+
+	// render builds a fresh state with the given pre-checked names and returns
+	// everything renderMultiSelect wrote for it.
+	render := func(preChecked []string) string {
+		var out bytes.Buffer
+		renderMultiSelect(&out, "Select:", newMultiSelectState(items, preChecked))
+		return out.String()
+	}
+
+	t.Run("ShowsCheckboxes", func(t *testing.T) {
+		rendered := render([]string{"item1"})
+		if !strings.Contains(rendered, "[x]") {
+			t.Error("expected checked checkbox [x]")
+		}
+		if !strings.Contains(rendered, "[ ]") {
+			t.Error("expected unchecked checkbox [ ]")
+		}
+	})
+
+	t.Run("ShowsDefaultMarker", func(t *testing.T) {
+		if rendered := render([]string{"item1"}); !strings.Contains(rendered, "(default)") {
+			t.Error("expected (default) marker for first checked item")
+		}
+	})
+
+	t.Run("ShowsSelectedCount", func(t *testing.T) {
+		if rendered := render([]string{"item1", "item2"}); !strings.Contains(rendered, "2 selected") {
+			t.Error("expected '2 selected' in output")
+		}
+	})
+
+	t.Run("NoSelection_ShowsHelperText", func(t *testing.T) {
+		if rendered := render(nil); !strings.Contains(rendered, "Select at least one") {
+			t.Error("expected 'Select at least one' helper text")
+		}
+	})
+}
+
+// TestErrCancelled pins the sentinel error used for cancellation: it must
+// exist and its message must be exactly "cancelled".
+func TestErrCancelled(t *testing.T) {
+	t.Run("NotNil", func(t *testing.T) {
+		if errCancelled != nil {
+			return
+		}
+		t.Error("errCancelled should not be nil")
+	})
+
+	t.Run("Message", func(t *testing.T) {
+		if msg := errCancelled.Error(); msg != "cancelled" {
+			t.Errorf("expected 'cancelled', got %q", msg)
+		}
+	})
+}
+
+// Edge case tests for selector.go
+
+// TestSelectState_SingleItem exercises a one-element list: both arrow keys
+// must leave the selection on index 0 and enter must return the sole item.
+func TestSelectState_SingleItem(t *testing.T) {
+	s := newSelectState([]selectItem{{Name: "only-one"}})
+
+	// Already at the bottom, so down has nowhere to go.
+	s.handleInput(eventDown, 0)
+	if got := s.selected; got != 0 {
+		t.Errorf("down on single item: expected selected=0, got %d", got)
+	}
+
+	// Already at the top, so up has nowhere to go either.
+	s.handleInput(eventUp, 0)
+	if got := s.selected; got != 0 {
+		t.Errorf("up on single item: expected selected=0, got %d", got)
+	}
+
+	// Enter picks the only item.
+	done, result, err := s.handleInput(eventEnter, 0)
+	if !(done && result == "only-one" && err == nil) {
+		t.Errorf("enter on single item: expected (true, 'only-one', nil), got (%v, %q, %v)", done, result, err)
+	}
+}
+
+// TestSelectState_ExactlyMaxItems checks the boundary where the list length
+// equals maxDisplayedItems: walking to the last item must not scroll the
+// viewport, and a further down press must not move the selection.
+func TestSelectState_ExactlyMaxItems(t *testing.T) {
+	items := make([]selectItem, maxDisplayedItems)
+	for i := range items {
+		items[i] = selectItem{Name: string(rune('a' + i))}
+	}
+	last := maxDisplayedItems - 1
+
+	s := newSelectState(items)
+
+	// Walk from the first item to the last one.
+	for range last {
+		s.handleInput(eventDown, 0)
+	}
+
+	if s.selected != last {
+		t.Errorf("expected selected=%d, got %d", last, s.selected)
+	}
+
+	// Everything fits in the window, so nothing should have scrolled.
+	if s.scrollOffset != 0 {
+		t.Errorf("expected scrollOffset=0 for exactly maxDisplayedItems, got %d", s.scrollOffset)
+	}
+
+	// A further down press is a no-op at the end of the list.
+	s.handleInput(eventDown, 0)
+	if s.selected != last {
+		t.Errorf("down at max: expected selected=%d, got %d", last, s.selected)
+	}
+}
+
+// TestFilterItems_RegexSpecialChars verifies that the filter text is matched
+// literally rather than as a regular expression: typing "model.v1" must not
+// match "modelsv1", and "test[0]" must not match "test0".
+func TestFilterItems_RegexSpecialChars(t *testing.T) {
+	// A dot in the filter only matches a literal dot.
+	dotted := filterItems([]selectItem{
+		{Name: "model.v1"},
+		{Name: "modelsv1"},
+		{Name: "model-v1"},
+	}, "model.v1")
+	if len(dotted) != 1 {
+		t.Errorf("expected 1 exact match, got %d", len(dotted))
+	}
+	if len(dotted) > 0 && dotted[0].Name != "model.v1" {
+		t.Errorf("expected 'model.v1', got %s", dotted[0].Name)
+	}
+
+	// Brackets and parentheses are literal as well.
+	bracketed := filterItems([]selectItem{
+		{Name: "test[0]"},
+		{Name: "test0"},
+		{Name: "test(1)"},
+	}, "test[0]")
+	if !(len(bracketed) == 1 && bracketed[0].Name == "test[0]") {
+		t.Errorf("expected only 'test[0]', got %v", bracketed)
+	}
+}
+
+// TestMultiSelectState_DuplicateNames documents handling of duplicate item names.
+// itemIndex uses the item name as its key, so duplicate names collide. This
+// pins the current behavior: the last index for a duplicate name is stored, and
+// toggling a duplicate affects that last index only.
+func TestMultiSelectState_DuplicateNames(t *testing.T) {
+	// Duplicate names - this is an edge case that shouldn't happen in practice
+	items := []selectItem{
+		{Name: "duplicate"},
+		{Name: "duplicate"},
+		{Name: "unique"},
+	}
+
+	s := newMultiSelectState(items, nil)
+
+	// DOCUMENTED BEHAVIOR: itemIndex maps name to LAST index
+	// When there are duplicates, only the last occurrence's index is stored
+	if s.itemIndex["duplicate"] != 1 {
+		t.Errorf("itemIndex should map 'duplicate' to last index (1), got %d", s.itemIndex["duplicate"])
+	}
+
+	// Toggle item at highlighted=0 (first "duplicate")
+	// Due to name collision, toggleItem uses itemIndex["duplicate"] = 1
+	// So it actually toggles the SECOND duplicate item, not the first
+	s.toggleItem()
+
+	// This documents the potentially surprising behavior:
+	// We toggled at highlighted=0, but itemIndex lookup returned 1
+	if !s.checked[1] {
+		t.Error("toggle should check index 1 (due to name collision in itemIndex)")
+	}
+	// The highlighted row itself must stay unchecked. The previous version only
+	// logged here, and did so under the opposite condition, so it asserted nothing.
+	if s.checked[0] {
+		t.Error("index 0 should NOT be checked, even though highlighted=0 (name collision behavior)")
+	}
+}
+
+// TestSelectState_FilterReducesBelowSelection verifies that typing a filter
+// character resets the selection to 0 when the previously selected row is no
+// longer part of the filtered list, so later keystrokes cannot index past the
+// end of the shorter list.
+func TestSelectState_FilterReducesBelowSelection(t *testing.T) {
+	s := newSelectState([]selectItem{
+		{Name: "apple"},
+		{Name: "banana"},
+		{Name: "cherry"},
+	})
+	s.selected = 2 // "cherry"
+
+	// "a" matches "apple" and "banana" but not "cherry".
+	s.handleInput(eventChar, 'a')
+
+	if got := s.selected; got != 0 {
+		t.Errorf("expected selected=0 after filter, got %d", got)
+	}
+
+	if remaining := s.filtered(); len(remaining) != 2 {
+		t.Errorf("expected 2 filtered items, got %d", len(remaining))
+	}
+}
+
+// TestFilterItems_UnicodeCharacters verifies that filtering works when item
+// names and filter text contain multi-byte UTF-8 (CJK, emoji, accented letters).
+func TestFilterItems_UnicodeCharacters(t *testing.T) {
+	models := []selectItem{
+		{Name: "llama-日本語"},
+		{Name: "模型-chinese"},
+		{Name: "émoji-🦙"},
+		{Name: "regular-model"},
+	}
+
+	t.Run("filter japanese", func(t *testing.T) {
+		got := filterItems(models, "日本")
+		if !(len(got) == 1 && got[0].Name == "llama-日本語") {
+			t.Errorf("expected llama-日本語, got %v", got)
+		}
+	})
+
+	t.Run("filter chinese", func(t *testing.T) {
+		got := filterItems(models, "模型")
+		if !(len(got) == 1 && got[0].Name == "模型-chinese") {
+			t.Errorf("expected 模型-chinese, got %v", got)
+		}
+	})
+
+	t.Run("filter emoji", func(t *testing.T) {
+		got := filterItems(models, "🦙")
+		if !(len(got) == 1 && got[0].Name == "émoji-🦙") {
+			t.Errorf("expected émoji-🦙, got %v", got)
+		}
+	})
+
+	t.Run("filter accented char", func(t *testing.T) {
+		got := filterItems(models, "émoji")
+		if !(len(got) == 1 && got[0].Name == "émoji-🦙") {
+			t.Errorf("expected émoji-🦙, got %v", got)
+		}
+	})
+}
+
+// TestMultiSelectState_FilterReducesBelowHighlight verifies that typing a
+// filter character resets the highlight to 0 when the highlighted row is
+// filtered out of the list.
+func TestMultiSelectState_FilterReducesBelowHighlight(t *testing.T) {
+	s := newMultiSelectState([]selectItem{
+		{Name: "apple"},
+		{Name: "banana"},
+		{Name: "cherry"},
+	}, nil)
+	s.highlighted = 2 // "cherry"
+
+	// "a" does not occur in "cherry", so the highlighted row disappears.
+	s.handleInput(eventChar, 'a')
+
+	if got := s.highlighted; got != 0 {
+		t.Errorf("expected highlighted=0 after filter, got %d", got)
+	}
+}
+
+// TestMultiSelectState_EmptyItems verifies that a state built from an empty
+// item list can be toggled and rendered: toggling selects nothing, and
+// rendering still produces output containing "no matches".
+func TestMultiSelectState_EmptyItems(t *testing.T) {
+	s := newMultiSelectState([]selectItem{}, nil)
+
+	// With nothing to toggle this must be a harmless no-op rather than a panic.
+	s.toggleItem()
+
+	if n := s.selectedCount(); n != 0 {
+		t.Errorf("expected 0 selected for empty list, got %d", n)
+	}
+
+	// Rendering an empty list should still draw something.
+	var out bytes.Buffer
+	if lines := renderMultiSelect(&out, "Select:", s); lines == 0 {
+		t.Error("renderMultiSelect should produce output even for empty list")
+	}
+	if rendered := out.String(); !strings.Contains(rendered, "no matches") {
+		t.Error("expected 'no matches' for empty list")
+	}
+}
+
+// TestSelectState_RenderWithDescriptions verifies that renderSelect prints an
+// item's description when it has one and still prints items whose description
+// is empty.
+func TestSelectState_RenderWithDescriptions(t *testing.T) {
+	state := newSelectState([]selectItem{
+		{Name: "item1", Description: "First item description"},
+		{Name: "item2", Description: ""},
+		{Name: "item3", Description: "Third item"},
+	})
+
+	var out bytes.Buffer
+	renderSelect(&out, "Select:", state)
+	rendered := out.String()
+
+	if !strings.Contains(rendered, "First item description") {
+		t.Error("expected description to be rendered")
+	}
+	if !strings.Contains(rendered, "item2") {
+		t.Error("expected item without description to be rendered")
+	}
+}
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -30,6 +30,9 @@ const (
 )

 func generateInteractive(cmd *cobra.Command, opts runOptions) error {
+	var sessionPromptTokens int64
+	var sessionCompletionTokens int64
+
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
@@ -37,6 +40,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  /load <model>   Load a session or model")
 		fmt.Fprintln(os.Stderr, "  /save <model>   Save your current session")
 		fmt.Fprintln(os.Stderr, "  /clear          Clear session context")
+		fmt.Fprintln(os.Stderr, "  /usage          Show session token usage")
 		fmt.Fprintln(os.Stderr, "  /bye            Exit")
 		fmt.Fprintln(os.Stderr, "  /?, /help       Help for a command")
 		fmt.Fprintln(os.Stderr, "  /? shortcuts    Help for keyboard shortcuts")
@@ -159,6 +163,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			sb.WriteString(before)
 			if !ok {
 				fmt.Fprintln(&sb)
+				scanner.Prompt.UseAlt = true
 				continue
 			}

@@ -444,6 +449,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			} else {
 				usageShow()
 			}
+		case strings.HasPrefix(line, "/usage"):
+			fmt.Printf("prompt tokens:     %d\n", sessionPromptTokens)
+			fmt.Printf("completion tokens: %d\n", sessionCompletionTokens)
 		case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
 			args := strings.Fields(line)
 			if len(args) > 1 {
@@ -498,7 +506,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {

 			opts.Messages = append(opts.Messages, newMessage)

-			assistant, err := chat(cmd, opts)
+			assistant, metrics, err := chat(cmd, opts)
 			if err != nil {
 				if strings.Contains(err.Error(), "does not support thinking") ||
 					strings.Contains(err.Error(), "invalid think value") {
@@ -508,6 +516,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				}
 				return err
 			}
+			if metrics != nil {
+				sessionPromptTokens += int64(metrics.PromptEvalCount)
+				sessionCompletionTokens += int64(metrics.EvalCount)
+			}
 			if assistant != nil {
 				opts.Messages = append(opts.Messages, *assistant)
 			}
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -313,6 +313,8 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 		conv = &deepseek2Model{}
 	case "Glm4MoeLiteForCausalLM":
 		conv = &glm4MoeLiteModel{}
+	case "Lfm2ForCausalLM":
+		conv = &lfm2Model{}
 	default:
 		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
 	}
--- a/convert/convert_glm4moelite.go
+++ b/convert/convert_glm4moelite.go
@@ -6,6 +6,10 @@ import (
 	"log/slog"
 	"regexp"
 	"strconv"
+	"strings"
+
+	"github.com/pdevine/tensor"
+	"github.com/pdevine/tensor/native"

 	"github.com/ollama/ollama/fs/ggml"
 )
@@ -69,6 +73,9 @@ func (p *glm4MoeLiteModel) KV(t *Tokenizer) KV {
 	kv["glm4moelite.rope.dimension_count"] = p.QKRopeHeadDim
 	kv["glm4moelite.rope.freq_base"] = cmp.Or(p.RopeTheta, float32(1000000.0))

+	kv["glm4moelite.attention.key_length_mla"] = p.KVLoraRank + p.QKRopeHeadDim
+	kv["glm4moelite.attention.value_length_mla"] = p.KVLoraRank
+
 	kv["tokenizer.ggml.pre"] = "glm4"

 	return kv
@@ -100,6 +107,67 @@ func (p *glm4MoeLiteModel) Replacements() []string {
 	}
 }

+// repackKVB extracts K or V from the combined KV_B tensor for MLA absorption.
+// K output row-major: [n_head, kv_lora_rank, qk_nope] -> GGML ne[]={qk_nope, kv_lora_rank, n_head}
+// V output row-major: [n_head, v_head, kv_lora_rank] -> GGML ne[]={kv_lora_rank, v_head, n_head}
+//
+// extractK selects which half is produced: true returns the K slice (the first
+// qk_nope rows of each head, then transposed), false returns the V slice (the
+// remaining v_head rows of each head). kvFirst says the incoming 2-D tensor
+// must be transposed before it is reshaped per head. numHeads is the leading
+// dimension used for that reshape.
+//
+// The returned Repacker ignores the tensor name, treats data as a tensor of the
+// given shape, and returns the selected slice flattened to one dimension. Any
+// error from the tensor operations is returned unchanged.
+func (p *glm4MoeLiteModel) repackKVB(extractK bool, kvFirst bool, numHeads int) Repacker {
+	// Capture the model dimensions once so the closure does not re-read p.
+	qkNope := int(p.QKNopeHeadDim)
+	vHeadDim := int(p.VHeadDim)
+	kvLoraRank := int(p.KVLoraRank)
+	kvPerHead := qkNope + vHeadDim
+
+	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
+		// The tensor package takes int dimensions; shape arrives as uint64.
+		dims := make([]int, len(shape))
+		for i := range shape {
+			dims[i] = int(shape[i])
+		}
+
+		var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
+		var err error
+
+		// Normalize to [n_head * (qk_nope + v_head), kv_lora_rank] layout
+		if kvFirst {
+			tt, err = tensor.Transpose(tt, 1, 0)
+			if err != nil {
+				return nil, err
+			}
+			tt = tensor.Materialize(tt)
+		}
+
+		// Reshape to [n_head, qk_nope + v_head, kv_lora_rank]
+		if err := tt.Reshape(numHeads, kvPerHead, kvLoraRank); err != nil {
+			return nil, err
+		}
+
+		if extractK {
+			// Slice K: [n_head, qk_nope, kv_lora_rank]
+			tt, err = tt.Slice(nil, tensor.S(0, qkNope), nil)
+			if err != nil {
+				return nil, err
+			}
+			tt = tensor.Materialize(tt)
+			// Transpose to [n_head, kv_lora_rank, qk_nope]
+			tt, err = tensor.Transpose(tt, 0, 2, 1)
+			if err != nil {
+				return nil, err
+			}
+			tt = tensor.Materialize(tt)
+		} else {
+			// Slice V: [n_head, v_head, kv_lora_rank] - already correct layout
+			tt, err = tt.Slice(nil, tensor.S(qkNope, kvPerHead), nil)
+			if err != nil {
+				return nil, err
+			}
+			tt = tensor.Materialize(tt)
+		}
+
+		// Flatten to a single dimension before extracting the float32 values.
+		if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
+			return nil, err
+		}
+		// NOTE(review): the unchecked assertion assumes the materialized tensor is a
+		// *tensor.Dense; it would panic otherwise — confirm against the tensor package.
+		return native.VectorF32(tt.(*tensor.Dense))
+	}
+}
+
 func (p *glm4MoeLiteModel) Tensors(s []Tensor) (out []*ggml.Tensor) {
 	merges := make([]merge, p.HiddenLayers*3)
 	for i := range p.HiddenLayers {
@@ -139,6 +207,52 @@ func (p *glm4MoeLiteModel) Tensors(s []Tensor) (out []*ggml.Tensor) {
 			slog.Debug("skipping layer", "name", t.Name())
 			continue
 		}
+
+		// Split attn_kv_b into separate attn_k_b and attn_v_b for MLA absorption
+		if strings.HasSuffix(t.Name(), ".attn_kv_b.weight") {
+			qkNope := int(p.QKNopeHeadDim)
+			vHeadDim := int(p.VHeadDim)
+			kvLoraRank := int(p.KVLoraRank)
+			kvPerHead := qkNope + vHeadDim
+			numHeads := int(p.NumAttentionHeads)
+			kvFirst := true
+			if len(t.Shape()) == 2 {
+				switch {
+				case int(t.Shape()[0]) == kvLoraRank:
+					if kvPerHead > 0 && int(t.Shape()[1])%kvPerHead == 0 {
+						numHeads = int(t.Shape()[1]) / kvPerHead
+					}
+					kvFirst = true
+				case int(t.Shape()[1]) == kvLoraRank:
+					if kvPerHead > 0 && int(t.Shape()[0])%kvPerHead == 0 {
+						numHeads = int(t.Shape()[0]) / kvPerHead
+					}
+					kvFirst = false
+				default:
+					slog.Warn("glm4moelite: unexpected attn_kv_b layout", "name", t.Name(), "shape", t.Shape())
+				}
+			}
+
+			kTensor := t.Clone()
+			kTensor.SetRepacker(p.repackKVB(true, kvFirst, numHeads))
+			out = append(out, &ggml.Tensor{
+				Name:     strings.Replace(t.Name(), "attn_kv_b", "attn_k_b", 1),
+				Kind:     t.Kind(),
+				Shape:    []uint64{uint64(numHeads), uint64(kvLoraRank), uint64(qkNope)},
+				WriterTo: kTensor,
+			})
+
+			vTensor := t.Clone()
+			vTensor.SetRepacker(p.repackKVB(false, kvFirst, numHeads))
+			out = append(out, &ggml.Tensor{
+				Name:     strings.Replace(t.Name(), "attn_kv_b", "attn_v_b", 1),
+				Kind:     t.Kind(),
+				Shape:    []uint64{uint64(numHeads), uint64(vHeadDim), uint64(kvLoraRank)},
+				WriterTo: vTensor,
+			})
+			continue
+		}
+
 		out = append(out, &ggml.Tensor{
 			Name:     t.Name(),
 			Kind:     t.Kind(),
--- a/convert/convert_lfm2.go
+++ b/convert/convert_lfm2.go
@@ -0,0 +1,100 @@
+package convert
+
+import (
+	"slices"
+	"strings"
+
+	"github.com/ollama/ollama/fs/ggml"
+)
+
+// lfm2Model holds the configuration fields decoded from JSON (see the field
+// tags) that are needed to emit metadata and tensors for the "lfm2"
+// architecture.
+//
+// LayerTypes is indexed by layer: an entry equal to "full_attention" marks an
+// attention layer when the per-layer KV head counts are built; every other
+// layer is given a KV head count of 0. HiddenSize divided by
+// NumAttentionHeads is used as both the key and the value length.
+type lfm2Model struct {
+	ModelParameters
+	HiddenSize            uint32   `json:"hidden_size"`
+	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
+	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
+	IntermediateSize      uint32   `json:"intermediate_size"`
+	NumAttentionHeads     uint32   `json:"num_attention_heads"`
+	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
+	RopeTheta             float32  `json:"rope_theta"`
+	NormEps               float32  `json:"norm_eps"`
+	ConvLCache            uint32   `json:"conv_L_cache"`
+	LayerTypes            []string `json:"layer_types"`
+	TieEmbedding          bool     `json:"tie_embedding"`
+}
+
+var _ ModelConverter = (*lfm2Model)(nil)
+
+// KV builds the metadata map for an LFM2 model: the entries produced by
+// ModelParameters.KV plus the "lfm2."-prefixed architecture keys.
+//
+// NumAttentionHeads must be non-zero: the key and value lengths are obtained
+// by integer division, which panics on a zero divisor.
+func (p *lfm2Model) KV(t *Tokenizer) KV {
+	kv := p.ModelParameters.KV(t)
+	kv["general.architecture"] = "lfm2"
+
+	// Model dimensions.
+	kv["lfm2.vocab_size"] = p.VocabSize
+	kv["lfm2.block_count"] = p.NumHiddenLayers
+	kv["lfm2.embedding_length"] = p.HiddenSize
+	kv["lfm2.feed_forward_length"] = p.IntermediateSize
+	kv["lfm2.context_length"] = p.MaxPositionEmbeddings
+
+	// One entry per layer: attention layers carry the KV head count, every
+	// other (shortconv) layer stays at 0. Layers without a layer_types entry
+	// are left at 0 as well.
+	headCountKV := make([]uint32, p.NumHiddenLayers)
+	for layer, kind := range p.LayerTypes[:min(len(p.LayerTypes), len(headCountKV))] {
+		if kind == "full_attention" {
+			headCountKV[layer] = p.NumKeyValueHeads
+		}
+	}
+
+	// Keys and values share the same per-head width.
+	headDim := p.HiddenSize / p.NumAttentionHeads
+
+	kv["lfm2.attention.head_count"] = p.NumAttentionHeads
+	kv["lfm2.attention.head_count_kv"] = headCountKV
+	kv["lfm2.attention.key_length"] = headDim
+	kv["lfm2.attention.value_length"] = headDim
+	kv["lfm2.attention.layer_norm_rms_epsilon"] = p.NormEps
+	kv["lfm2.rope.freq_base"] = p.RopeTheta
+	kv["lfm2.shortconv.l_cache"] = p.ConvLCache
+
+	return kv
+}
+
+// Tensors maps every input tensor to a ggml tensor with the same name, kind
+// and writer. Short-convolution weights of shape [D, 1, K] are recorded with
+// the singleton middle dimension dropped, i.e. as [D, K]; all other shapes
+// are copied unchanged.
+func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
+
+	for _, t := range ts {
+		dims := slices.Clone(t.Shape())
+		name := t.Name()
+
+		// Only the recorded shape changes; the tensor itself remains the writer.
+		squeeze := strings.HasSuffix(name, "shortconv.conv.weight") && len(dims) == 3 && dims[1] == 1
+		if squeeze {
+			dims = []uint64{dims[0], dims[2]}
+		}
+
+		out = append(out, &ggml.Tensor{
+			Name:     name,
+			Kind:     t.Kind(),
+			Shape:    dims,
+			WriterTo: t,
+		})
+	}
+
+	return out
+}
+
+// Replacements returns the tensor-name substitutions for LFM2 as a flat list
+// of alternating entries: a source-name fragment followed by its replacement.
+// The order of the pairs is preserved exactly as listed.
+func (p *lfm2Model) Replacements() []string {
+	pairs := [...][2]string{
+		{"model.embed_tokens", "token_embd"},
+		{"model.embedding_norm", "output_norm"},
+		{"model.layers", "blk"},
+		{"operator_norm", "attn_norm"},
+		{"self_attn.q_proj", "attn_q"},
+		{"self_attn.k_proj", "attn_k"},
+		{"self_attn.v_proj", "attn_v"},
+		{"self_attn.out_proj", "attn_output"},
+		{"self_attn.q_layernorm", "attn_q_norm"},
+		{"self_attn.k_layernorm", "attn_k_norm"},
+		{"conv.conv", "shortconv.conv"},
+		{"conv.in_proj", "shortconv.in_proj"},
+		{"conv.out_proj", "shortconv.out_proj"},
+		{"feed_forward.w1", "ffn_gate"},
+		{"feed_forward.w2", "ffn_down"},
+		{"feed_forward.w3", "ffn_up"},
+		{"ffn_norm", "ffn_norm"},
+	}
+
+	flat := make([]string, 0, 2*len(pairs))
+	for _, pair := range pairs {
+		flat = append(flat, pair[0], pair[1])
+	}
+	return flat
+}
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -40,6 +40,7 @@ const (
 func (t tensorBase) Kind() uint32 {
 	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
 		strings.HasSuffix(t.name, ".bias") ||
+		strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
 		t.name == "token_types.weight" ||
 		t.name == "v.positional_embedding_vlm" ||
 		t.name == "v.tile_position_embd.weight" ||
--- a/docs/api/anthropic-compatibility.mdx
+++ b/docs/api/anthropic-compatibility.mdx
@@ -4,16 +4,6 @@ title: Anthropic compatibility

 Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code.

-## Recommended models
-
-For coding use cases, models like `glm-4.7:cloud`, `minimax-m2.1:cloud`, and `qwen3-coder` are recommended.
-
-Pull a model before use:
-```shell
-ollama pull qwen3-coder
-ollama pull glm-4.7:cloud
-```
-
 ## Usage

 ### Environment variables
@@ -22,8 +12,8 @@ To use Ollama with tools that expect the Anthropic API (like Claude Code), set t

 ```shell
 export ANTHROPIC_AUTH_TOKEN=ollama  # required but ignored
+export ANTHROPIC_API_KEY="" # required but ignored
 export ANTHROPIC_BASE_URL=http://localhost:11434
-export ANTHROPIC_API_KEY=ollama  # required but ignored
 ```

 ### Simple `/v1/messages` example
@@ -245,10 +235,41 @@ curl -X POST http://localhost:11434/v1/messages \

 ## Using with Claude Code

-[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend:
+[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend. 
+
+### Recommended models
+
+For coding use cases, models like `glm-4.7`, `minimax-m2.1`, and `qwen3-coder` are recommended.
+
+Download a model before use:

 ```shell
-ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
+ollama pull qwen3-coder
+```
+> Note: Qwen 3 coder is a 30B parameter model requiring at least 24GB of VRAM to run smoothly. More is required for longer context lengths. 
+
+```shell
+ollama pull glm-4.7:cloud
+```
+
+### Quick setup
+
+```shell
+ollama launch claude
+```
+
+This will prompt you to select a model, configure Claude Code automatically, and launch it. To configure without launching:
+
+```shell
+ollama launch claude --config
+```
+
+### Manual setup
+
+Set the environment variables and run Claude Code:
+
+```shell
+ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY="" claude --model qwen3-coder
 ```

 Or set the environment variables in your shell profile:
@@ -256,19 +277,13 @@ Or set the environment variables in your shell profile:
 ```shell
 export ANTHROPIC_AUTH_TOKEN=ollama
 export ANTHROPIC_BASE_URL=http://localhost:11434
-export ANTHROPIC_API_KEY=ollama
+export ANTHROPIC_API_KEY=""
 ```

 Then run Claude Code with any Ollama model:

 ```shell
-# Local models
 claude --model qwen3-coder
-claude --model gpt-oss:20b
-
-# Cloud models
-claude --model glm-4.7:cloud
-claude --model minimax-m2.1:cloud
 ```

 ## Endpoints
--- a/docs/cli.mdx
+++ b/docs/cli.mdx
@@ -8,6 +8,47 @@ title: CLI Reference
 ollama run gemma3
 ```

+### Launch integrations
+
+```
+ollama launch
+```
+
+Configure and launch external applications to use Ollama models. This provides an interactive way to set up and start integrations with supported apps.
+
+#### Supported integrations
+
+- **OpenCode** - Open-source coding assistant
+- **Claude Code** - Anthropic's agentic coding tool
+- **Codex** - OpenAI's coding assistant
+- **Droid** - Factory's AI coding agent
+
+#### Examples
+
+Launch an integration interactively:
+
+```
+ollama launch
+```
+
+Launch a specific integration:
+
+```
+ollama launch claude
+```
+
+Launch with a specific model:
+
+```
+ollama launch claude --model qwen3-coder
+```
+
+Configure without launching:
+
+```
+ollama launch droid --config
+```
+
 #### Multiline input

 For multiline input, you can wrap text with `"""`:
--- a/docs/cloud.mdx
+++ b/docs/cloud.mdx
@@ -3,8 +3,6 @@ title: Cloud
 sidebarTitle: Cloud
 ---

-<Info>Ollama's cloud is currently in preview.</Info>
-
 ## Cloud Models

 Ollama's cloud models are a new kind of model in Ollama that can run without a powerful GPU. Instead, cloud models are automatically offloaded to Ollama's cloud service while offering the same capabilities as local models, making it possible to keep using your local tools while running larger models that wouldn't fit on a personal computer.
--- a/docs/context-length.mdx
+++ b/docs/context-length.mdx
@@ -8,7 +8,7 @@ Context length is the maximum number of tokens that the model has access to in m
  The default context length in Ollama is 4096 tokens.
 </Note>

-Tasks which require large context like web search, agents, and coding tools should be set to at least 32000 tokens.
+Tasks which require large context like web search, agents, and coding tools should be set to at least 64000 tokens.

 ## Setting context length

@@ -24,7 +24,7 @@ Change the slider in the Ollama app under settings to your desired context lengt
 ### CLI
 If editing the context length for Ollama is not possible, the context length can also be updated when serving Ollama.  
 ```
-OLLAMA_CONTEXT_LENGTH=32000 ollama serve
+OLLAMA_CONTEXT_LENGTH=64000 ollama serve
 ```

 ### Check allocated context length and model offloading
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -102,18 +102,20 @@
            "group": "Integrations",
            "pages": [
              "/integrations/claude-code",
-              "/integrations/vscode",
-              "/integrations/jetbrains",
-              "/integrations/codex",
+              "/integrations/clawdbot",
              "/integrations/cline",
+              "/integrations/codex",
              "/integrations/droid",
              "/integrations/goose",
-              "/integrations/zed",
-              "/integrations/roo-code",
+              "/integrations/jetbrains",
+              "/integrations/marimo",
              "/integrations/n8n",
-              "/integrations/xcode",
              "/integrations/onyx",
-              "/integrations/marimo"
+              "/integrations/opencode",
+              "/integrations/roo-code",
+              "/integrations/vscode",
+              "/integrations/xcode",
+              "/integrations/zed"
            ]
          },
          {
--- a/docs/index.mdx
+++ b/docs/index.mdx
@@ -9,7 +9,7 @@ sidebarTitle: Welcome

 <CardGroup cols={2}>
  <Card title="Quickstart" icon="rocket" href="/quickstart">
-    Get up and running with your first model
+    Get up and running with your first model or integrate Ollama with your favorite tools
  </Card>
  <Card
    title="Download Ollama"
--- a/docs/integrations/claude-code.mdx
+++ b/docs/integrations/claude-code.mdx
@@ -4,7 +4,7 @@ title: Claude Code

 Claude Code is Anthropic's agentic coding tool that can read, modify, and execute code in your working directory. 

-Open models can be used with Claude Code through Ollama's Anthropic-compatible API, enabling you to use models such as `qwen3-coder`, `gpt-oss:20b`, or other models.
+Open models can be used with Claude Code through Ollama's Anthropic-compatible API, enabling you to use models such as `glm-4.7`, `qwen3-coder`, and `gpt-oss`.

 ![Claude Code with Ollama](https://files.ollama.com/claude-code.png)

@@ -26,12 +26,27 @@ irm https://claude.ai/install.ps1 | iex

 ## Usage with Ollama

+### Quick setup
+
+```shell
+ollama launch claude
+```
+
+To configure without launching:
+
+```shell
+ollama launch claude --config
+```
+
+### Manual setup
+
 Claude Code connects to Ollama using the Anthropic-compatible API.

 1. Set the environment variables:

 ```shell
 export ANTHROPIC_AUTH_TOKEN=ollama
+export ANTHROPIC_API_KEY=""
 export ANTHROPIC_BASE_URL=http://localhost:11434
 ```

@@ -44,35 +59,17 @@ claude --model gpt-oss:20b
 Or run with environment variables inline:

 ```shell
-ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 claude --model gpt-oss:20b
+ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY="" claude --model qwen3-coder 
 ```

-**Note:** Claude Code requires a large context window. We recommend at least 32K tokens. See the [context length documentation](/context-length) for how to adjust context length in Ollama.
-
-## Connecting to ollama.com
-
-1. Create an [API key](https://ollama.com/settings/keys) on ollama.com
-2. Set the environment variables:
-
-```shell
-export ANTHROPIC_BASE_URL=https://ollama.com
-export ANTHROPIC_API_KEY=<your-api-key>
-```
-
-3. Run Claude Code with a cloud model:
-
-```shell
-claude --model glm-4.7:cloud
-```
+**Note:** Claude Code requires a large context window. We recommend at least 64k tokens. See the [context length documentation](/context-length) for how to adjust context length in Ollama.

 ## Recommended Models

-### Cloud models
- `glm-4.7:cloud` - High-performance cloud model
- `minimax-m2.1:cloud` - Fast cloud model
- `qwen3-coder:480b` - Large coding model
+- `qwen3-coder` 
+- `glm-4.7`
+- `gpt-oss:20b`
+- `gpt-oss:120b`
+
+Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).

-### Local models
- `qwen3-coder` - Excellent for coding tasks
- `gpt-oss:20b` - Strong general-purpose model
- `gpt-oss:120b` - Larger general-purpose model for more complex tasks
--- a/docs/integrations/clawdbot.mdx
+++ b/docs/integrations/clawdbot.mdx
@@ -0,0 +1,48 @@
+---
+title: Clawdbot
+---
+
+Clawdbot is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.
+
+## Install
+
+Install [Clawdbot](https://clawd.bot/):
+
+```bash
+npm install -g clawdbot@latest
+```
+
+Then run the onboarding wizard:
+
+```bash
+clawdbot onboard --install-daemon
+```
+
+<Note>Clawdbot requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
+
+## Usage with Ollama
+
+### Quick setup
+
+```bash
+ollama launch clawdbot
+```
+
+This configures Clawdbot to use Ollama and starts the gateway.
+If the gateway is already running, no further action is needed: it automatically reloads the updated configuration.
+
+
+To configure without launching:
+
+```shell
+ollama launch clawdbot --config
+```
+
+## Recommended Models
+
+- `qwen3-coder`
+- `glm-4.7`
+- `gpt-oss:20b`
+- `gpt-oss:120b`
+
+Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).
--- a/docs/integrations/codex.mdx
+++ b/docs/integrations/codex.mdx
@@ -13,7 +13,21 @@ npm install -g @openai/codex

 ## Usage with Ollama

-<Note>Codex requires a larger context window. It is recommended to use a context window of at least 32K tokens.</Note>
+<Note>Codex requires a larger context window. It is recommended to use a context window of at least 64k tokens.</Note>
+
+### Quick setup
+
+```
+ollama launch codex
+```
+
+To configure without launching:
+
+```shell
+ollama launch codex --config
+```
+
+### Manual setup

 To use `codex` with Ollama, use the `--oss` flag:

--- a/docs/integrations/droid.mdx
+++ b/docs/integrations/droid.mdx
@@ -11,10 +11,24 @@ Install the [Droid CLI](https://factory.ai/):
 curl -fsSL https://app.factory.ai/cli | sh
 ```

-<Note>Droid requires a larger context window. It is recommended to use a context window of at least 32K tokens. See [Context length](/context-length) for more information.</Note>
+<Note>Droid requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>

 ## Usage with Ollama

+### Quick setup
+
+```bash
+ollama launch droid
+```
+
+To configure without launching:
+
+```shell
+ollama launch droid --config
+```
+
+### Manual setup
+
 Add a local configuration block to `~/.factory/config.json`:

 ```json
@@ -73,4 +87,4 @@ Add the cloud configuration block to `~/.factory/config.json`:
   }
   ```

-Run `droid` in a new terminal to load the new settings.
+Run `droid` in a new terminal to load the new settings.
--- a/docs/integrations/opencode.mdx
+++ b/docs/integrations/opencode.mdx
@@ -0,0 +1,106 @@
+---
+title: OpenCode
+---
+
+OpenCode is an open-source AI coding assistant that runs in your terminal.
+
+## Install
+
+Install the [OpenCode CLI](https://opencode.ai):
+
+```bash
+curl -fsSL https://opencode.ai/install.sh | bash
+```
+
+<Note>OpenCode requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
+
+## Usage with Ollama
+
+### Quick setup
+
+```bash
+ollama launch opencode
+```
+
+To configure without launching:
+
+```shell
+ollama launch opencode --config
+```
+
+### Manual setup
+
+Add a configuration block to `~/.config/opencode/opencode.json`:
+
+```json
+{
+  "$schema": "https://opencode.ai/config.json",
+  "provider": {
+    "ollama": {
+      "npm": "@ai-sdk/openai-compatible",
+      "name": "Ollama",
+      "options": {
+        "baseURL": "http://localhost:11434/v1"
+      },
+      "models": {
+        "qwen3-coder": {
+          "name": "qwen3-coder"
+        }
+      }
+    }
+  }
+}
+```
+
+## Cloud Models
+
+`glm-4.7:cloud` is the recommended model for use with OpenCode.
+
+Add the cloud configuration to `~/.config/opencode/opencode.json`:
+
+```json
+{
+  "$schema": "https://opencode.ai/config.json",
+  "provider": {
+    "ollama": {
+      "npm": "@ai-sdk/openai-compatible",
+      "name": "Ollama",
+      "options": {
+        "baseURL": "http://localhost:11434/v1"
+      },
+      "models": {
+        "glm-4.7:cloud": {
+          "name": "glm-4.7:cloud"
+        }
+      }
+    }
+  }
+}
+```
+
+## Connecting to ollama.com
+
+1. Create an [API key](https://ollama.com/settings/keys) from ollama.com and export it as `OLLAMA_API_KEY`.
+2. Update `~/.config/opencode/opencode.json` to point to ollama.com:
+
+```json
+{
+  "$schema": "https://opencode.ai/config.json",
+  "provider": {
+    "ollama": {
+      "npm": "@ai-sdk/openai-compatible",
+      "name": "Ollama Cloud",
+      "options": {
+        "baseURL": "https://ollama.com/v1"
+      },
+      "models": {
+        "glm-4.7:cloud": {
+          "name": "glm-4.7:cloud"
+        }
+      }
+    }
+  }
+}
+```
+
+Run `opencode` in a new terminal to load the new settings.
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -18,13 +18,13 @@ This quickstart will walk your through running your first model with Ollama. To
  <Tab title="CLI">
    Open a terminal and run the command:

-    ```
+    ```sh
    ollama run gemma3
    ```

  </Tab>
  <Tab title="cURL">
-    ```
+    ```sh
    ollama pull gemma3
    ```

@@ -45,13 +45,13 @@ This quickstart will walk your through running your first model with Ollama. To
  <Tab title="Python">
    Start by downloading a model:

-    ```
+    ```sh
    ollama pull gemma3
    ```

    Then install Ollama's Python library:

-    ```
+    ```sh
    pip install ollama
    ```

@@ -101,3 +101,42 @@ This quickstart will walk your through running your first model with Ollama. To
 </Tabs>

 See a full list of available models [here](https://ollama.com/models).
+
+## Coding 
+
+For coding use cases, we recommend using the `glm-4.7-flash` model. 
+
+Note: this model requires 23 GB of VRAM at a context length of 64000 tokens.
+```sh
+ollama pull glm-4.7-flash 
+```
+
+Alternatively, you can use a more powerful cloud model (with full context length):
+```sh
+ollama pull glm-4.7:cloud
+```
+
+Use `ollama launch` to quickly set up a coding tool with Ollama models:
+
+```sh
+ollama launch
+```
+
+### Supported integrations
+
+- [OpenCode](/integrations/opencode) - Open-source coding assistant
+- [Claude Code](/integrations/claude-code) - Anthropic's agentic coding tool
+- [Codex](/integrations/codex) - OpenAI's coding assistant
+- [Droid](/integrations/droid) - Factory's AI coding agent
+
+### Launch with a specific model
+
+```sh
+ollama launch claude --model glm-4.7-flash
+```
+
+### Configure without launching
+
+```sh
+ollama launch claude --config
+```
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -270,6 +270,7 @@ func (kv KV) OllamaEngineRequired() bool {
 		"qwen3", "qwen3moe",
 		"qwen3vl", "qwen3vlmoe",
 		"glm4moelite",
+		"lfm2",
 	}, kv.Architecture())
 }

@@ -859,6 +860,7 @@ func (f GGML) FlashAttention() bool {
 		"gemma3",
 		"glm4moelite",
 		"gptoss", "gpt-oss",
+		"lfm2",
 		"mistral3",
 		"olmo3",
 		"qwen3", "qwen3moe",
--- a/integration/imagegen_test.go
+++ b/integration/imagegen_test.go
@@ -0,0 +1,148 @@
+//go:build integration
+
+package integration
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/ollama/ollama/api"
+)
+
+// TestImageGeneration generates an image from a text prompt with an image
+// generation model, then asks a vision model to describe the result and
+// checks the description against keywords taken from the prompt.
+//
+// The subtest is skipped when generation fails with one of the known errors
+// that indicate an unsupported environment; any other generation error fails
+// the test.
+func TestImageGeneration(t *testing.T) {
+	skipUnderMinVRAM(t, 8)
+
+	// Generation errors that lead to a skip instead of a failure. Entries are
+	// checked in order and the first matching substring wins.
+	skipOn := []struct {
+		errSubstr string
+		reason    string
+	}{
+		{"image generation not available", "Target system does not support image generation"},
+		// Windows pattern, not yet supported
+		{"executable file not found in", "Windows does not support image generation yet"},
+		{"CUDA driver version is insufficient", "Driver is too old"},
+		{"insufficient memory for image generation", "insufficient memory for image generation"},
+		// AMD GPU or CPU
+		{"error while loading shared libraries: libcuda.so.1", "CUDA GPU is not available"},
+		// most likely linux arm - not supported yet
+		{"ollama-mlx: no such file or directory", "unsupported architecture"},
+	}
+
+	cases := []struct {
+		imageGenModel string
+		visionModel   string
+		prompt        string
+		expectedWords []string
+	}{
+		{
+			imageGenModel: "jmorgan/z-image-turbo",
+			visionModel:   "llama3.2-vision",
+			prompt:        "A cartoon style llama flying like a superhero through the air with clouds in the background",
+			expectedWords: []string{"llama", "flying", "cartoon", "cloud", "sky", "superhero", "air", "animal", "camelid"},
+		},
+	}
+
+	for _, tc := range cases {
+		name := fmt.Sprintf("%s->%s", tc.imageGenModel, tc.visionModel)
+		t.Run(name, func(t *testing.T) {
+			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+			defer cancel()
+
+			client, _, cleanup := InitServerConnection(ctx, t)
+			defer cleanup()
+
+			// Both models have to be available before anything else runs.
+			if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
+				t.Fatalf("failed to pull image gen model: %v", err)
+			}
+			if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
+				t.Fatalf("failed to pull vision model: %v", err)
+			}
+
+			// Step 1: text -> image.
+			t.Logf("Generating image with prompt: %s", tc.prompt)
+			imageBase64, err := generateImage(ctx, client, tc.imageGenModel, tc.prompt)
+			if err != nil {
+				// t.Skip does not return, so the loop ends at the first match.
+				for _, s := range skipOn {
+					if strings.Contains(err.Error(), s.errSubstr) {
+						t.Skip(s.reason)
+					}
+				}
+				t.Fatalf("failed to generate image: %v", err)
+			}
+
+			imageData, err := base64.StdEncoding.DecodeString(imageBase64)
+			if err != nil {
+				t.Fatalf("failed to decode image: %v", err)
+			}
+			t.Logf("Generated image: %d bytes", len(imageData))
+
+			// Step 2: load the vision model up front with a prompt-less request.
+			preload := &api.GenerateRequest{Model: tc.visionModel}
+			if err := client.Generate(ctx, preload, func(api.GenerateResponse) error { return nil }); err != nil {
+				t.Fatalf("failed to load vision model: %v", err)
+			}
+
+			// Step 3: image -> description, with a fixed seed and zero temperature.
+			chatReq := api.ChatRequest{
+				Model: tc.visionModel,
+				Messages: []api.Message{
+					{
+						Role:    "user",
+						Content: "Describe this image in detail. What is shown? What style is it? What is the main subject doing?",
+						Images:  []api.ImageData{imageData},
+					},
+				},
+				Stream: &stream,
+				Options: map[string]any{
+					"seed":        42,
+					"temperature": 0.0,
+				},
+			}
+
+			// DoChat checks the response against the expected keywords.
+			response := DoChat(ctx, t, client, chatReq, tc.expectedWords, 240*time.Second, 30*time.Second)
+			if response == nil {
+				return
+			}
+			t.Logf("Vision model response: %s", response.Content)
+
+			// Log which keywords were hit and which were not, for diagnostics only.
+			lowered := strings.ToLower(response.Content)
+			var found, missing []string
+			for _, word := range tc.expectedWords {
+				if strings.Contains(lowered, word) {
+					found = append(found, word)
+					continue
+				}
+				missing = append(missing, word)
+			}
+			t.Logf("Found keywords: %v", found)
+			if len(missing) > 0 {
+				t.Logf("Missing keywords (at least one was found so test passed): %v", missing)
+			}
+		})
+	}
+}
+
+// generateImage sends a generate request for model with the given prompt and
+// returns the base64 image data from the response stream. When several
+// responses carry an image, the last non-empty one is returned. An error is
+// returned if the request fails or no response contains image data.
+func generateImage(ctx context.Context, client *api.Client, model, prompt string) (string, error) {
+	req := &api.GenerateRequest{
+		Model:  model,
+		Prompt: prompt,
+	}
+
+	var encoded string
+	collect := func(chunk api.GenerateResponse) error {
+		if chunk.Image != "" {
+			encoded = chunk.Image
+		}
+		return nil
+	}
+
+	if err := client.Generate(ctx, req, collect); err != nil {
+		return "", fmt.Errorf("failed to generate image: %w", err)
+	}
+	if encoded == "" {
+		return "", fmt.Errorf("no image data in response")
+	}
+
+	return encoded, nil
+}
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -38,6 +38,7 @@ var (

 	// Note: add newer models at the top of the list to test them first
 	ollamaEngineChatModels = []string{
+		"lfm2.5-thinking",
 		"ministral-3",
 		"qwen3-coder:30b",
 		"gpt-oss:20b",
@@ -143,6 +144,7 @@ var (
 		"granite3.3",
 		"hermes3",
 		"internlm2",
+		"lfm2.5-thinking",
 		"llama-guard3",
 		"llama-pro",
 		"llama2-chinese",
@@ -263,6 +265,7 @@ var (
 		"snowflake-arctic-embed2",
 	}
 	libraryToolsModels = []string{
+		"lfm2.5-thinking",
 		"qwen3-vl",
 		"gpt-oss:20b",
 		"gpt-oss:120b",
--- a/llama/patches/0032-ggml-enable-MLA-flash-attention-for-GLM-4.7-flash.patch
+++ b/llama/patches/0032-ggml-enable-MLA-flash-attention-for-GLM-4.7-flash.patch
@@ -0,0 +1,309 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: nobody <>
+Date: Sat, 24 Jan 2026 02:31:01 +0000
+Subject: [PATCH] ggml: enable MLA flash attention for GLM-4.7-flash
+
+Add support for gqa_ratio 4 in MLA flash attention kernels. GLM-4.7-flash
+uses head size 576 with gqa_ratio 4; head size 576 was previously only
+supported with gqa_ratio 16 (DeepSeek).
+
+Metal changes:
+- Enable head size 576 for flash attention
+- Increase simdgroups to 8 for large heads (>=512)
+- Add case 8 kernel dispatch for 8 simdgroups
+
+CUDA changes:
+- Add gqa_ratio 4 support for head 576/512
+- Add tile configs for (576, 512, 4) and (576, 512, 8)
+- Add MMA config cases for ncols 4
+- Add template instances for ncols2=4
+- Fix nbatch_fa values in nvidia_fp32 config (32->64)
+---
+ ggml/src/ggml-cuda/fattn-mma-f16.cuh          | 40 +++++++++++++++----
+ ggml/src/ggml-cuda/fattn-tile.cuh             | 16 ++++++++
+ ggml/src/ggml-cuda/fattn.cu                   | 12 ++++--
+ ...ttn-mma-f16-instance-ncols1_16-ncols2_4.cu |  1 +
+ ...attn-mma-f16-instance-ncols1_2-ncols2_4.cu |  1 +
+ ...attn-mma-f16-instance-ncols1_4-ncols2_4.cu |  1 +
+ ...attn-mma-f16-instance-ncols1_8-ncols2_4.cu |  1 +
+ ggml/src/ggml-metal/ggml-metal-device.m       |  8 +---
+ ggml/src/ggml-metal/ggml-metal-ops.cpp        |  2 +-
+ ggml/src/ggml-metal/ggml-metal.metal          |  1 +
+ 10 files changed, 64 insertions(+), 19 deletions(-)
+
+diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh
+index 7bd1044c1..3dea2205e 100644
+--- a/ggml/src/ggml-cuda/fattn-mma-f16.cuh
+++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh
+@@ -66,7 +66,8 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 32, 128, 2,  32, 128, 128, 128, 2, true);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 64, 128, 2,  32, 128, 128, 128, 2, true);
+ 
+-    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  4,  64, 4,  32, 288, 256, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256, 128, 1, true);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 16,  64, 4,  32, 288, 256, 128, 1, false);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 32, 128, 2,  32, 160, 128, 128, 1, false);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 64, 256, 1,  32, 160, 128, 128, 1, false);
+@@ -80,7 +81,8 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 32, 128, 2,  64, 128, 128,  64, 2, true);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 64, 128, 2,  64, 128, 128,  64, 2, true);
+ 
+-    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32,  96,  64, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  4,  64, 4,  32,  96,  64, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32,  96,  64, 128, 1, true);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 16,  64, 4,  32,  96,  64, 128, 1, false);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 32, 128, 2,  32, 160, 128, 128, 1, false);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 64, 256, 1,  32, 160, 128, 128, 1, false);
+@@ -89,7 +91,8 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
+ }
+ 
+ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_config_volta(const int DKQ, const int DV, const int ncols) {
+-    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256,  64, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  4,  64, 4,  32, 288, 256,  64, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256,  64, 1, true);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 16,  64, 4,  32, 288, 256,  64, 1, false);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 32, 128, 2,  32, 160, 128,  64, 1, false);
+     GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 64, 256, 1,  32, 160, 128,  64, 1, false);
+@@ -397,7 +400,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
+     constexpr int  ncols           = ncols1 * ncols2;
+     constexpr int  cols_per_warp   = T_B_KQ::I;
+     constexpr int  cols_per_thread = 2; // This is specifically KQ columns, Volta only has a single VKQ column.
+-    constexpr int  np              = nwarps * (cols_per_warp/ncols2) / ncols1; // Number of parallel CUDA warps per Q column.
+    constexpr int  np              = cols_per_warp > ncols ? nwarps : nwarps * cols_per_warp/ncols; // Number of parallel CUDA warps per Q column.
+     constexpr int  nbatch_fa       = ggml_cuda_fattn_mma_get_nbatch_fa(DKQ, DV, ncols);
+     constexpr int  nbatch_K2       = ggml_cuda_fattn_mma_get_nbatch_K2(DKQ, DV, ncols);
+     constexpr int  nbatch_V2       = ggml_cuda_fattn_mma_get_nbatch_V2(DKQ, DV, ncols);
+@@ -467,7 +470,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
+                 }
+             }
+         } else {
+-            static_assert(cols_per_warp != 8, "cols_per_warp == 8 not implemented");
+ #pragma unroll
+             for (int k_KQ_0 = k0_start; k_KQ_0 < k0_stop; k_KQ_0 += T_A_KQ::J) {
+                 load_ldmatrix(Q_B[0], tile_Q + (threadIdx.y / np)*(T_B_KQ::I*stride_tile_Q) + k_KQ_0, stride_tile_Q);
+@@ -479,8 +481,18 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
+                     T_A_KQ K_A;
+                     load_ldmatrix(K_A, tile_K + i_KQ_0*stride_tile_K + (k_KQ_0 - k0_start), stride_tile_K);
+ 
+-                    // Wide version of KQ_C is column-major => swap A and B.
+-                    mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], Q_B[0], K_A);
+                    if constexpr (cols_per_warp == 8) {
+                        mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], K_A, Q_B[0]);
+                    } else {
+                        // Wide version of KQ_C is column-major
+#if defined(AMD_WMMA_AVAILABLE)
+                        // RDNA matrix C is column-major.
+                        mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], K_A, Q_B[0]);
+#else
+                        // swap A and B for CUDA.
+                        mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], Q_B[0], K_A);
+#endif // defined(AMD_WMMA_AVAILABLE)
+                    }
+                 }
+             }
+         }
+@@ -841,7 +853,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
+ 
+     constexpr int  cols_per_warp   = T_B_KQ::I;
+     constexpr int  cols_per_thread = 2; // This is specifically KQ columns, Volta only has a single VKQ column.
+-    constexpr int  np              = nwarps * (cols_per_warp/ncols2) / ncols1; // Number of parallel CUDA warps per Q column.
+    constexpr int  np              = cols_per_warp > ncols ? nwarps : nwarps * cols_per_warp/ncols; // Number of parallel CUDA warps per Q column.
+     constexpr int  nbatch_fa       = ggml_cuda_fattn_mma_get_nbatch_fa     (DKQ, DV, ncols);
+     constexpr int  nbatch_K2       = ggml_cuda_fattn_mma_get_nbatch_K2     (DKQ, DV, ncols);
+     constexpr int  nbatch_V2       = ggml_cuda_fattn_mma_get_nbatch_V2     (DKQ, DV, ncols);
+@@ -1353,6 +1365,13 @@ static __global__ void flash_attn_ext_f16(
+         NO_DEVICE_CODE;
+         return;
+     }
+#ifdef VOLTA_MMA_AVAILABLE
+    if (ncols1*ncols2 < 32) {
+        NO_DEVICE_CODE;
+        return;
+    }
+#endif // VOLTA_MMA_AVAILABLE
+
+ #if __CUDA_ARCH__ == GGML_CUDA_CC_TURING
+     if (ncols1*ncols2 > 32) {
+         NO_DEVICE_CODE;
+@@ -1585,3 +1604,8 @@ DECL_FATTN_MMA_F16_CASE_ALL_NCOLS2(256, 256,  64)
+ extern DECL_FATTN_MMA_F16_CASE(576, 512, 1, 16);
+ extern DECL_FATTN_MMA_F16_CASE(576, 512, 2, 16);
+ extern DECL_FATTN_MMA_F16_CASE(576, 512, 4, 16);
+
+// For GLM 4.7 Flash
+extern DECL_FATTN_MMA_F16_CASE(576, 512,  4,  4);
+extern DECL_FATTN_MMA_F16_CASE(576, 512,  8,  4);
+extern DECL_FATTN_MMA_F16_CASE(576, 512, 16,  4);
+diff --git a/ggml/src/ggml-cuda/fattn-tile.cuh b/ggml/src/ggml-cuda/fattn-tile.cuh
+index 7c4d6fe67..371be7442 100644
+--- a/ggml/src/ggml-cuda/fattn-tile.cuh
+++ b/ggml/src/ggml-cuda/fattn-tile.cuh
+@@ -68,6 +68,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2,  64,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2,  64,  64)
+ 
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  64,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  64,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 2,  64,  64)
+ 
+     return 0;
+@@ -122,6 +124,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2,  32, 128)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2,  32,  64)
+ 
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  32,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  32,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 2,  32,  64)
+ 
+     return 0;
+@@ -183,6 +187,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2,  32, 128)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2,  32, 128)
+ 
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  64,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  64,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 2,  64,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 32, 512, 1, 128,  64)
+ 
+@@ -245,6 +251,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 5,  32, 256)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 3,  64, 128)
+ 
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  64,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  64,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 4,  64,  64)
+     GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 32, 256, 2, 128,  64)
+ 
+@@ -1187,6 +1195,14 @@ static void launch_fattn_tile_switch_ncols2(ggml_backend_cuda_context & ctx, ggm
+             launch_fattn_tile_switch_ncols1<DKQ, DV, 16, use_logit_softcap>(ctx, dst);
+             return;
+         }
+        if (use_gqa_opt && gqa_ratio % 8 == 0) {
+            launch_fattn_tile_switch_ncols1<DKQ, DV, 8, use_logit_softcap>(ctx, dst);
+            return;
+        }
+        if (use_gqa_opt && gqa_ratio % 4 == 0) {
+            launch_fattn_tile_switch_ncols1<DKQ, DV, 4, use_logit_softcap>(ctx, dst);
+            return;
+        }
+     }
+ 
+     if constexpr (DV <= 256) {
+diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu
+index 015540666..1693479cb 100644
+--- a/ggml/src/ggml-cuda/fattn.cu
+++ b/ggml/src/ggml-cuda/fattn.cu
+@@ -111,7 +111,7 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg
+             ggml_cuda_flash_attn_ext_mma_f16_switch_ncols2<256, 256>(ctx, dst);
+             break;
+         case 576: {
+-            // For Deepseek, go straight to the ncols1 switch to avoid compiling unnecessary kernels.
+            // For Deepseek/GLM4, go straight to the ncols1 switch to avoid compiling unnecessary kernels.
+             GGML_ASSERT(V->ne[0] == 512);
+             float max_bias = 0.0f;
+             memcpy(&max_bias, (const float *) KQV->op_params + 1, sizeof(float));
+@@ -121,8 +121,12 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg
+ 
+             GGML_ASSERT(Q->ne[2] % K->ne[2] == 0);
+             const int gqa_ratio = Q->ne[2] / K->ne[2];
+-            GGML_ASSERT(gqa_ratio % 16 == 0);
+-            ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
+            GGML_ASSERT(gqa_ratio % 4 == 0);
+            if (gqa_ratio % 16 == 0) {
+                ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
+            } else {
+                ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512,  4>(ctx, dst);
+            }
+         } break;
+         default:
+             GGML_ABORT("fatal error");
+@@ -251,7 +255,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
+             if (V->ne[0] != 512) {
+                 return BEST_FATTN_KERNEL_NONE;
+             }
+-            if (!gqa_opt_applies || gqa_ratio % 16 != 0) {
+            if (!gqa_opt_applies || gqa_ratio % 4 != 0) {
+                 return BEST_FATTN_KERNEL_NONE;
+             }
+             break;
+diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
+index 2074e954a..517993cb0 100644
+--- a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
+++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
+@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 16, 4);
+ DECL_FATTN_MMA_F16_CASE(112, 112, 16, 4);
+ DECL_FATTN_MMA_F16_CASE(128, 128, 16, 4);
+ DECL_FATTN_MMA_F16_CASE(256, 256, 16, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 16, 4);
+diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
+index 24c64cf00..97b19c67a 100644
+--- a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
+++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
+@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 2, 4);
+ DECL_FATTN_MMA_F16_CASE(112, 112, 2, 4);
+ DECL_FATTN_MMA_F16_CASE(128, 128, 2, 4);
+ DECL_FATTN_MMA_F16_CASE(256, 256, 2, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 2, 4);
+diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
+index 1ada657f1..989626dfa 100644
+--- a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
+++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
+@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 4, 4);
+ DECL_FATTN_MMA_F16_CASE(112, 112, 4, 4);
+ DECL_FATTN_MMA_F16_CASE(128, 128, 4, 4);
+ DECL_FATTN_MMA_F16_CASE(256, 256, 4, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 4, 4);
+diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
+index 86d4ffae2..173de7aac 100644
+--- a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
+++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
+@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 8, 4);
+ DECL_FATTN_MMA_F16_CASE(112, 112, 8, 4);
+ DECL_FATTN_MMA_F16_CASE(128, 128, 8, 4);
+ DECL_FATTN_MMA_F16_CASE(256, 256, 8, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 8, 4);
+diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m
+index f24270bb1..7b5ee968c 100644
+--- a/ggml/src/ggml-metal/ggml-metal-device.m
+++ b/ggml/src/ggml-metal/ggml-metal-device.m
+@@ -1071,12 +1071,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
+                 op->src[0]->ne[0] != 112 &&
+                 op->src[0]->ne[0] != 128 &&
+                 op->src[0]->ne[0] != 192 &&
+-                op->src[0]->ne[0] != 256) {
+-                return false;
+-            }
+-            if (op->src[0]->ne[0] == 576) {
+-                // DeepSeek sizes
+-                // TODO: disabled for now, until optmized
+                op->src[0]->ne[0] != 256 &&
+                op->src[0]->ne[0] != 576) {
+                 return false;
+             }
+             if (op->src[1]->type != op->src[2]->type) {
+diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp
+index e99c1763f..80864f303 100644
+--- a/ggml/src/ggml-metal/ggml-metal-ops.cpp
+++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp
+@@ -2456,7 +2456,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {
+ 
+         // simdgroups per threadgroup (a.k.a. warps)
+         //nsg = ne01 <= nqptg ? MAX(4, MIN(nsgmax, MIN(ne11/ncpsg, (int64_t) pipeline.maxTotalThreadsPerThreadgroup/32))) : 4;
+-        int32_t nsg = 4;
+        int32_t nsg = ne00 >= 512 ? 8 : 4;
+ 
+         const size_t smem = FATTN_SMEM(nsg);
+ 
+diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
+index c98d269d1..d33c16079 100644
+--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
+@@ -6166,6 +6166,7 @@ kernel void kernel_flash_attn_ext(
+       //case 1: kernel_flash_attn_ext_impl<FWD_TMPL, 1>(FWD_ARGS); break;
+       //case 2: kernel_flash_attn_ext_impl<FWD_TMPL, 2>(FWD_ARGS); break;
+         case 4: kernel_flash_attn_ext_impl<FWD_TMPL, 4>(FWD_ARGS); break;
+        case 8: kernel_flash_attn_ext_impl<FWD_TMPL, 8>(FWD_ARGS); break;
+     }
+ #undef FWD_TMPL
+ #undef FWD_ARGS
--- a/manifest/layer.go
+++ b/manifest/layer.go
@@ -1,4 +1,4 @@
-package server
+package manifest

 import (
 	"crypto/sha256"
@@ -14,7 +14,7 @@ type Layer struct {
 	Size      int64  `json:"size"`
 	From      string `json:"from,omitempty"`
 	Name      string `json:"name,omitempty"` // tensor name, e.g., "text_encoder/model.embed_tokens.weight"
-	status    string
+	Status    string `json:"-"`
 }

 const (
@@ -22,7 +22,7 @@ const (
 )

 func NewLayer(r io.Reader, mediatype string) (Layer, error) {
-	blobs, err := GetBlobsPath("")
+	blobs, err := BlobsPath("")
 	if err != nil {
 		return Layer{}, err
 	}
@@ -45,7 +45,7 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
 	}

 	digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
-	blob, err := GetBlobsPath(digest)
+	blob, err := BlobsPath(digest)
 	if err != nil {
 		return Layer{}, err
 	}
@@ -65,7 +65,7 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
 		MediaType: mediatype,
 		Digest:    digest,
 		Size:      n,
-		status:    fmt.Sprintf("%s %s", status, digest),
+		Status:    fmt.Sprintf("%s %s", status, digest),
 	}, nil
 }

@@ -74,7 +74,7 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
 		return Layer{}, errors.New("creating new layer from layer with empty digest")
 	}

-	blob, err := GetBlobsPath(digest)
+	blob, err := BlobsPath(digest)
 	if err != nil {
 		return Layer{}, err
 	}
@@ -89,7 +89,7 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
 		Digest:    digest,
 		Size:      fi.Size(),
 		From:      from,
-		status:    fmt.Sprintf("using existing layer %s", digest),
+		Status:    fmt.Sprintf("using existing layer %s", digest),
 	}, nil
 }

@@ -98,7 +98,7 @@ func (l *Layer) Open() (io.ReadSeekCloser, error) {
 		return nil, errors.New("opening layer with empty digest")
 	}

-	blob, err := GetBlobsPath(l.Digest)
+	blob, err := BlobsPath(l.Digest)
 	if err != nil {
 		return nil, err
 	}
@@ -126,7 +126,7 @@ func (l *Layer) Remove() error {
 		}
 	}

-	blob, err := GetBlobsPath(l.Digest)
+	blob, err := BlobsPath(l.Digest)
 	if err != nil {
 		return err
 	}
--- a/manifest/manifest.go
+++ b/manifest/manifest.go
@@ -1,10 +1,9 @@
-package server
+package manifest

 import (
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -33,12 +32,38 @@ func (m *Manifest) Size() (size int64) {
 	return
 }

+// Digest returns the digest string stored on the manifest.
+// NOTE(review): presumably the sha256 of the manifest file contents — confirm
+// against where m.digest is populated.
+func (m *Manifest) Digest() string {
+	return m.digest
+}
+
+// FileInfo returns the os.FileInfo stored on the manifest.
+// NOTE(review): presumably the stat result of the manifest file on disk —
+// confirm against where m.fi is populated.
+func (m *Manifest) FileInfo() os.FileInfo {
+	return m.fi
+}
+
+// ReadConfigJSON locates the first layer whose media type is
+// "application/vnd.ollama.image.json" and whose name equals configPath, reads
+// that layer's blob from disk, and JSON-decodes it into v.
+//
+// It returns the blob path, read, or decode error as-is, and a "not found"
+// error when no layer matches.
+func (m *Manifest) ReadConfigJSON(configPath string, v any) error {
+	for _, l := range m.Layers {
+		// Skip anything that is not the requested JSON config layer.
+		if l.MediaType != "application/vnd.ollama.image.json" || l.Name != configPath {
+			continue
+		}
+
+		blob, err := BlobsPath(l.Digest)
+		if err != nil {
+			return err
+		}
+
+		raw, err := os.ReadFile(blob)
+		if err != nil {
+			return err
+		}
+
+		return json.Unmarshal(raw, v)
+	}
+
+	return fmt.Errorf("config %q not found in manifest", configPath)
+}
+
 func (m *Manifest) Remove() error {
 	if err := os.Remove(m.filepath); err != nil {
 		return err
 	}

-	manifests, err := GetManifestPath()
+	manifests, err := Path()
 	if err != nil {
 		return err
 	}
@@ -70,11 +95,11 @@ func (m *Manifest) RemoveLayers() error {
 		if _, used := inUse[layer.Digest]; used {
 			continue
 		}
-		blob, err := GetBlobsPath(layer.Digest)
+		blob, err := BlobsPath(layer.Digest)
 		if err != nil {
 			return err
 		}
-		if err := os.Remove(blob); errors.Is(err, os.ErrNotExist) {
+		if err := os.Remove(blob); os.IsNotExist(err) {
 			slog.Debug("layer does not exist", "digest", layer.Digest)
 		} else if err != nil {
 			return err
@@ -89,7 +114,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 		return nil, model.Unqualified(n)
 	}

-	manifests, err := GetManifestPath()
+	manifests, err := Path()
 	if err != nil {
 		return nil, err
 	}
@@ -121,7 +146,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 }

 func WriteManifest(name model.Name, config Layer, layers []Layer) error {
-	manifests, err := GetManifestPath()
+	manifests, err := Path()
 	if err != nil {
 		return err
 	}
@@ -148,7 +173,7 @@ func WriteManifest(name model.Name, config Layer, layers []Layer) error {
 }

 func Manifests(continueOnError bool) (map[model.Name]*Manifest, error) {
-	manifests, err := GetManifestPath()
+	manifests, err := Path()
 	if err != nil {
 		return nil, err
 	}
--- a/manifest/manifest_test.go
+++ b/manifest/manifest_test.go
@@ -1,4 +1,4 @@
-package server
+package manifest

 import (
 	"encoding/json"
--- a/manifest/paths.go
+++ b/manifest/paths.go
@@ -0,0 +1,95 @@
+package manifest
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/types/model"
+)
+
+var ErrInvalidDigestFormat = errors.New("invalid digest format")
+
+// Path returns the "manifests" directory under the configured models
+// directory, creating it (and any missing parents) with mode 0o755 first.
+func Path() (string, error) {
+	dir := filepath.Join(envconfig.Models(), "manifests")
+
+	err := os.MkdirAll(dir, 0o755)
+	if err != nil {
+		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
+	}
+
+	return dir, nil
+}
+
+// PathForName resolves the on-disk location of the manifest file belonging to
+// model name n. An invalid name yields os.ErrNotExist; otherwise the result is
+// n.Filepath() joined onto the manifests directory returned by Path.
+func PathForName(n model.Name) (string, error) {
+	if valid := n.IsValid(); !valid {
+		return "", os.ErrNotExist
+	}
+
+	root, err := Path()
+	if err != nil {
+		return "", err
+	}
+
+	full := filepath.Join(root, n.Filepath())
+	return full, nil
+}
+
+func BlobsPath(digest string) (string, error) {
+	// only accept actual sha256 digests
+	pattern := "^sha256[:-][0-9a-fA-F]{64}$"
+	re := regexp.MustCompile(pattern)
+
+	if digest != "" && !re.MatchString(digest) {
+		return "", ErrInvalidDigestFormat
+	}
+
+	digest = strings.ReplaceAll(digest, ":", "-")
+	path := filepath.Join(envconfig.Models(), "blobs", digest)
+	dirPath := filepath.Dir(path)
+	if digest == "" {
+		dirPath = path
+	}
+
+	if err := os.MkdirAll(dirPath, 0o755); err != nil {
+		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
+	}
+
+	return path, nil
+}
+
+// PruneDirectory walks path depth-first and removes every directory that is
+// empty once its children have been pruned, including path itself.
+//
+// Entries that are not real directories (regular files, symlinks) are left in
+// place, so a directory containing any of them survives. The first error from
+// Lstat, ReadDir, or Remove stops the walk and is returned.
+func PruneDirectory(path string) error {
+	fi, err := os.Lstat(path)
+	if err != nil {
+		return err
+	}
+
+	// Nothing to do for anything that is not a genuine directory.
+	if !fi.IsDir() || fi.Mode()&os.ModeSymlink != 0 {
+		return nil
+	}
+
+	children, err := os.ReadDir(path)
+	if err != nil {
+		return err
+	}
+
+	for _, child := range children {
+		if err := PruneDirectory(filepath.Join(path, child.Name())); err != nil {
+			return err
+		}
+	}
+
+	// List again: the recursive calls above may have removed some children.
+	remaining, err := os.ReadDir(path)
+	if err != nil {
+		return err
+	}
+
+	if len(remaining) == 0 {
+		return os.Remove(path)
+	}
+
+	return nil
+}
--- a/middleware/openai.go
+++ b/middleware/openai.go
@@ -609,3 +609,49 @@ func ImageGenerationsMiddleware() gin.HandlerFunc {
 		c.Next()
 	}
 }
+
+// ImageEditsMiddleware returns a gin handler that adapts an OpenAI-style image
+// edit request for the downstream handler.
+//
+// The handler binds the JSON body into an openai.ImageEditRequest, requires
+// non-empty prompt, model, and image fields (checked in that order), converts
+// the request with openai.FromImageEditRequest, and replaces the request body
+// with the JSON encoding of the converted request. The response writer is
+// wrapped in an ImageWriter before the next handler runs. Validation and
+// conversion failures abort with 400; an encoding failure aborts with 500.
+func ImageEditsMiddleware() gin.HandlerFunc {
+	// reject aborts the request with an OpenAI-formatted error body.
+	reject := func(c *gin.Context, status int, msg string) {
+		c.AbortWithStatusJSON(status, openai.NewError(status, msg))
+	}
+
+	return func(c *gin.Context) {
+		var editReq openai.ImageEditRequest
+		if err := c.ShouldBindJSON(&editReq); err != nil {
+			reject(c, http.StatusBadRequest, err.Error())
+			return
+		}
+
+		// Only the first missing required field is reported.
+		switch {
+		case editReq.Prompt == "":
+			reject(c, http.StatusBadRequest, "prompt is required")
+			return
+		case editReq.Model == "":
+			reject(c, http.StatusBadRequest, "model is required")
+			return
+		case editReq.Image == "":
+			reject(c, http.StatusBadRequest, "image is required")
+			return
+		}
+
+		generateReq, err := openai.FromImageEditRequest(editReq)
+		if err != nil {
+			reject(c, http.StatusBadRequest, err.Error())
+			return
+		}
+
+		var payload bytes.Buffer
+		if err := json.NewEncoder(&payload).Encode(generateReq); err != nil {
+			reject(c, http.StatusInternalServerError, err.Error())
+			return
+		}
+
+		// Hand the converted request to the next handler in place of the original body.
+		c.Request.Body = io.NopCloser(&payload)
+		c.Writer = &ImageWriter{
+			BaseWriter: BaseWriter{ResponseWriter: c.Writer},
+		}
+
+		c.Next()
+	}
+}
--- a/middleware/openai_test.go
+++ b/middleware/openai_test.go
@@ -1112,3 +1112,129 @@ func TestImageWriterResponse(t *testing.T) {
 		t.Errorf("expected image data 'dGVzdC1pbWFnZS1kYXRh', got %s", imageResp.Data[0].B64JSON)
 	}
 }
+
+// TestImageEditsMiddleware drives ImageEditsMiddleware through a gin router and
+// checks, per case, either the generate request observed after the middleware
+// or the error response it produced.
+func TestImageEditsMiddleware(t *testing.T) {
+	type testCase struct {
+		name string
+		body string
+		req  api.GenerateRequest
+		err  openai.ErrorResponse
+	}
+
+	// capturedRequest is filled in by captureRequestMiddleware for each request
+	// that gets past ImageEditsMiddleware, and reset after every subtest.
+	var capturedRequest *api.GenerateRequest
+
+	// Base64-encoded test image (1x1 pixel PNG). Declared once so the data URL
+	// sent in the request and the expected decoded bytes cannot drift apart.
+	const encodedPNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII="
+	testImage := "data:image/png;base64," + encodedPNG
+	decodedImage, err := base64.StdEncoding.DecodeString(encodedPNG)
+	if err != nil {
+		t.Fatalf("decoding test image: %v", err)
+	}
+
+	testCases := []testCase{
+		{
+			name: "image edit basic",
+			body: `{
+				"model": "test-model",
+				"prompt": "make it blue",
+				"image": "` + testImage + `"
+			}`,
+			req: api.GenerateRequest{
+				Model:  "test-model",
+				Prompt: "make it blue",
+				Images: []api.ImageData{decodedImage},
+			},
+		},
+		{
+			name: "image edit with size",
+			body: `{
+				"model": "test-model",
+				"prompt": "make it blue",
+				"image": "` + testImage + `",
+				"size": "512x768"
+			}`,
+			req: api.GenerateRequest{
+				Model:  "test-model",
+				Prompt: "make it blue",
+				Images: []api.ImageData{decodedImage},
+				Width:  512,
+				Height: 768,
+			},
+		},
+		{
+			name: "image edit missing prompt",
+			body: `{
+				"model": "test-model",
+				"image": "` + testImage + `"
+			}`,
+			err: openai.ErrorResponse{
+				Error: openai.Error{
+					Message: "prompt is required",
+					Type:    "invalid_request_error",
+				},
+			},
+		},
+		{
+			name: "image edit missing model",
+			body: `{
+				"prompt": "make it blue",
+				"image": "` + testImage + `"
+			}`,
+			err: openai.ErrorResponse{
+				Error: openai.Error{
+					Message: "model is required",
+					Type:    "invalid_request_error",
+				},
+			},
+		},
+		{
+			name: "image edit missing image",
+			body: `{
+				"model": "test-model",
+				"prompt": "make it blue"
+			}`,
+			err: openai.ErrorResponse{
+				Error: openai.Error{
+					Message: "image is required",
+					Type:    "invalid_request_error",
+				},
+			},
+		},
+	}
+
+	// The terminal handler only acknowledges the request; the assertions below
+	// look at what the middleware chain saw or returned.
+	endpoint := func(c *gin.Context) {
+		c.Status(http.StatusOK)
+	}
+
+	gin.SetMode(gin.TestMode)
+	router := gin.New()
+	router.Use(ImageEditsMiddleware(), captureRequestMiddleware(&capturedRequest))
+	router.Handle(http.MethodPost, "/api/generate", endpoint)
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			req, err := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
+			if err != nil {
+				t.Fatalf("building request: %v", err)
+			}
+			req.Header.Set("Content-Type", "application/json")
+
+			// Reset shared state so one case cannot leak into the next.
+			defer func() { capturedRequest = nil }()
+
+			resp := httptest.NewRecorder()
+			router.ServeHTTP(resp, req)
+
+			// Error cases: compare the decoded error body and stop.
+			if tc.err.Error.Message != "" {
+				var errResp openai.ErrorResponse
+				if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
+					t.Fatal(err)
+				}
+				if diff := cmp.Diff(tc.err, errResp); diff != "" {
+					t.Fatalf("errors did not match:\n%s", diff)
+				}
+				return
+			}
+
+			if resp.Code != http.StatusOK {
+				t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
+			}
+
+			if diff := cmp.Diff(&tc.req, capturedRequest); diff != "" {
+				t.Fatalf("requests did not match:\n%s", diff)
+			}
+		})
+	}
+}
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -162,6 +162,7 @@ type Tensor interface {
 	AvgPool2D(ctx Context, k, s int, p float32) Tensor
 	Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
 	Conv3D(ctx Context, weight Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) Tensor
+	SSMConv(ctx Context, kernel Tensor) Tensor

 	IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor

--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -1641,6 +1641,13 @@ func (t *Tensor) Conv3D(ctx ml.Context, t2 ml.Tensor, c, s0, s1, s2, p0, p1, p2,
 	return tt
 }

+// SSMConv runs ggml_ssm_conv on this tensor with the given kernel and wraps the
+// resulting ggml tensor in a new Tensor that shares t's backend.
+// ctx must be a *Context and kernel a *Tensor from this package; any other
+// implementation makes the type assertions panic.
+func (t *Tensor) SSMConv(ctx ml.Context, kernel ml.Tensor) ml.Tensor {
+	result := C.ggml_ssm_conv(ctx.(*Context).ctx, t.t, kernel.(*Tensor).t)
+	return &Tensor{b: t.b, t: result}
+}
+
 func (t *Tensor) AvgPool2D(ctx ml.Context, k, s int, p float32) ml.Tensor {
 	return &Tensor{
 		b: t.b,
--- a/ml/backend/ggml/ggml/src/ggml-cuda/fattn-mma-f16.cuh
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-mma-f16.cuh
@@ -66,7 +66,8 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 32, 128, 2,  32, 128, 128, 128, 2, true);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 64, 128, 2,  32, 128, 128, 128, 2, true);

-    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  4,  64, 4,  32, 288, 256, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256, 128, 1, true);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 16,  64, 4,  32, 288, 256, 128, 1, false);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 32, 128, 2,  32, 160, 128, 128, 1, false);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 64, 256, 1,  32, 160, 128, 128, 1, false);
@@ -80,7 +81,8 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 32, 128, 2,  64, 128, 128,  64, 2, true);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(256, 256, 64, 128, 2,  64, 128, 128,  64, 2, true);

-    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32,  96,  64, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  4,  64, 4,  32,  96,  64, 128, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32,  96,  64, 128, 1, true);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 16,  64, 4,  32,  96,  64, 128, 1, false);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 32, 128, 2,  32, 160, 128, 128, 1, false);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 64, 256, 1,  32, 160, 128, 128, 1, false);
@@ -89,7 +91,8 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
 }

 static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_config_volta(const int DKQ, const int DV, const int ncols) {
-    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256,  64, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  4,  64, 4,  32, 288, 256,  64, 1, false);
+    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512,  8,  64, 4,  32, 288, 256,  64, 1, true);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 16,  64, 4,  32, 288, 256,  64, 1, false);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 32, 128, 2,  32, 160, 128,  64, 1, false);
    GGML_CUDA_FATTN_MMA_CONFIG_CASE(576, 512, 64, 256, 1,  32, 160, 128,  64, 1, false);
@@ -397,7 +400,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
    constexpr int  ncols           = ncols1 * ncols2;
    constexpr int  cols_per_warp   = T_B_KQ::I;
    constexpr int  cols_per_thread = 2; // This is specifically KQ columns, Volta only has a single VKQ column.
-    constexpr int  np              = nwarps * (cols_per_warp/ncols2) / ncols1; // Number of parallel CUDA warps per Q column.
+    constexpr int  np              = cols_per_warp > ncols ? nwarps : nwarps * cols_per_warp/ncols; // Number of parallel CUDA warps per Q column.
    constexpr int  nbatch_fa       = ggml_cuda_fattn_mma_get_nbatch_fa(DKQ, DV, ncols);
    constexpr int  nbatch_K2       = ggml_cuda_fattn_mma_get_nbatch_K2(DKQ, DV, ncols);
    constexpr int  nbatch_V2       = ggml_cuda_fattn_mma_get_nbatch_V2(DKQ, DV, ncols);
@@ -467,7 +470,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
                }
            }
        } else {
-            static_assert(cols_per_warp != 8, "cols_per_warp == 8 not implemented");
 #pragma unroll
            for (int k_KQ_0 = k0_start; k_KQ_0 < k0_stop; k_KQ_0 += T_A_KQ::J) {
                load_ldmatrix(Q_B[0], tile_Q + (threadIdx.y / np)*(T_B_KQ::I*stride_tile_Q) + k_KQ_0, stride_tile_Q);
@@ -479,8 +481,18 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
                    T_A_KQ K_A;
                    load_ldmatrix(K_A, tile_K + i_KQ_0*stride_tile_K + (k_KQ_0 - k0_start), stride_tile_K);

-                    // Wide version of KQ_C is column-major => swap A and B.
-                    mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], Q_B[0], K_A);
+                    if constexpr (cols_per_warp == 8) {
+                        mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], K_A, Q_B[0]);
+                    } else {
+                        // Wide version of KQ_C is column-major
+#if defined(AMD_WMMA_AVAILABLE)
+                        // RDNA matrix C is column-major.
+                        mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], K_A, Q_B[0]);
+#else
+                        // swap A and B for CUDA.
+                        mma(KQ_C[i_KQ_00/(np*T_A_KQ::I)], Q_B[0], K_A);
+#endif // defined(AMD_WMMA_AVAILABLE)
+                    }
                }
            }
        }
@@ -841,7 +853,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(

    constexpr int  cols_per_warp   = T_B_KQ::I;
    constexpr int  cols_per_thread = 2; // This is specifically KQ columns, Volta only has a single VKQ column.
-    constexpr int  np              = nwarps * (cols_per_warp/ncols2) / ncols1; // Number of parallel CUDA warps per Q column.
+    constexpr int  np              = cols_per_warp > ncols ? nwarps : nwarps * cols_per_warp/ncols; // Number of parallel CUDA warps per Q column.
    constexpr int  nbatch_fa       = ggml_cuda_fattn_mma_get_nbatch_fa     (DKQ, DV, ncols);
    constexpr int  nbatch_K2       = ggml_cuda_fattn_mma_get_nbatch_K2     (DKQ, DV, ncols);
    constexpr int  nbatch_V2       = ggml_cuda_fattn_mma_get_nbatch_V2     (DKQ, DV, ncols);
@@ -1353,6 +1365,13 @@ static __global__ void flash_attn_ext_f16(
        NO_DEVICE_CODE;
        return;
    }
+#ifdef VOLTA_MMA_AVAILABLE
+    if (ncols1*ncols2 < 32) {
+        NO_DEVICE_CODE;
+        return;
+    }
+#endif // VOLTA_MMA_AVAILABLE
+
 #if __CUDA_ARCH__ == GGML_CUDA_CC_TURING
    if (ncols1*ncols2 > 32) {
        NO_DEVICE_CODE;
@@ -1585,3 +1604,8 @@ DECL_FATTN_MMA_F16_CASE_ALL_NCOLS2(256, 256,  64)
 extern DECL_FATTN_MMA_F16_CASE(576, 512, 1, 16);
 extern DECL_FATTN_MMA_F16_CASE(576, 512, 2, 16);
 extern DECL_FATTN_MMA_F16_CASE(576, 512, 4, 16);
+
+// For GLM 4.7 Flash
+extern DECL_FATTN_MMA_F16_CASE(576, 512,  4,  4);
+extern DECL_FATTN_MMA_F16_CASE(576, 512,  8,  4);
+extern DECL_FATTN_MMA_F16_CASE(576, 512, 16,  4);
--- a/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile.cuh
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn-tile.cuh
@@ -68,6 +68,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2,  64,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2,  64,  64)

+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  64,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  64,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 2,  64,  64)

    return 0;
@@ -122,6 +124,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2,  32, 128)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2,  32,  64)

+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  32,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  32,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 2,  32,  64)

    return 0;
@@ -183,6 +187,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2,  32, 128)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2,  32, 128)

+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  64,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  64,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 2,  64,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 32, 512, 1, 128,  64)

@@ -245,6 +251,8 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 5,  32, 256)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 3,  64, 128)

+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  4, 128, 2,  64,  64)
+    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512,  8, 256, 2,  64,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 16, 256, 4,  64,  64)
    GGML_CUDA_FATTN_TILE_CONFIG_CASE(576, 512, 32, 256, 2, 128,  64)

@@ -1187,6 +1195,14 @@ static void launch_fattn_tile_switch_ncols2(ggml_backend_cuda_context & ctx, ggm
            launch_fattn_tile_switch_ncols1<DKQ, DV, 16, use_logit_softcap>(ctx, dst);
            return;
        }
+        if (use_gqa_opt && gqa_ratio % 8 == 0) {
+            launch_fattn_tile_switch_ncols1<DKQ, DV, 8, use_logit_softcap>(ctx, dst);
+            return;
+        }
+        if (use_gqa_opt && gqa_ratio % 4 == 0) {
+            launch_fattn_tile_switch_ncols1<DKQ, DV, 4, use_logit_softcap>(ctx, dst);
+            return;
+        }
    }

    if constexpr (DV <= 256) {
--- a/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/fattn.cu
@@ -111,7 +111,7 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg
            ggml_cuda_flash_attn_ext_mma_f16_switch_ncols2<256, 256>(ctx, dst);
            break;
        case 576: {
-            // For Deepseek, go straight to the ncols1 switch to avoid compiling unnecessary kernels.
+            // For Deepseek/GLM4, go straight to the ncols1 switch to avoid compiling unnecessary kernels.
            GGML_ASSERT(V->ne[0] == 512);
            float max_bias = 0.0f;
            memcpy(&max_bias, (const float *) KQV->op_params + 1, sizeof(float));
@@ -121,8 +121,12 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg

            GGML_ASSERT(Q->ne[2] % K->ne[2] == 0);
            const int gqa_ratio = Q->ne[2] / K->ne[2];
-            GGML_ASSERT(gqa_ratio % 16 == 0);
-            ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
+            GGML_ASSERT(gqa_ratio % 4 == 0);
+            if (gqa_ratio % 16 == 0) {
+                ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
+            } else {
+                ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512,  4>(ctx, dst);
+            }
        } break;
        default:
            GGML_ABORT("fatal error");
@@ -251,7 +255,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
            if (V->ne[0] != 512) {
                return BEST_FATTN_KERNEL_NONE;
            }
-            if (!gqa_opt_applies || gqa_ratio % 16 != 0) {
+            if (!gqa_opt_applies || gqa_ratio % 4 != 0) {
                return BEST_FATTN_KERNEL_NONE;
            }
            break;
--- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 16, 4);
 DECL_FATTN_MMA_F16_CASE(112, 112, 16, 4);
 DECL_FATTN_MMA_F16_CASE(128, 128, 16, 4);
 DECL_FATTN_MMA_F16_CASE(256, 256, 16, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 16, 4);
--- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 2, 4);
 DECL_FATTN_MMA_F16_CASE(112, 112, 2, 4);
 DECL_FATTN_MMA_F16_CASE(128, 128, 2, 4);
 DECL_FATTN_MMA_F16_CASE(256, 256, 2, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 2, 4);
--- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 4, 4);
 DECL_FATTN_MMA_F16_CASE(112, 112, 4, 4);
 DECL_FATTN_MMA_F16_CASE(128, 128, 4, 4);
 DECL_FATTN_MMA_F16_CASE(256, 256, 4, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 4, 4);
--- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
@@ -8,3 +8,4 @@ DECL_FATTN_MMA_F16_CASE(96, 96, 8, 4);
 DECL_FATTN_MMA_F16_CASE(112, 112, 8, 4);
 DECL_FATTN_MMA_F16_CASE(128, 128, 8, 4);
 DECL_FATTN_MMA_F16_CASE(256, 256, 8, 4);
+DECL_FATTN_MMA_F16_CASE(576, 512, 8, 4);
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-device.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-device.m
@@ -1071,12 +1071,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
                op->src[0]->ne[0] != 112 &&
                op->src[0]->ne[0] != 128 &&
                op->src[0]->ne[0] != 192 &&
-                op->src[0]->ne[0] != 256) {
-                return false;
-            }
-            if (op->src[0]->ne[0] == 576) {
-                // DeepSeek sizes
-                // TODO: disabled for now, until optmized
+                op->src[0]->ne[0] != 256 &&
+                op->src[0]->ne[0] != 576) {
                return false;
            }
            if (op->src[1]->type != op->src[2]->type) {
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
@@ -8967,6 +8967,7 @@ kernel void kernel_flash_attn_ext(
      //case 1: kernel_flash_attn_ext_impl<FWD_TMPL, 1>(FWD_ARGS); break;
      //case 2: kernel_flash_attn_ext_impl<FWD_TMPL, 2>(FWD_ARGS); break;
        case 4: kernel_flash_attn_ext_impl<FWD_TMPL, 4>(FWD_ARGS); break;
+        case 8: kernel_flash_attn_ext_impl<FWD_TMPL, 8>(FWD_ARGS); break;
    }
 #undef FWD_TMPL
 #undef FWD_ARGS
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-ops.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-ops.cpp
@@ -2456,7 +2456,7 @@ int ggml_metal_op_flash_attn_ext(ggml_metal_op_t ctx, int idx) {

        // simdgroups per threadgroup (a.k.a. warps)
        //nsg = ne01 <= nqptg ? MAX(4, MIN(nsgmax, MIN(ne11/ncpsg, (int64_t) pipeline.maxTotalThreadsPerThreadgroup/32))) : 4;
-        int32_t nsg = 4;
+        int32_t nsg = ne00 >= 512 ? 8 : 4;

        const size_t smem = FATTN_SMEM(nsg);

--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.metal
@@ -6166,6 +6166,7 @@ kernel void kernel_flash_attn_ext(
      //case 1: kernel_flash_attn_ext_impl<FWD_TMPL, 1>(FWD_ARGS); break;
      //case 2: kernel_flash_attn_ext_impl<FWD_TMPL, 2>(FWD_ARGS); break;
        case 4: kernel_flash_attn_ext_impl<FWD_TMPL, 4>(FWD_ARGS); break;
+        case 8: kernel_flash_attn_ext_impl<FWD_TMPL, 8>(FWD_ARGS); break;
    }
 #undef FWD_TMPL
 #undef FWD_ARGS
--- a/model/model.go
+++ b/model/model.go
@@ -39,6 +39,13 @@ type Model interface {
 	Config() config
 }

+// Validator is an optional interface that models can implement to perform
+// validation after tensors have been loaded. If validation fails, model
+// loading will fail with the returned error.
+type Validator interface {
+	Validate() error // invoked by New once fields are populated; a non-nil error is returned in place of the model
+}
+
 // MultimodalProcessor must be implemented by multimodal models.
 type MultimodalProcessor interface {
 	// EncodeMultimodal processes a single input (such as an image) and
@@ -116,6 +123,13 @@ func New(modelPath string, params ml.BackendParams) (Model, error) {
 	base := Base{b: b, config: m.Config()}
 	v := reflect.ValueOf(m)
 	v.Elem().Set(populateFields(base, v.Elem()))
+
+	if validator, ok := m.(Validator); ok {
+		if err := validator.Validate(); err != nil {
+			return nil, err
+		}
+	}
+
 	return m, nil
 }

--- a/model/models/glm4moelite/model.go
+++ b/model/models/glm4moelite/model.go
@@ -1,6 +1,7 @@
 package glm4moelite

 import (
+	"errors"
 	"math"

 	"github.com/ollama/ollama/fs"
@@ -11,6 +12,8 @@ import (
 	"github.com/ollama/ollama/model/input"
 )

+var ErrOldModelFormat = errors.New("this model uses a weight format that is no longer supported; please re-download it") // returned by (*Model).Validate
+
 type Options struct {
 	numExpertsUsed      int
 	numExperts          int
@@ -47,7 +50,9 @@ type Attention struct {

 	KVA     *nn.Linear  `gguf:"attn_kv_a_mqa"`
 	KVANorm *nn.RMSNorm `gguf:"attn_kv_a_norm"`
-	KVB     *nn.Linear  `gguf:"attn_kv_b"`
+
+	KB *nn.Linear `gguf:"attn_k_b"`
+	VB *nn.Linear `gguf:"attn_v_b"`

 	Output *nn.Linear `gguf:"attn_out,alt:attn_output"`
 }
@@ -78,15 +83,16 @@ func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor
 	qRot := opts.applyRotaryPositionEmbeddings(ctx, queryChunks[1], positions)
 	kRot = opts.applyRotaryPositionEmbeddings(ctx, kRot, positions)
 	kPass = attn.KVANorm.Forward(ctx, kPass, opts.eps)
-	kPass = attn.KVB.Forward(ctx, kPass)

-	kv := kPass.Reshape(ctx, kPass.Dim(0)/opts.numKVHeads, opts.numKVHeads, seqLength)
-	kvChunks := kv.ChunkSections(ctx, 0, opts.kqNopeHeadDim, opts.vHeadDim)
+	// MLA absorption: absorb K projection into query
+	qPass := queryChunks[0].Permute(ctx, 0, 2, 1, 3)
+	qPassAbsorb := attn.KB.Forward(ctx, qPass).Permute(ctx, 0, 2, 1, 3)
+	query = qRot.Concat(ctx, qPassAbsorb, 0)

-	kRot = kRot.Repeat(ctx, 1, queryChunks[0].Dim(1))
-	query = qRot.Concat(ctx, queryChunks[0], 0)
-	key := kRot.Concat(ctx, kvChunks[0], 0)
-	attention := nn.Attention(ctx, query, key, kvChunks[1], opts.kqScale, cache)
+	kPass = kPass.Reshape(ctx, opts.kvLoraRank, 1, seqLength)
+	key := kRot.Concat(ctx, kPass, 0)
+
+	attention := nn.AttentionWithVMLA(ctx, query, key, kPass, nil, attn.VB.Weight, opts.kqScale, cache)

 	attention = attention.Reshape(ctx, attention.Dim(0)*attention.Dim(1), seqLength)
 	return attn.Output.Forward(ctx, attention)
@@ -217,7 +223,6 @@ func New(c fs.Config) (model.Model, error) {

 	keyLength := int(c.Uint("attention.key_length"))
 	valueLength := int(c.Uint("attention.value_length"))
-
 	kqScale := 1.0 / math.Sqrt(float64(keyLength))

 	var pre []string
@@ -236,7 +241,7 @@ func New(c fs.Config) (model.Model, error) {
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
-				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
 				BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
 				EOS: append(
@@ -279,6 +284,15 @@ func (m Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor
 	return m.applyRotaryPositionEmbeddings(ctx, key, shift), nil
 }

+func (m *Model) Validate() error {
+	for _, layer := range m.Layers { // layers whose Attention is nil are skipped
+		if layer.Attention != nil && (layer.Attention.KB == nil || layer.Attention.VB == nil) {
+			return ErrOldModelFormat // KB/VB (gguf attn_k_b / attn_v_b) unset; presumably weights that predate the split projections
+		}
+	}
+	return nil
+}
+
 func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 	positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))

--- a/model/models/glm4moelite/model_test.go
+++ b/model/models/glm4moelite/model_test.go
@@ -0,0 +1,73 @@
+package glm4moelite
+
+import (
+	"testing"
+
+	"github.com/ollama/ollama/ml/nn"
+)
+
+func TestValidate(t *testing.T) {
+	tests := []struct {
+		name    string
+		model   *Model
+		wantErr bool
+	}{
+		{
+			name: "valid model with KB and VB",
+			model: &Model{
+				Layers: []Layer{
+					{Attention: &Attention{KB: &nn.Linear{}, VB: &nn.Linear{}}},
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "missing KB",
+			model: &Model{
+				Layers: []Layer{
+					{Attention: &Attention{VB: &nn.Linear{}}},
+				},
+			},
+			wantErr: true,
+		},
+		{
+			name: "missing VB",
+			model: &Model{
+				Layers: []Layer{
+					{Attention: &Attention{KB: &nn.Linear{}}},
+				},
+			},
+			wantErr: true,
+		},
+		{
+			name: "missing both KB and VB",
+			model: &Model{
+				Layers: []Layer{
+					{Attention: &Attention{}},
+				},
+			},
+			wantErr: true,
+		},
+		{
+			name: "nil Attention is ok",
+			model: &Model{
+				Layers: []Layer{
+					{Attention: nil}, // Validate only inspects layers that have an attention block
+				},
+			},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.model.Validate()
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Validate() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			if tt.wantErr && err != ErrOldModelFormat { // a failure must be the sentinel itself, not just any error
+				t.Errorf("Validate() error = %v, want %v", err, ErrOldModelFormat)
+			}
+		})
+	}
+}
--- a/model/models/lfm2/cache.go
+++ b/model/models/lfm2/cache.go
@@ -0,0 +1,410 @@
+package lfm2
+
+import (
+	"slices"
+
+	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/model/input"
+)
+
+var _ kvcache.Cache = (*HybridCache)(nil)
+
+// HybridCache stores:
+// - a standard causal KV cache for attention layers
+// - a per-sequence recurrent conv state for shortconv layers
+//
+// Conv state shape (per layer, per sequence): [dConv, hiddenSize] where dConv = L_cache - 1.
+// Stored internally as one tensor per layer of shape [dConv * hiddenSize, maxSequences], addressed by slot.
+type HybridCache struct {
+	kv *kvcache.Causal
+
+	backend      ml.Backend
+	dtype        ml.DType
+	maxSequences int
+
+	hiddenSize int
+	dConv      int
+
+	// slot bookkeeping: sequence id -> slot, per-slot share count, LIFO stack of unused slots
+	slotForSeq map[int]int
+	refCount   []int
+	freeSlots  []int
+
+	// per-layer conv state buffers and their contexts, created on first use by convBuffer
+	convCtxs   map[int]ml.Context
+	convStates map[int]ml.Tensor // [dConv*hiddenSize, maxSlots]
+
+	// current forward batch (derived in StartForward): unique seqs in first-seen order, their slots, tokens per seq
+	curSeqs       []int
+	curSlots      []int
+	curSlotsInput ml.Tensor
+	curSeqTokens  int
+
+	// set once ConvState has run its shared-slot check for this forward pass
+	writableEnsured bool
+	// error from that EnsureWritable call, returned by every later ConvState call in the pass
+	writableError error
+}
+
+func NewHybridCache(shift func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error), hiddenSize, dConv int) *HybridCache {
+	return &HybridCache{ // backend, dtype, maxSequences and the slot allocator are filled in later by Init
+		kv:         kvcache.NewCausalCache(shift),
+		hiddenSize: hiddenSize,
+		dConv:      dConv,
+		slotForSeq: make(map[int]int),
+		convCtxs:   make(map[int]ml.Context),
+		convStates: make(map[int]ml.Tensor),
+	}
+}
+
+func (c *HybridCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {
+	c.backend = backend
+	c.dtype = dtype
+	c.maxSequences = maxSequences
+
+	// initialize slot allocator: push slots in descending order so allocSlot (which pops the tail) hands out slot 0 first
+	c.refCount = make([]int, maxSequences)
+	c.freeSlots = c.freeSlots[:0]
+	for i := maxSequences - 1; i >= 0; i-- {
+		c.freeSlots = append(c.freeSlots, i)
+	}
+
+	c.kv.Init(backend, dtype, maxSequences, capacity, maxBatch)
+}
+
+func (c *HybridCache) Close() {
+	for _, ctx := range c.convCtxs {
+		ctx.Close() // closes the per-layer contexts created in convBuffer
+	}
+	c.kv.Close()
+}
+
+func (c *HybridCache) SetConfig(config ml.CacheConfig) {
+	c.kv.SetConfig(config) // forwarded unchanged to the causal KV cache
+}
+
+func (c *HybridCache) SetLayer(layer int) {
+	c.kv.SetLayer(layer) // only the KV cache tracks a current layer; conv state takes the layer per call
+}
+
+func (c *HybridCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
+	return c.kv.Get(ctx) // forwarded unchanged to the causal KV cache
+}
+
+func (c *HybridCache) Put(ctx ml.Context, key, value ml.Tensor) {
+	c.kv.Put(ctx, key, value) // forwarded unchanged to the causal KV cache
+}
+
+func (c *HybridCache) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error {
+	if err := c.kv.StartForward(ctx, batch, reserve); err != nil {
+		return err
+	}
+
+	// Derive equal-length sequence layout for shortconv.
+	// LFM2 shortconv assumes tokens form a [seq_tokens, seqs] grid.
+	seqCounts := make(map[int]int)
+	c.curSeqs = c.curSeqs[:0]
+	for _, s := range batch.Sequences {
+		if _, ok := seqCounts[s]; !ok { // first occurrence: record the sequence in arrival order
+			c.curSeqs = append(c.curSeqs, s)
+		}
+		seqCounts[s]++
+	}
+
+	if len(c.curSeqs) == 0 { // empty batch: nothing further to set up
+		return nil
+	}
+
+	nTokens := len(batch.Sequences)
+	nSeqs := len(c.curSeqs)
+	want := nTokens / nSeqs // tokens per sequence if the batch splits evenly
+	for _, s := range c.curSeqs {
+		if seqCounts[s] != want {
+			return kvcache.ErrNotSupported // sequences contribute different token counts
+		}
+	}
+
+	c.curSeqTokens = want
+
+	// When reserving memory for estimation, use fake slot assignments
+	// without modifying permanent state (slotForSeq, refCount)
+	if reserve {
+		c.curSlots = c.curSlots[:0]
+		slots := make([]int32, nSeqs)
+		for i := range nSeqs {
+			c.curSlots = append(c.curSlots, i)
+			slots[i] = int32(i)
+		}
+		c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
+		return nil // NOTE(review): returns before writableEnsured/writableError are reset below — confirm intended
+	}
+
+	// Ensure slots exist for sequences in this batch
+	c.curSlots = c.curSlots[:0]
+	var newSlots []int // track newly allocated slots that need zeroing
+	for _, s := range c.curSeqs {
+		slot, ok := c.slotForSeq[s]
+		if !ok {
+			var err error
+			slot, err = c.allocSlot()
+			if err != nil {
+				return err
+			}
+			c.slotForSeq[s] = slot
+			c.refCount[slot] = 1
+			newSlots = append(newSlots, slot)
+		}
+		c.curSlots = append(c.curSlots, slot) // kept parallel to curSeqs
+	}
+
+	// Zero conv state for newly allocated slots to clear stale data from previous sequences
+	if len(newSlots) > 0 {
+		c.zeroConvSlots(ctx, newSlots)
+	}
+
+	// Build the slot-index input tensor used by ConvState and UpdateConvState
+	slots := make([]int32, len(c.curSlots))
+	for i, v := range c.curSlots {
+		slots[i] = int32(v)
+	}
+	c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
+
+	// Reset copy-on-write tracking so the first ConvState call of this pass re-checks for shared slots
+	c.writableEnsured = false
+	c.writableError = nil
+
+	return nil
+}
+
+func (c *HybridCache) allocSlot() (int, error) {
+	if len(c.freeSlots) == 0 {
+		return 0, kvcache.ErrKvCacheFull
+	}
+	slot := c.freeSlots[len(c.freeSlots)-1] // pop the tail: most recently freed slot is reused first
+	c.freeSlots = c.freeSlots[:len(c.freeSlots)-1]
+	return slot, nil // the caller is responsible for setting refCount[slot]
+}
+
+func (c *HybridCache) freeSlot(slot int) {
+	// Out-of-range slots are silently ignored rather than pushed onto the free list
+	if slot >= 0 && slot < c.maxSequences {
+		c.freeSlots = append(c.freeSlots, slot)
+	}
+}
+
+// zeroConvSlots zeros the conv state for the given slots across all layers.
+// This must be called when recycling slots to prevent stale state from affecting new sequences.
+func (c *HybridCache) zeroConvSlots(ctx ml.Context, slots []int) {
+	if len(slots) == 0 || len(c.convStates) == 0 { // no layer buffer exists yet, so there is nothing to clear
+		return
+	}
+
+	// Use input context for creating tensors
+	inputCtx := ctx.Input()
+
+	// Create slot indices tensor
+	slotIndices := make([]int32, len(slots))
+	for i, s := range slots {
+		slotIndices[i] = int32(s)
+	}
+	slotsTensor := inputCtx.FromInts(slotIndices, len(slotIndices))
+
+	// Create zero tensor for the slots (SetRows requires F32 source)
+	zeros := inputCtx.Zeros(ml.DTypeF32, c.dConv*c.hiddenSize, len(slots))
+
+	// Overwrite the selected slots in every layer buffer that has been allocated so far
+	for _, buf := range c.convStates {
+		ctx.Forward(buf.SetRows(ctx, zeros, slotsTensor))
+	}
+}
+
+// EnsureWritable ensures that sequences in the current batch have private (non-shared) conv slots.
+// Returns an error if slot allocation fails.
+func (c *HybridCache) EnsureWritable(ctx ml.Context) error {
+	for i, seq := range c.curSeqs {
+		slot, ok := c.slotForSeq[seq]
+		if !ok {
+			continue
+		}
+
+		// Skip mappings that point outside refCount instead of indexing out of range
+		if slot < 0 || slot >= len(c.refCount) {
+			continue
+		}
+
+		if c.refCount[slot] <= 1 { // slot is already private to this sequence
+			continue
+		}
+
+		newSlot, err := c.allocSlot()
+		if err != nil {
+			return err
+		}
+		c.refCount[slot]-- // this sequence leaves the shared slot
+		c.refCount[newSlot] = 1
+		c.slotForSeq[seq] = newSlot
+		c.curSlots[i] = newSlot // curSlots is indexed in parallel with curSeqs
+
+		// Copy existing conv state for all initialized layers
+		for _, buf := range c.convStates {
+			// buf: [dConv*hiddenSize, maxSlots]
+			src := buf.Rows(ctx, ctx.Input().FromInts([]int32{int32(slot)}, 1))
+			// SetRows requires F32 source
+			srcF32 := src.Cast(ctx, ml.DTypeF32)
+			ctx.Forward(buf.SetRows(ctx, srcF32, ctx.Input().FromInts([]int32{int32(newSlot)}, 1)))
+		}
+	}
+
+	// Rebuild the slot-index tensor so later ConvState/UpdateConvState calls address the new slots
+	slots := make([]int32, len(c.curSlots))
+	for i, v := range c.curSlots {
+		slots[i] = int32(v)
+	}
+	c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
+
+	return nil
+}
+
+func (c *HybridCache) CopyPrefix(srcSeq, dstSeq int, prefixLen int32) {
+	// KV cache shares prefix metadata (no copy) which is correct for prefix reuse.
+	c.kv.CopyPrefix(srcSeq, dstSeq, prefixLen)
+
+	// For shortconv state we implement copy-on-write: dst shares the same slot as src (prefixLen is not used here).
+	// The next ConvState call for a sequence on a shared slot runs EnsureWritable, which gives it a private slot.
+	if dstSlot, ok := c.slotForSeq[dstSeq]; ok {
+		// Release dst's previous slot first; it goes back to the free list once nothing references it
+		if dstSlot >= 0 && dstSlot < len(c.refCount) {
+			c.refCount[dstSlot]--
+			if c.refCount[dstSlot] <= 0 {
+				c.refCount[dstSlot] = 0
+				c.freeSlot(dstSlot)
+			}
+		}
+		delete(c.slotForSeq, dstSeq) // NOTE(review): if srcSeq == dstSeq this drops src's mapping too — confirm callers never pass equal ids
+	}
+
+	srcSlot, ok := c.slotForSeq[srcSeq]
+	if !ok {
+		// src may not have a slot yet; dst will allocate on demand
+		return
+	}
+
+	// Share src's slot with dst; an out-of-range srcSlot leaves dst unmapped
+	if srcSlot >= 0 && srcSlot < len(c.refCount) {
+		c.slotForSeq[dstSeq] = srcSlot
+		c.refCount[srcSlot]++
+	}
+}
+
+func (c *HybridCache) CanResume(seq int, pos int32) bool {
+	return c.kv.CanResume(seq, pos) // decided by the KV cache alone; conv state is not consulted
+}
+
+func (c *HybridCache) Remove(seq int, beginIndex, endIndex int32) error {
+	if err := c.kv.Remove(seq, beginIndex, endIndex); err != nil {
+		return err
+	}
+
+	// For recurrent state, any removal invalidates the state because
+	// the state at position N depends on all previous positions.
+	// Drop the slot mapping regardless of the removed range, so it resets on next use.
+	slot, ok := c.slotForSeq[seq]
+	if !ok {
+		return nil
+	}
+
+	// A mapping to an out-of-range slot is dropped without touching refCount or the free list
+	if slot < 0 || slot >= len(c.refCount) {
+		delete(c.slotForSeq, seq)
+		return nil
+	}
+
+	c.refCount[slot]--
+	if c.refCount[slot] <= 0 { // last reference gone: clamp the count and recycle the slot
+		c.refCount[slot] = 0
+		c.freeSlot(slot)
+	}
+	delete(c.slotForSeq, seq)
+
+	return nil
+}
+
+func (c *HybridCache) slotsTensor() ml.Tensor {
+	return c.curSlotsInput // set by StartForward and rebuilt by EnsureWritable
+}
+
+func (c *HybridCache) seqTokens() int {
+	return c.curSeqTokens // tokens per sequence, as computed by the last StartForward
+}
+
+func (c *HybridCache) numSeqs() int {
+	return len(c.curSeqs) // unique sequences seen by the last StartForward
+}
+
+func (c *HybridCache) convBuffer(ctx ml.Context, layer int) ml.Tensor {
+	if buf, ok := c.convStates[layer]; ok { // already allocated for this layer
+		return buf
+	}
+
+	if _, ok := c.convCtxs[layer]; !ok { // the buffer lives in a cache-owned context, not in the caller's ctx
+		c.convCtxs[layer] = c.backend.NewContextSize(1).Layer(layer)
+	}
+
+	buf := c.convCtxs[layer].Zeros(c.dtype, c.dConv*c.hiddenSize, c.maxSequences) // starts zeroed, sized for every slot
+	c.convStates[layer] = buf
+	return buf
+}
+
+// ConvState returns the conv state for current batch sequences as shape [dConv, hiddenSize, nSeqs].
+// Returns an error if copy-on-write allocation fails.
+func (c *HybridCache) ConvState(ctx ml.Context, layer int) (ml.Tensor, error) {
+	if !c.writableEnsured { // the shared-slot check runs at most once per forward pass
+		needsWritable := false
+		for _, seq := range c.curSeqs {
+			slot, ok := c.slotForSeq[seq]
+			if !ok {
+				continue
+			}
+			if slot >= 0 && slot < len(c.refCount) && c.refCount[slot] > 1 {
+				needsWritable = true
+				break
+			}
+		}
+
+		if needsWritable {
+			if err := c.EnsureWritable(ctx); err != nil {
+				c.writableError = err // remembered so later ConvState calls in this pass fail the same way
+			}
+		}
+		c.writableEnsured = true
+	}
+
+	if c.writableError != nil {
+		return nil, c.writableError
+	}
+
+	buf := c.convBuffer(ctx, layer)
+	cur := buf.Rows(ctx, c.slotsTensor()) // select the entries for the current batch's slots
+	return cur.Reshape(ctx, c.dConv, c.hiddenSize, c.numSeqs()), nil
+}
+
+// UpdateConvState writes a new conv state for current batch sequences.
+// newState must have shape [dConv, hiddenSize, nSeqs].
+func (c *HybridCache) UpdateConvState(ctx ml.Context, layer int, newState ml.Tensor) {
+	buf := c.convBuffer(ctx, layer)
+	src := newState.Reshape(ctx, c.dConv*c.hiddenSize, c.numSeqs()) // flatten to match the buffer's per-slot layout
+	// SetRows requires F32 source
+	srcF32 := src.Cast(ctx, ml.DTypeF32)
+	ctx.Forward(buf.SetRows(ctx, srcF32, c.slotsTensor()))
+}
+
+// IsSupportedForBatch returns true if the current batch layout supports shortconv.
+func (c *HybridCache) IsSupportedForBatch() bool {
+	return c.curSeqTokens > 0 && len(c.curSeqs) > 0 // both are derived by StartForward
+}
+
+// Seqs returns the ordered unique sequences for the current forward pass.
+func (c *HybridCache) Seqs() []int {
+	return slices.Clone(c.curSeqs) // copy: curSeqs' backing array is reused by the next StartForward
+}
--- a/model/models/lfm2/cache_test.go
+++ b/model/models/lfm2/cache_test.go
@@ -0,0 +1,444 @@
+package lfm2
+
+import (
+	"testing"
+
+	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/ml"
+)
+
+// TestHybridCache tests verify the slot management logic of HybridCache.
+// These tests focus on the recurrent state slot allocation, reference counting,
+// and copy-on-write semantics without requiring a full ML backend.
+
+// createSlotOnlyCache builds a HybridCache in which only the slot bookkeeping
+// (sizes, ref counts, free list and the per-sequence/per-layer maps) is populated,
+// so slot logic can be exercised in isolation.
+func createSlotOnlyCache(maxSequences int) *HybridCache {
+	cache := new(HybridCache)
+	cache.hiddenSize = 256
+	cache.dConv = 3
+	cache.maxSequences = maxSequences
+	cache.refCount = make([]int, maxSequences)
+	cache.freeSlots = initFreeSlots(maxSequences)
+	cache.slotForSeq = make(map[int]int)
+	cache.convCtxs = make(map[int]ml.Context)
+	cache.convStates = make(map[int]ml.Tensor)
+	return cache
+}
+
+// initFreeSlots returns the slot indices n-1 down to 0, in that order.
+func initFreeSlots(n int) []int {
+	slots := make([]int, n)
+	for i := range slots {
+		slots[i] = n - 1 - i
+	}
+	return slots
+}
+
+func TestHybridCache_SlotAllocation(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Verify initial state
+	if len(cache.freeSlots) != 4 {
+		t.Errorf("expected 4 free slots, got %d", len(cache.freeSlots))
+	}
+
+	// Allocate all slots
+	for range 4 {
+		slot, err := cache.allocSlot()
+		if err != nil {
+			t.Fatalf("allocSlot failed: %v", err)
+		}
+		cache.refCount[slot] = 1
+	}
+
+	// Should be full now
+	if len(cache.freeSlots) != 0 {
+		t.Errorf("expected 0 free slots, got %d", len(cache.freeSlots))
+	}
+
+	// Trying to allocate another should fail
+	_, err := cache.allocSlot()
+	if err != kvcache.ErrKvCacheFull {
+		t.Errorf("expected ErrKvCacheFull, got %v", err)
+	}
+}
+
+// TestHybridCache_SlotReuse checks that a slot returned to the free list is the
+// next one handed out again.
+func TestHybridCache_SlotReuse(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Allocate a slot; stop immediately on failure so the slot is never used as an index.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.refCount[slot1] = 1
+
+	// Free it
+	cache.refCount[slot1] = 0
+	cache.freeSlot(slot1)
+
+	// Allocate again - should get the same slot back (LIFO)
+	slot2, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	if slot2 != slot1 {
+		t.Errorf("expected slot %d to be reused, got %d", slot1, slot2)
+	}
+}
+
+// TestHybridCache_SlotRefCounting_ShareSlot maps two sequences to one slot by hand
+// and checks the mapping and the resulting reference count.
+func TestHybridCache_SlotRefCounting_ShareSlot(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Allocate slot for seq 1; abort on failure rather than indexing with an invalid slot.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	// Simulate sharing slot with seq 2 (copy-on-write style)
+	cache.slotForSeq[2] = slot1
+	cache.refCount[slot1]++
+
+	// Should share the same slot
+	if cache.slotForSeq[2] != slot1 {
+		t.Errorf("expected seq 2 to share slot %d, got %d", slot1, cache.slotForSeq[2])
+	}
+
+	// Ref count should be 2
+	if cache.refCount[slot1] != 2 {
+		t.Errorf("expected refCount 2, got %d", cache.refCount[slot1])
+	}
+}
+
+// TestHybridCache_SlotRefCounting_DecRef shares a slot between two sequences, removes
+// the second one by hand, and checks the reference count and the mapping afterwards.
+func TestHybridCache_SlotRefCounting_DecRef(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Allocate slot for seq 1; abort on failure rather than indexing with an invalid slot.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	// Share with seq 2
+	cache.slotForSeq[2] = slot1
+	cache.refCount[slot1]++
+
+	// Unshare seq 2
+	cache.refCount[slot1]--
+	delete(cache.slotForSeq, 2)
+
+	// Ref count should be back to 1
+	if cache.refCount[slot1] != 1 {
+		t.Errorf("expected refCount 1 after unshare, got %d", cache.refCount[slot1])
+	}
+
+	// Seq 2 should no longer have a slot
+	if _, ok := cache.slotForSeq[2]; ok {
+		t.Error("seq 2 should not have a slot after unshare")
+	}
+}
+
+// TestHybridCache_SlotFreeWhenUnused allocates one slot, drops its only reference and
+// checks that the free list returns to its initial size with a zero reference count.
+func TestHybridCache_SlotFreeWhenUnused(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	initialFreeSlots := len(cache.freeSlots)
+
+	// Allocate slot for seq 1; abort on failure rather than indexing with an invalid slot.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	// Free the slot when refCount drops to 0
+	cache.refCount[slot1]--
+	if cache.refCount[slot1] <= 0 {
+		cache.refCount[slot1] = 0
+		cache.freeSlot(slot1)
+	}
+	delete(cache.slotForSeq, 1)
+
+	// Slot should be freed
+	if len(cache.freeSlots) != initialFreeSlots {
+		t.Errorf("expected %d free slots, got %d", initialFreeSlots, len(cache.freeSlots))
+	}
+
+	// Ref count should be 0
+	if cache.refCount[slot1] != 0 {
+		t.Errorf("expected refCount 0, got %d", cache.refCount[slot1])
+	}
+}
+
+// TestHybridCache_SlotOverwrite gives two sequences their own slots, then re-points
+// seq 2 at seq 1's slot and checks that seq 2's old slot went back to the free list.
+func TestHybridCache_SlotOverwrite(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Allocate slots for seq 1 and seq 2; abort on failure so neither slot is used
+	// as an index when it is invalid.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	slot2, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[2] = slot2
+	cache.refCount[slot2] = 1
+
+	initialFreeSlots := len(cache.freeSlots)
+
+	// Simulate overwriting seq 2's slot with slot1 (sharing)
+	// First free the old slot
+	cache.refCount[slot2]--
+	if cache.refCount[slot2] <= 0 {
+		cache.refCount[slot2] = 0
+		cache.freeSlot(slot2)
+	}
+	// Then share slot1
+	cache.slotForSeq[2] = slot1
+	cache.refCount[slot1]++
+
+	// Seq 2 should now share slot1
+	if cache.slotForSeq[2] != slot1 {
+		t.Errorf("expected seq 2 to share slot %d, got %d", slot1, cache.slotForSeq[2])
+	}
+
+	// Old slot2 should be freed
+	if len(cache.freeSlots) != initialFreeSlots+1 {
+		t.Errorf("expected %d free slots, got %d", initialFreeSlots+1, len(cache.freeSlots))
+	}
+}
+
+// TestHybridCache_BoundsChecking frees out-of-range slot numbers and checks that
+// the free list keeps its original length (and that nothing panics).
+func TestHybridCache_BoundsChecking(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// A negative index and one far beyond the slot count must both be ignored.
+	for _, invalid := range []int{-1, 100} {
+		cache.freeSlot(invalid)
+	}
+
+	if got := len(cache.freeSlots); got != 4 {
+		t.Errorf("invalid slots should not affect free list, got %d slots", got)
+	}
+}
+
+// TestHybridCache_MultipleSequences_RefCounting forks one slot across four sequences
+// by hand and checks the reference count as sequences are removed in two stages.
+func TestHybridCache_MultipleSequences_RefCounting(t *testing.T) {
+	cache := createSlotOnlyCache(8)
+
+	// Allocate slot for seq 1; abort on failure rather than indexing with an invalid slot.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	// Fork to seq 2, 3, 4 (all share slot1)
+	for _, seq := range []int{2, 3, 4} {
+		cache.slotForSeq[seq] = slot1
+		cache.refCount[slot1]++
+	}
+
+	// Ref count should be 4
+	if cache.refCount[slot1] != 4 {
+		t.Errorf("expected refCount 4, got %d", cache.refCount[slot1])
+	}
+
+	// Remove seq 2, 3
+	for _, seq := range []int{2, 3} {
+		delete(cache.slotForSeq, seq)
+		cache.refCount[slot1]--
+	}
+
+	if cache.refCount[slot1] != 2 {
+		t.Errorf("expected refCount 2, got %d", cache.refCount[slot1])
+	}
+
+	// Slot should still be allocated (not in free list)
+	found := false
+	for _, s := range cache.freeSlots {
+		if s == slot1 {
+			found = true
+			break
+		}
+	}
+	if found {
+		t.Error("slot1 should not be in free list yet")
+	}
+
+	// Remove remaining sequences
+	for _, seq := range []int{1, 4} {
+		delete(cache.slotForSeq, seq)
+		cache.refCount[slot1]--
+	}
+
+	if cache.refCount[slot1] != 0 {
+		t.Errorf("expected refCount 0, got %d", cache.refCount[slot1])
+	}
+}
+
+// TestHybridCache_ChainedSharing shares a slot 1 -> 2 -> 3 by hand and checks that
+// all three sequences end up on the same slot with a reference count of 3.
+func TestHybridCache_ChainedSharing(t *testing.T) {
+	cache := createSlotOnlyCache(8)
+
+	// Create seq 1; abort on failure rather than indexing with an invalid slot.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	// Share 1 -> 2
+	cache.slotForSeq[2] = slot1
+	cache.refCount[slot1]++
+
+	// Share 2 -> 3 (should still share slot1)
+	cache.slotForSeq[3] = cache.slotForSeq[2] // which is slot1
+	cache.refCount[slot1]++
+
+	// All should share slot1
+	if cache.slotForSeq[1] != slot1 || cache.slotForSeq[2] != slot1 || cache.slotForSeq[3] != slot1 {
+		t.Error("all sequences should share slot1")
+	}
+
+	if cache.refCount[slot1] != 3 {
+		t.Errorf("expected refCount 3, got %d", cache.refCount[slot1])
+	}
+}
+
+func TestHybridCache_CacheParameters(t *testing.T) {
+	cache := NewHybridCache(nil, 512, 5) // hiddenSize=512, dConv=5
+
+	if cache.hiddenSize != 512 {
+		t.Errorf("expected hiddenSize 512, got %d", cache.hiddenSize)
+	}
+	if cache.dConv != 5 {
+		t.Errorf("expected dConv 5, got %d", cache.dConv)
+	}
+}
+
+// TestHybridCache_NumSeqs checks numSeqs on a fresh cache and after curSeqs is set by hand.
+func TestHybridCache_NumSeqs(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// No batch has been started yet.
+	if got := cache.numSeqs(); got != 0 {
+		t.Errorf("expected 0 seqs, got %d", got)
+	}
+
+	// Fake a batch containing three sequences.
+	cache.curSeqs = []int{1, 2, 3}
+
+	if got := cache.numSeqs(); got != 3 {
+		t.Errorf("expected 3 seqs, got %d", got)
+	}
+}
+
+// TestHybridCache_SeqTokens checks seqTokens on a fresh cache and after curSeqTokens is set by hand.
+func TestHybridCache_SeqTokens(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// No batch has been started yet.
+	if got := cache.seqTokens(); got != 0 {
+		t.Errorf("expected 0 seqTokens, got %d", got)
+	}
+
+	// Fake a batch with 16 tokens per sequence.
+	cache.curSeqTokens = 16
+
+	if got := cache.seqTokens(); got != 16 {
+		t.Errorf("expected 16 seqTokens, got %d", got)
+	}
+}
+
+// TestHybridCache_Seqs_ReturnsClone checks that writing to the slice returned by Seqs
+// leaves the cache's own curSeqs untouched.
+func TestHybridCache_Seqs_ReturnsClone(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+	cache.curSeqs = []int{1, 2, 3}
+
+	// Overwrite the first element of the returned slice.
+	returned := cache.Seqs()
+	returned[0] = 999
+
+	// The cache's copy must still hold the original value.
+	if first := cache.curSeqs[0]; first != 1 {
+		t.Error("Seqs should return a clone, not the original slice")
+	}
+}
+
+// TestHybridCache_IsSupportedForBatch checks that IsSupportedForBatch is false on a
+// fresh cache and true once a token count and a sequence list are set by hand.
+func TestHybridCache_IsSupportedForBatch(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// No batch has been started yet.
+	if supported := cache.IsSupportedForBatch(); supported {
+		t.Error("expected IsSupportedForBatch to be false initially")
+	}
+
+	// Fake the smallest valid batch: one sequence with one token.
+	cache.curSeqs = []int{1}
+	cache.curSeqTokens = 1
+
+	if supported := cache.IsSupportedForBatch(); !supported {
+		t.Error("expected IsSupportedForBatch to be true with valid batch")
+	}
+}
+
+// TestHybridCache_ZeroConvSlots_EmptyInputs calls zeroConvSlots with a nil context on a
+// cache that has no conv states; the test passes as long as none of the calls panics.
+func TestHybridCache_ZeroConvSlots_EmptyInputs(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Nil slots, empty slots, then real slot numbers with empty convStates.
+	for _, slots := range [][]int{nil, {}, {0, 1, 2}} {
+		cache.zeroConvSlots(nil, slots)
+	}
+}
+
+// TestHybridCache_SlotRecycling_TracksNewSlots frees a sequence's slot and checks that
+// the next allocation hands the same slot out again.
+func TestHybridCache_SlotRecycling_TracksNewSlots(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Allocate slot for seq 1; abort on failure rather than indexing with an invalid slot.
+	slot1, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	cache.slotForSeq[1] = slot1
+	cache.refCount[slot1] = 1
+
+	// Free the slot (simulating sequence removal)
+	cache.refCount[slot1]--
+	cache.freeSlot(slot1)
+	delete(cache.slotForSeq, 1)
+
+	// Verify slot is in free list
+	if len(cache.freeSlots) != 4 {
+		t.Errorf("expected 4 free slots after freeing, got %d", len(cache.freeSlots))
+	}
+
+	// Allocate for new seq 2 - should get recycled slot
+	slot2, err := cache.allocSlot()
+	if err != nil {
+		t.Fatalf("allocSlot failed: %v", err)
+	}
+	if slot2 != slot1 {
+		t.Errorf("expected recycled slot %d, got %d", slot1, slot2)
+	}
+
+	// This recycled slot would need zeroing in the real implementation
+	// The actual zeroing is tested via integration tests since it requires ML context
+}
+
+func TestHybridCache_NewSequence_GetsTrackedForZeroing(t *testing.T) {
+	cache := createSlotOnlyCache(4)
+
+	// Simulate the slot allocation flow from StartForward
+	// When a sequence doesn't have a slot, it gets allocated and tracked as "new"
+
+	newSlots := []int{}
+
+	// Seq 1 doesn't have a slot - allocate and track
+	seq := 1
+	if _, ok := cache.slotForSeq[seq]; !ok {
+		slot, err := cache.allocSlot()
+		if err != nil {
+			t.Fatalf("allocSlot failed: %v", err)
+		}
+		cache.slotForSeq[seq] = slot
+		cache.refCount[slot] = 1
+		newSlots = append(newSlots, slot)
+	}
+
+	// Verify newSlots contains the allocated slot
+	if len(newSlots) != 1 {
+		t.Errorf("expected 1 new slot, got %d", len(newSlots))
+	}
+
+	// Seq 1 already has a slot - should NOT be tracked as new
+	newSlots2 := []int{}
+	if _, ok := cache.slotForSeq[seq]; !ok {
+		slot, _ := cache.allocSlot()
+		cache.slotForSeq[seq] = slot
+		cache.refCount[slot] = 1
+		newSlots2 = append(newSlots2, slot)
+	}
+
+	// Verify no new slots for existing sequence
+	if len(newSlots2) != 0 {
+		t.Errorf("expected 0 new slots for existing sequence, got %d", len(newSlots2))
+	}
+}
--- a/model/models/lfm2/model.go
+++ b/model/models/lfm2/model.go
@@ -0,0 +1,253 @@
+package lfm2
+
+import (
+	"cmp"
+	"math"
+
+	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/ml/nn"
+	"github.com/ollama/ollama/ml/nn/rope"
+	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/model/input"
+)
+
+// Options holds the hyperparameters shared by every layer of the LFM2 model.
+// New fills the scalar fields from the model config and the per-layer slices from
+// the config's head-count accessors.
+type Options struct {
+	// hiddenSize is read from the "embedding_length" config key.
+	hiddenSize       int
+	// headDim and ropeDim come from "attention.key_length" and "rope.dimension_count";
+	// a zero value makes the cmp.Or fallbacks in the methods below take effect.
+	headDim, ropeDim int
+
+	// eps is the epsilon passed to the RMSNorm Forward calls; ropeBase and ropeScale
+	// are passed to nn.RoPE (ropeScale as its reciprocal).
+	eps, ropeBase, ropeScale float32
+
+	// ropeType selects the extra rope options; only "yarn" is treated specially.
+	ropeType              string
+	originalContextLength int
+
+	// per-layer head counts (LFM2 alternates attention and recurrent layers)
+	numHeadsByLayer   []int
+	numKVHeadsByLayer []int
+}
+
+// headDimValue returns the attention head dimension: the configured headDim when it
+// is non-zero, otherwise hiddenSize divided by the first positive per-layer head
+// count (or by 1 when no layer has heads).
+func (o Options) headDimValue() int {
+	heads := 1
+	for _, n := range o.numHeadsByLayer {
+		if n > 0 {
+			heads = n
+			break
+		}
+	}
+	return cmp.Or(o.headDim, o.hiddenSize/heads)
+}
+
+// applyRotaryPositionEmbeddings applies NeoX-type RoPE to states at the given positions.
+// When ropeType is "yarn" it additionally passes the original context length, an
+// extrapolation factor of 1 and an attention factor of 1/(1 + 0.1*ln(ropeScale)).
+// The rotary dimension is the first non-zero of ropeDim, headDim and
+// hiddenSize / (first positive per-layer head count, or 1).
+func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions ml.Tensor) ml.Tensor {
+	ropeOpts := []func(*rope.Options){rope.WithTypeNeoX()}
+	if o.ropeType == "yarn" {
+		scaleLog := math.Log(float64(o.ropeScale))
+		attnFactor := float32(1.0 / (1.0 + 0.1*scaleLog))
+		ropeOpts = append(ropeOpts,
+			rope.WithOriginalContextLength(o.originalContextLength),
+			rope.WithExtrapolationFactor(1.),
+			rope.WithAttentionFactor(attnFactor),
+		)
+	}
+
+	// Attention layers are the ones with a positive head count; use the first of them.
+	heads := 1
+	for _, n := range o.numHeadsByLayer {
+		if n > 0 {
+			heads = n
+			break
+		}
+	}
+
+	rotaryDim := cmp.Or(o.ropeDim, o.headDim, o.hiddenSize/heads)
+	return nn.RoPE(ctx, states, positions, rotaryDim, o.ropeBase, 1./o.ropeScale, ropeOpts...)
+}
+
+// Model is the LFM2 text model: a token embedding, a stack of layers, a final
+// RMS norm and an output projection, plus the shared Options.
+type Model struct {
+	model.Base
+	model.TextProcessor
+
+	// Tensor names come from the gguf tags; NOTE(review): the "alt:" entries look like
+	// fallback tensor names — confirm against the gguf tag loader.
+	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
+	Layers         []Layer       `gguf:"blk"`
+	OutputNorm     *nn.RMSNorm   `gguf:"output_norm,alt:token_embd_norm"`
+	Output         *nn.Linear    `gguf:"output,alt:token_embd"`
+
+	Options
+}
+
+// New builds an LFM2 model from the given config.
+//
+// It returns model.ErrUnsupportedModel when the config declares experts
+// ("expert_count" > 0), when the config does not expose per-layer head counts, or
+// when those head-count slices are shorter than the number of layers. It returns
+// model.ErrUnsupportedTokenizer when "tokenizer.ggml.model" is not "gpt2".
+//
+// A layer whose KV head count is zero gets a ShortConv operator; every other layer
+// gets an Attention operator. The cache is a HybridCache whose conv depth is
+// "shortconv.l_cache" minus one, clamped at zero.
+func New(c fs.Config) (model.Model, error) {
+	if c.Uint("expert_count") > 0 {
+		return nil, model.ErrUnsupportedModel
+	}
+
+	if c.String("tokenizer.ggml.model") != "gpt2" {
+		return nil, model.ErrUnsupportedTokenizer
+	}
+
+	vocabulary := model.Vocabulary{
+		Values: c.Strings("tokenizer.ggml.tokens"),
+		Scores: c.Floats("tokenizer.ggml.scores"),
+		Types:  c.Ints("tokenizer.ggml.token_type"),
+		Merges: c.Strings("tokenizer.ggml.merges"),
+		AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+		BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+		AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
+		EOS: append(
+			[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
+			c.Ints("tokenizer.ggml.eos_token_ids")...,
+		),
+	}
+
+	// "default" keeps the default BPE pretokenizer; anything else uses the
+	// llama-bpe style pattern (default for LFM2).
+	var pretokenizers []string
+	if c.String("tokenizer.ggml.pre") != "default" {
+		pretokenizers = []string{
+			`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
+		}
+	}
+
+	m := Model{
+		TextProcessor: model.NewBytePairEncoding(&vocabulary, pretokenizers...),
+		Layers:        make([]Layer, c.Uint("block_count")),
+		Options: Options{
+			hiddenSize:            int(c.Uint("embedding_length")),
+			headDim:               int(c.Uint("attention.key_length")),
+			ropeDim:               int(c.Uint("rope.dimension_count")),
+			eps:                   c.Float("attention.layer_norm_rms_epsilon"),
+			ropeType:              c.String("rope.scaling.type"),
+			ropeBase:              c.Float("rope.freq_base"),
+			ropeScale:             c.Float("rope.scaling.factor", 1),
+			originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
+		},
+	}
+
+	// Per-layer head counts are not part of fs.Config itself, so probe for them.
+	type headCounts interface {
+		HeadCount() []uint64
+		HeadCountKV() []uint64
+	}
+	hc, ok := c.(headCounts)
+	if !ok {
+		return nil, model.ErrUnsupportedModel
+	}
+
+	headCount := hc.HeadCount()
+	headCountKV := hc.HeadCountKV()
+
+	// Every layer is indexed below; reject short metadata with an error instead of
+	// panicking on an out-of-range index.
+	if len(headCount) < len(m.Layers) || len(headCountKV) < len(m.Layers) {
+		return nil, model.ErrUnsupportedModel
+	}
+
+	m.numHeadsByLayer = make([]int, len(m.Layers))
+	m.numKVHeadsByLayer = make([]int, len(m.Layers))
+	for i := range m.Layers {
+		m.numHeadsByLayer[i] = int(headCount[i])
+		m.numKVHeadsByLayer[i] = int(headCountKV[i])
+
+		// Zero KV heads marks a recurrent (short-conv) layer.
+		if m.numKVHeadsByLayer[i] == 0 {
+			m.Layers[i].Operator = &ShortConv{}
+		} else {
+			m.Layers[i].Operator = &Attention{}
+		}
+	}
+
+	lCache := int(c.Uint("shortconv.l_cache"))
+	dConv := max(0, lCache-1)
+	m.Cache = NewHybridCache(m.Shift, m.hiddenSize, dConv)
+	return &m, nil
+}
+
+// Operator is the token-mixing step of a layer. In this package it is implemented
+// by Attention and ShortConv; New picks one per layer.
+type Operator interface {
+	Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor
+}
+
+// Attention holds the weights of an attention layer: query/key/value projections,
+// RMS norms applied to the query and key, and the output projection.
+type Attention struct {
+	Query     *nn.Linear  `gguf:"attn_q"`
+	QueryNorm *nn.RMSNorm `gguf:"attn_q_norm"`
+	Key       *nn.Linear  `gguf:"attn_k"`
+	KeyNorm   *nn.RMSNorm `gguf:"attn_k_norm"`
+	Value     *nn.Linear  `gguf:"attn_v"`
+	Output    *nn.Linear  `gguf:"attn_output,alt:attn_out"`
+}
+
+// Forward runs attention for one layer: project to query/key/value, reshape to
+// [headDim, heads, batch], RMS-normalise query and key, apply rotary embeddings to
+// both, attend through the cache with a 1/sqrt(headDim) scale, and project the result.
+func (sa *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
+	tokens := hiddenStates.Dim(1)
+	dim := opts.headDimValue()
+	qHeads, kvHeads := opts.numHeadsByLayer[layer], opts.numKVHeadsByLayer[layer]
+
+	q := sa.Query.Forward(ctx, hiddenStates)
+	k := sa.Key.Forward(ctx, hiddenStates)
+	v := sa.Value.Forward(ctx, hiddenStates)
+
+	// Split the projected features into heads.
+	q = q.Reshape(ctx, dim, qHeads, tokens)
+	k = k.Reshape(ctx, dim, kvHeads, tokens)
+	v = v.Reshape(ctx, dim, kvHeads, tokens)
+
+	// Normalise before the rotary embedding is applied.
+	q = sa.QueryNorm.Forward(ctx, q, opts.eps)
+	k = sa.KeyNorm.Forward(ctx, k, opts.eps)
+
+	q = opts.applyRotaryPositionEmbeddings(ctx, q, positions)
+	k = opts.applyRotaryPositionEmbeddings(ctx, k, positions)
+
+	scale := 1. / math.Sqrt(float64(dim))
+	out := nn.Attention(ctx, q, k, v, scale, cache)
+	// Merge the first two dimensions back into one feature axis per token.
+	out = out.Reshape(ctx, out.Dim(0)*out.Dim(1), tokens)
+	return sa.Output.Forward(ctx, out)
+}
+
+// MLP holds the three projections of the gated feed-forward block.
+type MLP struct {
+	Up   *nn.Linear `gguf:"ffn_up"`
+	Down *nn.Linear `gguf:"ffn_down"`
+	Gate *nn.Linear `gguf:"ffn_gate"`
+}
+
+// Forward computes Down(Gate(x).SILU(Up(x))) for the given hidden state. opts is unused.
+func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
+	gate := mlp.Gate.Forward(ctx, hiddenState)
+	up := mlp.Up.Forward(ctx, hiddenState)
+	return mlp.Down.Forward(ctx, gate.SILU(ctx, up))
+}
+
+// Layer is one block of the model: a norm followed by an Operator, then a norm
+// followed by the MLP.
+type Layer struct {
+	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
+	// Operator carries no gguf tag; New assigns either *ShortConv or *Attention per layer.
+	Operator      Operator
+	MLPNorm       *nn.RMSNorm `gguf:"ffn_norm"`
+	MLP           *MLP
+}
+
+// Forward runs one layer: norm -> operator -> residual add, then norm -> MLP ->
+// residual add. When outputs is non-nil, both the operator result and its residual
+// are reduced to the rows selected by outputs before the first add.
+func (l *Layer) Forward(ctx ml.Context, layer int, hiddenState, positions, outputs ml.Tensor, cache *HybridCache, opts *Options) ml.Tensor {
+	skip := hiddenState
+
+	mixed := l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
+	mixed = l.Operator.Forward(ctx, mixed, positions, cache, layer, opts)
+
+	if outputs != nil {
+		mixed = mixed.Rows(ctx, outputs)
+		skip = skip.Rows(ctx, outputs)
+	}
+
+	// First residual connection; its sum is also the skip path for the MLP half.
+	afterMix := mixed.Add(ctx, skip)
+
+	ff := l.MLPNorm.Forward(ctx, afterMix, opts.eps)
+	ff = l.MLP.Forward(ctx, ff, opts)
+	return ff.Add(ctx, afterMix)
+}
+
+// Shift applies the model's rotary position embedding to key using shift as the
+// positions. The layer argument is unused and the error is always nil.
+func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
+	shifted := m.applyRotaryPositionEmbeddings(ctx, key, shift)
+	return shifted, nil
+}
+
+// Forward embeds the batch inputs, runs every layer in order, applies the output
+// norm and returns the output projection. batch.Outputs is handed to the last
+// layer only; all other layers receive a nil outputs tensor.
+func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
+	positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
+	hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
+
+	last := len(m.Layers) - 1
+	for i := range m.Layers {
+		m.Cache.SetLayer(i)
+
+		var outputs ml.Tensor
+		if i == last {
+			outputs = batch.Outputs
+		}
+
+		cache := m.Cache.(*HybridCache)
+		hiddenState = m.Layers[i].Forward(ctx, i, hiddenState, positions, outputs, cache, &m.Options)
+	}
+
+	normed := m.OutputNorm.Forward(ctx, hiddenState, m.eps)
+	return m.Output.Forward(ctx, normed), nil
+}
+
+// init registers New under the architecture name "lfm2".
+func init() {
+	model.Register("lfm2", New)
+}
--- a/model/models/lfm2/shortconv.go
+++ b/model/models/lfm2/shortconv.go
@@ -0,0 +1,50 @@
+package lfm2
+
+import (
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/ml/nn"
+)
+
+// shortConvKernel wraps the convolution weight tensor that ShortConv passes to SSMConv.
+type shortConvKernel struct {
+	Weight ml.Tensor `gguf:"weight"`
+}
+
+// ShortConv implements the LFM2 short-convolution block (GGML_OP_SSM_CONV) with a recurrent
+// state stored in the HybridCache.
+// InProj produces three hiddenSize-wide parts per token, Conv is the convolution
+// kernel, and OutProj maps the gated result back to the hidden size.
+type ShortConv struct {
+	Conv    *shortConvKernel `gguf:"shortconv.conv"`
+	InProj  *nn.Linear       `gguf:"shortconv.in_proj"`
+	OutProj *nn.Linear       `gguf:"shortconv.out_proj"`
+}
+
+// Forward runs the short-convolution operator for one layer.
+//
+// hiddenStates must have nSeqs*seqTokens columns, where both counts come from the
+// cache; the positions argument is ignored. The method panics when the cache
+// reports no sequences or tokens, when the column count does not match, or when
+// the conv state cannot be obtained from the cache.
+func (sc *ShortConv) Forward(ctx ml.Context, hiddenStates ml.Tensor, _ ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
+	nSeqs := cache.numSeqs()
+	seqTokens := cache.seqTokens()
+	hiddenSize := hiddenStates.Dim(0)
+	if nSeqs <= 0 || seqTokens <= 0 || hiddenStates.Dim(1) != nSeqs*seqTokens {
+		panic("lfm2: unsupported batch layout for shortconv")
+	}
+
+	// The input projection yields 3*hiddenSize features per token, laid out per sequence.
+	bcx := sc.InProj.Forward(ctx, hiddenStates).Reshape(ctx, 3*hiddenSize, seqTokens, nSeqs)
+
+	// b, c and x are the three consecutive hiddenSize-wide parts of dimension 0,
+	// taken as views at offsets of 0, 1 and 2 times hiddenSize elements.
+	elementSize := bcx.Stride(0)
+	b := bcx.View(ctx, 0*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
+	c := bcx.View(ctx, 1*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
+	x := bcx.View(ctx, 2*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
+
+	// Gate x with b, then swap the first two dimensions so tokens come first,
+	// matching the [dConv, hiddenSize, nSeqs] layout of the cached state.
+	bx := b.Mul(ctx, x).Permute(ctx, 1, 0, 2, 3)
+
+	state, err := cache.ConvState(ctx, layer)
+	if err != nil {
+		panic("lfm2: failed to get conv state: " + err.Error())
+	}
+	// Prepend the cached state to the new tokens along dimension 0.
+	sx := state.Concat(ctx, bx, 0)
+
+	convOut := sx.SSMConv(ctx, sc.Conv.Weight)
+	y := c.Mul(ctx, convOut)
+
+	// sx has dConv + seqTokens entries along dimension 0; the last dConv of them
+	// become the new cached state.
+	// NOTE(review): Slice arguments read as (dim, start, end, step) — confirm against ml.Tensor.
+	dConv := sx.Dim(0) - seqTokens
+	cache.UpdateConvState(ctx, layer, sx.Slice(ctx, 0, sx.Dim(0)-dConv, sx.Dim(0), 1))
+
+	return sc.OutProj.Forward(ctx, y.Reshape(ctx, hiddenSize, seqTokens*nSeqs))
+}
--- a/model/models/models.go
+++ b/model/models/models.go
@@ -9,6 +9,7 @@ import (
 	_ "github.com/ollama/ollama/model/models/gemma3n"
 	_ "github.com/ollama/ollama/model/models/glm4moelite"
 	_ "github.com/ollama/ollama/model/models/gptoss"
+	_ "github.com/ollama/ollama/model/models/lfm2"
 	_ "github.com/ollama/ollama/model/models/llama"
 	_ "github.com/ollama/ollama/model/models/llama4"
 	_ "github.com/ollama/ollama/model/models/mistral3"
--- a/model/parsers/lfm2.go
+++ b/model/parsers/lfm2.go
@@ -0,0 +1,498 @@
+package parsers
+
+import (
+	"encoding/json"
+	"errors"
+	"log/slog"
+	"strconv"
+	"strings"
+	"unicode"
+
+	"github.com/ollama/ollama/api"
+)
+
+// LFM2ParserState identifies which kind of output the LFM2 parser is currently collecting.
+type LFM2ParserState int
+
+// Parser states. LFM2CollectingThinking is the zero value.
+const (
+	LFM2CollectingThinking LFM2ParserState = iota
+	LFM2CollectingContent
+	LFM2CollectingToolCalls
+)
+
+// Literal tags the parser looks for in the model output.
+const (
+	lfm2ThinkingOpenTag  = "<think>"
+	lfm2ThinkingCloseTag = "</think>"
+	lfm2ToolCallStartTag = "<|tool_call_start|>"
+	lfm2ToolCallEndTag   = "<|tool_call_end|>"
+)
+
+// LFM2Parser incrementally splits LFM2 model output into thinking text, content
+// and tool calls. Text that cannot be classified yet stays in buffer between calls.
+type LFM2Parser struct {
+	state                    LFM2ParserState
+	buffer                   strings.Builder
+	hasThinkingSupport       bool
+	needsThinkingLeadingTrim bool // trim leading whitespace after <think> tag
+	needsContentLeadingTrim  bool // trim leading whitespace after </think> tag
+}
+
+// HasToolSupport always reports true for this parser.
+func (p *LFM2Parser) HasToolSupport() bool {
+	return true
+}
+
+// HasThinkingSupport reports the hasThinkingSupport flag the parser was configured with.
+func (p *LFM2Parser) HasThinkingSupport() bool {
+	return p.hasThinkingSupport
+}
+
+// setInitialState picks the state the parser starts in.
+// It starts in the thinking state (with leading-whitespace trimming armed) only when
+// the parser supports thinking, the request enables it, and the last message is not
+// an assistant message that already carries content. Otherwise it starts in the
+// content state.
+func (p *LFM2Parser) setInitialState(lastMessage *api.Message, thinkValue *api.ThinkValue) {
+	p.state = LFM2CollectingContent
+
+	// Both the model capability and the request preference are required.
+	wantsThinking := p.HasThinkingSupport() && thinkValue != nil && thinkValue.Bool()
+	if !wantsThinking {
+		return
+	}
+
+	// A non-empty assistant prefill means the reply continues as content.
+	continuesAssistant := lastMessage != nil && lastMessage.Role == "assistant" && lastMessage.Content != ""
+	if continuesAssistant {
+		return
+	}
+
+	p.state = LFM2CollectingThinking
+	p.needsThinkingLeadingTrim = true
+}
+
+// Init sets the parser's starting state from the last message and the think
+// setting, and returns tools unchanged.
+func (p *LFM2Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+	p.setInitialState(lastMessage, thinkValue)
+	return tools
+}
+
+// lfm2Event is the closed set of events produced while parsing; the marker method
+// restricts it to the three types below.
+type lfm2Event interface {
+	isLFM2Event()
+}
+
+// lfm2EventThinkingContent carries a piece of thinking text.
+type lfm2EventThinkingContent struct {
+	content string
+}
+
+// lfm2EventContent carries a piece of regular content.
+type lfm2EventContent struct {
+	content string
+}
+
+// lfm2EventToolCall carries one parsed tool call.
+type lfm2EventToolCall struct {
+	toolCall api.ToolCall
+}
+
+func (lfm2EventThinkingContent) isLFM2Event() {}
+func (lfm2EventContent) isLFM2Event()         {}
+func (lfm2EventToolCall) isLFM2Event()        {}
+
+// Add appends s to the parser's buffer, parses whatever can be classified, and
+// returns the content text, thinking text and tool calls found in this step.
+// The done flag is not consulted and the returned error is always nil.
+func (p *LFM2Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.buffer.WriteString(s)
+
+	var contentOut, thinkingOut strings.Builder
+	for _, ev := range p.parseEvents() {
+		switch ev := ev.(type) {
+		case lfm2EventContent:
+			contentOut.WriteString(ev.content)
+		case lfm2EventThinkingContent:
+			thinkingOut.WriteString(ev.content)
+		case lfm2EventToolCall:
+			calls = append(calls, ev.toolCall)
+		}
+	}
+
+	return contentOut.String(), thinkingOut.String(), calls, nil
+}
+
+// parseEvents calls eat repeatedly until it reports that no further progress can
+// be made, and returns all events produced along the way in order.
+func (p *LFM2Parser) parseEvents() []lfm2Event {
+	var collected []lfm2Event
+	for {
+		events, more := p.eat()
+		collected = append(collected, events...)
+		if !more {
+			return collected
+		}
+	}
+}
+
+// eat performs one parsing step on the buffered text according to the current state.
+// It returns the events produced and whether the caller should call eat again; that
+// is true only after a </think> tag or tool-call start tag was consumed, or after a
+// tool-call block was parsed, since the remaining buffer then needs another look.
+// Text that cannot be classified yet is written back to the buffer.
+func (p *LFM2Parser) eat() ([]lfm2Event, bool) {
+	var events []lfm2Event
+	bufStr := p.buffer.String()
+	if bufStr == "" {
+		return events, false
+	}
+
+	switch p.state {
+	case LFM2CollectingThinking:
+		// Strip opening <think> tag if present
+		if strings.HasPrefix(bufStr, lfm2ThinkingOpenTag) {
+			bufStr = bufStr[len(lfm2ThinkingOpenTag):]
+			p.needsThinkingLeadingTrim = true
+			p.buffer.Reset()
+			p.buffer.WriteString(bufStr)
+		}
+
+		// Trim leading whitespace after <think> tag (may span multiple chunks)
+		if p.needsThinkingLeadingTrim {
+			if trimmed := strings.TrimLeftFunc(bufStr, unicode.IsSpace); trimmed != bufStr {
+				bufStr = trimmed
+				p.buffer.Reset()
+				p.buffer.WriteString(bufStr)
+			}
+			// Clear flag once we have non-whitespace content or buffer is empty
+			if len(bufStr) > 0 {
+				p.needsThinkingLeadingTrim = false
+			}
+		}
+
+		if strings.Contains(bufStr, lfm2ThinkingCloseTag) { // thinking[</think>] -> content
+			split := strings.SplitN(bufStr, lfm2ThinkingCloseTag, 2)
+			thinking := split[0]
+			thinking = strings.TrimRightFunc(thinking, unicode.IsSpace)
+
+			remaining := split[1]
+			remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
+
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			p.state = LFM2CollectingContent
+			p.needsThinkingLeadingTrim = false
+			// Set flag to trim any additional whitespace that may arrive in later chunks
+			p.needsContentLeadingTrim = len(remaining) == 0
+
+			if len(thinking) > 0 {
+				events = append(events, lfm2EventThinkingContent{content: thinking})
+			}
+			return events, true
+		} else if overlapLen := overlap(bufStr, lfm2ThinkingCloseTag); overlapLen > 0 { // partial </think>
+			// Hold back the partial tag together with any whitespace directly before it,
+			// because that whitespace is trimmed if the tag turns out to be complete.
+			beforePartialTag := bufStr[:len(bufStr)-overlapLen]
+			trailingLen := trailingWhitespaceLen(beforePartialTag)
+			ambiguousStart := len(beforePartialTag) - trailingLen
+
+			unambiguous := bufStr[:ambiguousStart]
+			ambiguous := bufStr[ambiguousStart:]
+			p.buffer.Reset()
+			p.buffer.WriteString(ambiguous)
+			if len(unambiguous) > 0 {
+				events = append(events, lfm2EventThinkingContent{content: unambiguous})
+			}
+			return events, false
+		} else { // otherwise its thinking content
+			// Trailing whitespace is held back: it is dropped if </think> follows.
+			whitespaceLen := trailingWhitespaceLen(bufStr)
+			ambiguousStart := len(bufStr) - whitespaceLen
+
+			unambiguous := bufStr[:ambiguousStart]
+			ambiguous := bufStr[ambiguousStart:]
+			p.buffer.Reset()
+			p.buffer.WriteString(ambiguous)
+			if len(unambiguous) > 0 {
+				events = append(events, lfm2EventThinkingContent{content: unambiguous})
+			}
+			return events, false
+		}
+
+	case LFM2CollectingContent:
+		// Trim leading whitespace after </think> tag (may span multiple chunks)
+		if p.needsContentLeadingTrim {
+			if trimmed := strings.TrimLeftFunc(bufStr, unicode.IsSpace); trimmed != bufStr {
+				bufStr = trimmed
+				p.buffer.Reset()
+				p.buffer.WriteString(bufStr)
+			}
+			// Clear flag once we have non-whitespace content
+			if len(bufStr) > 0 {
+				p.needsContentLeadingTrim = false
+			}
+		}
+
+		if strings.Contains(bufStr, lfm2ToolCallStartTag) { // content[<|tool_call_start|>] -> tool calls
+			split := strings.SplitN(bufStr, lfm2ToolCallStartTag, 2)
+			contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
+			remaining := split[1]
+
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			p.state = LFM2CollectingToolCalls
+
+			if len(contentBefore) > 0 {
+				events = append(events, lfm2EventContent{content: contentBefore})
+			}
+			return events, true
+		} else { // otherwise its content
+			// NOTE(review): unlike the thinking state, nothing is held back here, so a
+			// tool-call start tag split across two chunks would be emitted as content.
+			p.buffer.Reset()
+			if len(bufStr) > 0 {
+				events = append(events, lfm2EventContent{content: bufStr})
+			}
+			return events, false
+		}
+
+	case LFM2CollectingToolCalls:
+		// Look for complete tool call JSON between tags
+		if idx := strings.Index(bufStr, lfm2ToolCallEndTag); idx != -1 {
+			toolCallContent := bufStr[:idx]
+
+			if toolCalls, err := p.parseToolCallsContent(toolCallContent); err == nil && len(toolCalls) > 0 {
+				remaining := bufStr[idx+len(lfm2ToolCallEndTag):]
+
+				// Check if there's another tool call
+				if strings.HasPrefix(remaining, lfm2ToolCallStartTag) {
+					remaining = remaining[len(lfm2ToolCallStartTag):]
+				} else {
+					// No more tool calls, go back to content
+					remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
+					p.state = LFM2CollectingContent
+				}
+
+				p.buffer.Reset()
+				p.buffer.WriteString(remaining)
+
+				for _, tc := range toolCalls {
+					events = append(events, lfm2EventToolCall{toolCall: tc})
+				}
+				return events, true
+			} else if err != nil {
+				// NOTE(review): the buffer is left untouched on failure, so the same
+				// text is parsed (and this warning logged) again on the next call.
+				slog.Warn("lfm2 tool call parsing failed", "error", err, "content", toolCallContent)
+			}
+		}
+
+		// No end tag yet, or parsing failed: keep buffering.
+		return events, false
+	}
+
+	return events, false
+}
+
+// parseToolCallsContent parses the text between the tool-call tags into one or more
+// tool calls. A JSON object of the form {"name": "func", "arguments": {...}} is tried
+// first; when the text is not such JSON or has no name, it is handed to the
+// Python-style parser, which also accepts several calls: [func1(...),func2(...)].
+// A JSON object whose arguments fail to decode yields that decoding error.
+func (p *LFM2Parser) parseToolCallsContent(content string) ([]api.ToolCall, error) {
+	trimmed := strings.TrimSpace(content)
+
+	var envelope struct {
+		Name      string          `json:"name"`
+		Arguments json.RawMessage `json:"arguments"`
+	}
+	if err := json.Unmarshal([]byte(trimmed), &envelope); err != nil || envelope.Name == "" {
+		// Not the JSON form: [func(arg1='val1'),func2(arg2='val2')] or func(arg1='val1')
+		return p.parsePythonStyleToolCalls(trimmed)
+	}
+
+	var args api.ToolCallFunctionArguments
+	if len(envelope.Arguments) == 0 {
+		// No "arguments" key at all: use an empty argument set.
+		args = api.NewToolCallFunctionArguments()
+	} else if err := json.Unmarshal(envelope.Arguments, &args); err != nil {
+		return nil, err
+	}
+
+	call := api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name:      envelope.Name,
+			Arguments: args,
+		},
+	}
+	return []api.ToolCall{call}, nil
+}
+
+// parsePythonStyleToolCalls parses one or more Python-style calls such as
+// [bash(command='ls'),bash(command='pwd')] or bash(command='ls').
+// It returns an error when a call has no opening parenthesis, an empty name, no
+// matching closing parenthesis or unparsable arguments, or when no call is found.
+func (p *LFM2Parser) parsePythonStyleToolCalls(content string) ([]api.ToolCall, error) {
+	rest := strings.TrimSpace(content)
+
+	// Drop one pair of surrounding brackets: [func(...)] -> func(...)
+	if strings.HasPrefix(rest, "[") && strings.HasSuffix(rest, "]") {
+		rest = rest[1 : len(rest)-1]
+	}
+
+	var calls []api.ToolCall
+	for {
+		// Skip whitespace and at most one separating comma before the next call.
+		rest = strings.TrimSpace(rest)
+		rest = strings.TrimSpace(strings.TrimPrefix(rest, ","))
+		if rest == "" {
+			break
+		}
+
+		open := strings.Index(rest, "(")
+		if open == -1 {
+			return nil, errors.New("invalid tool call: no opening parenthesis")
+		}
+
+		name := strings.TrimSpace(rest[:open])
+		if name == "" {
+			return nil, errors.New("invalid tool call: empty function name")
+		}
+
+		end := findMatchingParen(rest, open)
+		if end == -1 {
+			return nil, errors.New("invalid tool call: no matching closing parenthesis")
+		}
+
+		args := api.NewToolCallFunctionArguments()
+		if inner := rest[open+1 : end]; inner != "" {
+			if err := parsePythonArgs(inner, &args); err != nil {
+				return nil, err
+			}
+		}
+
+		calls = append(calls, api.ToolCall{
+			Function: api.ToolCallFunction{
+				Name:      name,
+				Arguments: args,
+			},
+		})
+
+		// Continue after the closing parenthesis of this call.
+		rest = rest[end+1:]
+	}
+
+	if len(calls) == 0 {
+		return nil, errors.New("no tool calls found")
+	}
+	return calls, nil
+}
+
+// findMatchingParen returns the index of the ')' that balances the '(' at openIdx,
+// or -1 when there is none. Nested parentheses are counted; quoted text is skipped.
+func findMatchingParen(s string, openIdx int) int {
+	nesting := 1
+	for pos := openIdx + 1; pos < len(s); pos++ {
+		c := s[pos]
+		if c == '(' {
+			nesting++
+			continue
+		}
+		if c == ')' {
+			if nesting--; nesting == 0 {
+				return pos
+			}
+			continue
+		}
+		if c != '\'' && c != '"' {
+			continue
+		}
+		// Jump to the closing quote (or end of input), stepping over the
+		// character that follows each backslash so escaped quotes are ignored
+		for pos++; pos < len(s) && s[pos] != c; pos++ {
+			if s[pos] == '\\' && pos+1 < len(s) {
+				pos++
+			}
+		}
+	}
+	return -1
+}
+
+// parseToolCallContent returns only the first tool call found in content (kept for backward compatibility with tests)
+func (p *LFM2Parser) parseToolCallContent(content string) (api.ToolCall, error) {
+	parsed, err := p.parseToolCallsContent(content)
+	switch {
+	case err != nil:
+		return api.ToolCall{}, err
+	case len(parsed) == 0:
+		return api.ToolCall{}, errors.New("no tool call found")
+	}
+	return parsed[0], nil
+}
+
+// parsePythonArgs parses Python-style keyword arguments (key='value', key2="value2") and stores each pair via args.Set
+func parsePythonArgs(argsStr string, args *api.ToolCallFunctionArguments) error {
+	// Single left-to-right scan over key=value pairs; an argument without '=' is rejected with an error.
+	// Handles: command='ls', flag="-la", count=42, enabled=true
+	var key string
+	i := 0
+
+	for i < len(argsStr) {
+		// Skip whitespace
+		for i < len(argsStr) && (argsStr[i] == ' ' || argsStr[i] == '\t' || argsStr[i] == '\n') {
+			i++
+		}
+		if i >= len(argsStr) {
+			break
+		}
+
+		// Parse key
+		keyStart := i
+		for i < len(argsStr) && argsStr[i] != '=' && argsStr[i] != ',' {
+			i++
+		}
+		if i >= len(argsStr) || argsStr[i] != '=' {
+			return errors.New("invalid argument: expected '='")
+		}
+		key = strings.TrimSpace(argsStr[keyStart:i])
+		i++ // skip '='
+
+		// Skip whitespace after =
+		for i < len(argsStr) && (argsStr[i] == ' ' || argsStr[i] == '\t') {
+			i++
+		}
+
+		// Parse value
+		var value string
+		if i < len(argsStr) && (argsStr[i] == '\'' || argsStr[i] == '"') {
+			// Quoted string: stored verbatim, so backslash escapes are stepped over but not decoded
+			quote := argsStr[i]
+			i++
+			valueStart := i
+			for i < len(argsStr) && argsStr[i] != quote {
+				if argsStr[i] == '\\' && i+1 < len(argsStr) {
+					i += 2 // skip escaped char
+				} else {
+					i++
+				}
+			}
+			value = argsStr[valueStart:i]
+			if i < len(argsStr) {
+				i++ // skip closing quote
+			}
+			args.Set(key, value)
+		} else {
+			// Unquoted value (number, bool, etc): runs up to the next comma
+			valueStart := i
+			for i < len(argsStr) && argsStr[i] != ',' {
+				i++
+			}
+			value = strings.TrimSpace(argsStr[valueStart:i])
+
+			// Try number, then bool. ParseFloat also accepts "nan"/"inf" spellings, which have no JSON form; v*0 == 0 holds only for finite v, so those stay strings
+			if v, err := strconv.ParseInt(value, 10, 64); err == nil {
+				args.Set(key, v)
+			} else if v, err := strconv.ParseFloat(value, 64); err == nil && v*0 == 0 {
+				args.Set(key, v)
+			} else if value == "true" {
+				args.Set(key, true)
+			} else if value == "false" {
+				args.Set(key, false)
+			} else {
+				args.Set(key, value)
+			}
+		}
+
+		// Skip comma and whitespace
+		for i < len(argsStr) && (argsStr[i] == ',' || argsStr[i] == ' ' || argsStr[i] == '\t' || argsStr[i] == '\n') {
+			i++
+		}
+	}
+
+	return nil
+}
--- a/model/parsers/lfm2_test.go
+++ b/model/parsers/lfm2_test.go
--- a/model/parsers/ministral.go
+++ b/model/parsers/ministral.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"unicode"

 	"github.com/ollama/ollama/api"
 )
@@ -17,12 +18,34 @@ const (
 	ministralCollectingToolArgs
 )

+// ministralEvent is implemented by every event emitted during parsing: content, thinking, or a tool call
+type ministralEvent interface {
+	isMinistralEvent() // marker method; the implementations below have empty bodies
+}
+
+type ministralEventContent struct {
+	content string // text that Add appends to its content result
+}
+
+type ministralEventThinking struct {
+	thinking string // text that Add appends to its thinking result
+}
+
+type ministralEventToolCall struct {
+	name string // tool name taken from the text before the [ARGS] tag
+	args string // raw JSON string
+}
+
+func (ministralEventContent) isMinistralEvent()  {}
+func (ministralEventThinking) isMinistralEvent() {}
+func (ministralEventToolCall) isMinistralEvent() {}
+
 type MinistralParser struct {
 	state              ministralParserState
 	buffer             strings.Builder
 	tools              []api.Tool
 	hasThinkingSupport bool
-	currentTool        *api.Tool
+	pendingToolName    string // tool name read before [ARGS], held until its JSON arguments are complete
 }

 func (p *MinistralParser) HasToolSupport() bool {
@@ -63,74 +86,251 @@ func toolByName(tools []api.Tool, n string) (*api.Tool, error) {
 	return nil, fmt.Errorf("tool '%s' not found", n)
 }

-func (p *MinistralParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
-	p.buffer.WriteString(s)
+const (
+	ministralToolCallsTag = "[TOOL_CALLS]" // starts a tool call; the tool name follows
+	ministralThinkTag     = "[THINK]"      // starts a thinking section
+	ministralThinkEndTag  = "[/THINK]"     // ends a thinking section
+	ministralArgsTag      = "[ARGS]"       // ends the tool name; the JSON arguments follow
+)
+
+// eat consumes the parser's buffer and returns the events that are already
+// unambiguous in the current parser state; text that may still turn out to be
+// part of a tag stays buffered. The second return value tells the caller whether
+// to call eat again (true after a state transition, false when more data is needed).
+func (p *MinistralParser) eat() ([]ministralEvent, bool) {
+	var events []ministralEvent

 	switch p.state {
 	case ministralCollectingContent:
-		if strings.Contains(p.buffer.String(), "[TOOL_CALLS]") {
-			before, _ := splitAtTag(&p.buffer, "[TOOL_CALLS]", false)
-			if before != "" {
-				return before, "", calls, nil
+		bufStr := p.buffer.String()
+
+		// [TOOL_CALLS] tag, unless a [THINK] tag opens earlier in the buffer: tags are handled in order of appearance
+		if tc, th := strings.Index(bufStr, ministralToolCallsTag), strings.Index(bufStr, ministralThinkTag); tc != -1 && (th == -1 || tc < th) {
+			split := strings.SplitN(bufStr, ministralToolCallsTag, 2)
+			before := strings.TrimRightFunc(split[0], unicode.IsSpace)
+			if len(before) > 0 {
+				events = append(events, ministralEventContent{content: before})
 			}
+			after := split[1]
+			p.buffer.Reset()
+			p.buffer.WriteString(after)
 			p.state = ministralCollectingToolName
-		} else if strings.Contains(p.buffer.String(), "[THINK]") {
+			return events, true
+		}
+
+		// Check for [THINK] tag
+		if strings.Contains(bufStr, ministralThinkTag) {
+			split := strings.SplitN(bufStr, ministralThinkTag, 2)
+			before := strings.TrimRightFunc(split[0], unicode.IsSpace)
+			if len(before) > 0 {
+				events = append(events, ministralEventContent{content: before})
+			}
+			after := split[1]
+			p.buffer.Reset()
+			p.buffer.WriteString(after)
 			p.state = ministralCollectingThinkingContent
-			return "", "", calls, nil
-		} else {
-			p.buffer.Reset()
-			return s, "", calls, nil
+			return events, true
 		}
+
+		// Check whether the buffer ends with the beginning of [TOOL_CALLS] or [THINK]
+		overlapToolCalls := overlap(bufStr, ministralToolCallsTag)
+		overlapThink := overlap(bufStr, ministralThinkTag)
+		maxOverlap := max(overlapToolCalls, overlapThink)
+
+		if maxOverlap > 0 {
+			// Withhold the potential partial tag together with any whitespace right before it
+			beforePartialTag := bufStr[:len(bufStr)-maxOverlap]
+			trailingWS := trailingWhitespaceLen(beforePartialTag)
+			ambiguousStart := len(beforePartialTag) - trailingWS
+			unambiguous := bufStr[:ambiguousStart]
+			ambiguous := bufStr[ambiguousStart:]
+			p.buffer.Reset()
+			p.buffer.WriteString(ambiguous)
+			if len(unambiguous) > 0 {
+				events = append(events, ministralEventContent{content: unambiguous})
+			}
+			return events, false
+		}
+
+		// No tag found: emit content but withhold trailing whitespace (it is trimmed if a tag follows)
+		whitespaceLen := trailingWhitespaceLen(bufStr)
+		ambiguousStart := len(bufStr) - whitespaceLen
+		unambiguous := bufStr[:ambiguousStart]
+		ambiguous := bufStr[ambiguousStart:]
+		p.buffer.Reset()
+		p.buffer.WriteString(ambiguous)
+		if len(unambiguous) > 0 {
+			events = append(events, ministralEventContent{content: unambiguous})
+		}
+		return events, false
+
 	case ministralCollectingThinkingContent:
-		if strings.Contains(p.buffer.String(), "[/THINK]") {
-			thinkingContent, after := splitAtTag(&p.buffer, "[/THINK]", true)
-			p.state = ministralCollectingContent
-			if after != "" {
-				p.buffer.Reset()
-				return after, thinkingContent, calls, nil
-			}
-			return "", thinkingContent, calls, nil
-		} else {
+		bufStr := p.buffer.String()
+
+		if strings.Contains(bufStr, ministralThinkEndTag) {
+			split := strings.SplitN(bufStr, ministralThinkEndTag, 2)
+			thinkingContent := split[0]
+			after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
 			p.buffer.Reset()
-			return "", s, calls, nil
-		}
-	case ministralCollectingToolName:
-		if strings.Contains(p.buffer.String(), "[ARGS]") {
-			name, _ := splitAtTag(&p.buffer, "[ARGS]", false)
-
-			t, err := toolByName(p.tools, name)
-			if err != nil {
-				return "", "", calls, err
+			p.buffer.WriteString(after)
+			if len(thinkingContent) > 0 {
+				events = append(events, ministralEventThinking{thinking: thinkingContent})
 			}
-			p.currentTool = t
-			p.state = ministralCollectingToolArgs
-			return "", "", calls, nil
-		}
-		return "", "", calls, nil
-	case ministralCollectingToolArgs:
-		if strings.Contains(p.buffer.String(), "}") {
-			before, _ := splitAtTag(&p.buffer, "}", false)
-			before += "}"
-
-			var args api.ToolCallFunctionArguments
-			if err := json.Unmarshal([]byte(before), &args); err != nil {
-				// todo - throw a better error
-				return "", "", calls, err
-			}
-
 			p.state = ministralCollectingContent
+			return events, true
+		}

-			call := api.ToolCall{
+		// Check whether the buffer ends with the beginning of [/THINK]
+		if overlapLen := overlap(bufStr, ministralThinkEndTag); overlapLen > 0 {
+			unambiguous := bufStr[:len(bufStr)-overlapLen]
+			ambiguous := bufStr[len(bufStr)-overlapLen:]
+			p.buffer.Reset()
+			p.buffer.WriteString(ambiguous)
+			if len(unambiguous) > 0 {
+				events = append(events, ministralEventThinking{thinking: unambiguous})
+			}
+			return events, false
+		}
+
+		// No tag found: emit all thinking content
+		p.buffer.Reset()
+		if len(bufStr) > 0 {
+			events = append(events, ministralEventThinking{thinking: bufStr})
+		}
+		return events, false
+
+	case ministralCollectingToolName:
+		bufStr := p.buffer.String()
+
+		if strings.Contains(bufStr, ministralArgsTag) {
+			split := strings.SplitN(bufStr, ministralArgsTag, 2)
+			toolName := strings.TrimSpace(split[0])
+			after := split[1]
+			p.pendingToolName = toolName
+			p.buffer.Reset()
+			p.buffer.WriteString(after)
+			p.state = ministralCollectingToolArgs
+			return events, true
+		}
+		// Wait for more data
+		return events, false
+
+	case ministralCollectingToolArgs:
+		bufStr := p.buffer.String()
+		jsonEnd := findJSONEnd(bufStr)
+
+		if jsonEnd != -1 {
+			jsonStr := bufStr[:jsonEnd+1]
+			remaining := bufStr[jsonEnd+1:]
+
+			events = append(events, ministralEventToolCall{
+				name: p.pendingToolName,
+				args: jsonStr,
+			})
+
+			p.pendingToolName = ""
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			p.state = ministralCollectingContent
+			return events, true
+		}
+		// Wait for more data
+		return events, false
+
+	default:
+		panic("unexpected ministral parser state")
+	}
+}
+
+// parseEvents keeps calling eat() and gathers its events until eat() reports that it needs more data
+func (p *MinistralParser) parseEvents() []ministralEvent {
+	var collected []ministralEvent
+	for {
+		batch, more := p.eat()
+		collected = append(collected, batch...)
+		if !more {
+			return collected
+		}
+	}
+}
+
+func (p *MinistralParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.buffer.WriteString(s) // append the new chunk to whatever earlier calls left in the buffer
+
+	events := p.parseEvents() // NOTE(review): done is never consulted, so text still withheld in the buffer is not flushed here — confirm intended
+
+	var contentBuilder, thinkingBuilder strings.Builder
+	var toolCalls []api.ToolCall
+
+	for _, event := range events {
+		switch e := event.(type) {
+		case ministralEventContent:
+			contentBuilder.WriteString(e.content)
+		case ministralEventThinking:
+			thinkingBuilder.WriteString(e.thinking)
+		case ministralEventToolCall:
+			// The tool must be one of p.tools; on failure, return what was gathered so far along with the error
+			tool, toolErr := toolByName(p.tools, e.name)
+			if toolErr != nil {
+				return contentBuilder.String(), thinkingBuilder.String(), toolCalls, toolErr
+			}
+			// Decode the raw JSON captured by the parser; invalid JSON is reported the same way
+			var args api.ToolCallFunctionArguments
+			if jsonErr := json.Unmarshal([]byte(e.args), &args); jsonErr != nil {
+				return contentBuilder.String(), thinkingBuilder.String(), toolCalls, jsonErr
+			}
+			toolCalls = append(toolCalls, api.ToolCall{
 				Function: api.ToolCallFunction{
-					Name:      p.currentTool.Function.Name,
+					Name:      tool.Function.Name,
 					Arguments: args,
 				},
-			}
-			calls = append(calls, call)
-			return "", "", calls, nil
+			})
 		}
-		return "", "", calls, nil
 	}

-	return p.buffer.String(), thinking, calls, nil
+	return contentBuilder.String(), thinkingBuilder.String(), toolCalls, nil
+}
+
+// findJSONEnd returns the byte index of the closing bracket that brings the
+// nesting of objects and arrays in s back to zero. Brackets inside string literals,
+// and any character following a backslash there, are ignored. Returns -1 otherwise.
+func findJSONEnd(s string) int {
+	const (
+		outside    = iota // not inside a string literal
+		inside            // inside a string literal
+		afterSlash        // inside a string literal, directly after a backslash
+	)
+
+	mode, nesting := outside, 0
+
+	// Every byte that matters here is ASCII, so a byte-wise scan is safe on UTF-8
+	for pos := 0; pos < len(s); pos++ {
+		b := s[pos]
+
+		switch mode {
+		case afterSlash:
+			// Whatever follows a backslash is taken literally
+			mode = inside
+		case inside:
+			if b == '\\' {
+				mode = afterSlash
+			} else if b == '"' {
+				mode = outside
+			}
+		default:
+			// Structural characters only count outside string literals
+			switch b {
+			case '"':
+				mode = inside
+			case '{', '[':
+				nesting++
+			case '}', ']':
+				if nesting--; nesting == 0 {
+					return pos
+				}
+			}
+		}
+	}
+
+	return -1
 }
--- a/model/parsers/ministral_test.go
+++ b/model/parsers/ministral_test.go
@@ -0,0 +1,545 @@
+package parsers
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/ollama/ollama/api"
+)
+
+func TestMinistralParserStreaming(t *testing.T) {
+	type step struct {
+		input      string           // chunk appended to the parser buffer
+		wantEvents []ministralEvent // events parseEvents must return after that chunk
+	}
+
+	cases := []struct {
+		desc  string     // subtest name
+		tools []api.Tool // tools passed to Init
+		steps []step     // chunks fed, in order, to one parser instance
+		think bool       // whether to enable thinking support
+	}{
+		// Content streaming
+		{
+			desc: "simple content",
+			steps: []step{
+				{input: "Hello, how can I help you?", wantEvents: []ministralEvent{
+					ministralEventContent{content: "Hello, how can I help you?"},
+				}},
+			},
+		},
+		{
+			desc: "streaming content word by word",
+			steps: []step{
+				{input: "Hello,", wantEvents: []ministralEvent{ministralEventContent{content: "Hello,"}}},
+				{input: " how", wantEvents: []ministralEvent{ministralEventContent{content: " how"}}},
+				{input: " can I help?", wantEvents: []ministralEvent{ministralEventContent{content: " can I help?"}}},
+			},
+		},
+
+		// Simple tool calls
+		{
+			desc:  "simple tool call",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "San Francisco"}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "get_weather", args: `{"location": "San Francisco"}`},
+				}},
+			},
+		},
+		{
+			desc:  "tool call with nested object",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]create_entities[ARGS]{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
+				}},
+			},
+		},
+		{
+			desc:  "tool call with deeply nested objects",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "update_config"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]update_config[ARGS]{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "update_config", args: `{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`},
+				}},
+			},
+		},
+		{
+			desc:  "tool call with array of objects",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "process_items"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]process_items[ARGS]{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "process_items", args: `{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`},
+				}},
+			},
+		},
+		{
+			desc:  "tool call with escaped quotes in string",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "search"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]search[ARGS]{"query": "say \"hello\""}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "search", args: `{"query": "say \"hello\""}`},
+				}},
+			},
+		},
+		{
+			desc:  "tool call with braces inside string",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "format"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]format[ARGS]{"template": "Hello {name}!"}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "format", args: `{"template": "Hello {name}!"}`},
+				}},
+			},
+		},
+		{
+			desc:  "empty JSON object",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "no_args"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]no_args[ARGS]{}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "no_args", args: `{}`},
+				}},
+			},
+		},
+		{
+			desc:  "JSON with newlines in string",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "write"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]write[ARGS]{"content": "line1\nline2\nline3"}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "write", args: `{"content": "line1\nline2\nline3"}`},
+				}},
+			},
+		},
+		{
+			desc:  "backslash in string value",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "path"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]path[ARGS]{"dir": "C:\\Users\\test"}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "path", args: `{"dir": "C:\\Users\\test"}`},
+				}},
+			},
+		},
+
+		// Content after tool call
+		{
+			desc:  "content after tool call",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				// NOTE: It's unclear if this is valid Ministral output, but the parser
+				// currently treats text after a tool call as regular content. This test
+				// documents that behavior so we notice if it changes.
+				{input: `[TOOL_CALLS]test[ARGS]{"a": 1}some content after`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "test", args: `{"a": 1}`},
+					ministralEventContent{content: "some content after"},
+				}},
+			},
+		},
+
+		// Multiple tool calls
+		{
+			desc: "multiple tool calls in sequence",
+			tools: []api.Tool{
+				{Function: api.ToolFunction{Name: "get_weather"}},
+				{Function: api.ToolFunction{Name: "get_time"}},
+			},
+			steps: []step{
+				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "NYC"}[TOOL_CALLS]get_time[ARGS]{"timezone": "EST"}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "get_weather", args: `{"location": "NYC"}`},
+					ministralEventToolCall{name: "get_time", args: `{"timezone": "EST"}`},
+				}},
+			},
+		},
+		{
+			desc: "multiple tool calls streamed separately",
+			tools: []api.Tool{
+				{Function: api.ToolFunction{Name: "tool_a"}},
+				{Function: api.ToolFunction{Name: "tool_b"}},
+			},
+			steps: []step{
+				{input: `[TOOL_CALLS]tool_a[ARGS]{"x": 1}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "tool_a", args: `{"x": 1}`},
+				}},
+				{input: `[TOOL_CALLS]tool_b[ARGS]{"y": 2}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "tool_b", args: `{"y": 2}`},
+				}},
+			},
+		},
+
+		// Streaming tool calls: no event is expected until the JSON arguments are complete
+		{
+			desc:  "streaming tool call with nested objects",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
+			steps: []step{
+				{input: "[TOOL_CALLS]create_entities[ARGS]", wantEvents: []ministralEvent{}},
+				{input: `{"entities": [{"entityType": "Person",`, wantEvents: []ministralEvent{}},
+				{input: ` "name": "Jack",`, wantEvents: []ministralEvent{}},
+				{input: ` "observations": ["Works`, wantEvents: []ministralEvent{}},
+				{input: ` as a baker"]}`, wantEvents: []ministralEvent{}},
+				{input: `]}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
+				}},
+			},
+		},
+		{
+			desc:  "streaming with incomplete JSON waits for completion",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				{input: "[TOOL_CALLS]test[ARGS]{", wantEvents: []ministralEvent{}},
+				{input: `"a": {`, wantEvents: []ministralEvent{}},
+				{input: `"b": 1`, wantEvents: []ministralEvent{}},
+				{input: `}`, wantEvents: []ministralEvent{}},
+				{input: `}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "test", args: `{"a": {"b": 1}}`},
+				}},
+			},
+		},
+
+		// Partial tag handling: a possible tag prefix is withheld until it is confirmed or ruled out
+		{
+			desc: "partial tool tag fakeout",
+			steps: []step{
+				{input: "abc[TOOL", wantEvents: []ministralEvent{ministralEventContent{content: "abc"}}},
+				{input: " not a tag", wantEvents: []ministralEvent{ministralEventContent{content: "[TOOL not a tag"}}},
+			},
+		},
+		{
+			desc:  "tool call tag split across chunks",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				{input: "[TOOL_", wantEvents: []ministralEvent{}},
+				{input: "CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "test", args: `{}`},
+				}},
+			},
+		},
+		{
+			desc:  "content before tool call",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
+			steps: []step{
+				{input: "hello [TOOL_CALLS]get_weather[ARGS]{}", wantEvents: []ministralEvent{
+					ministralEventContent{content: "hello"},
+					ministralEventToolCall{name: "get_weather", args: `{}`},
+				}},
+			},
+		},
+		{
+			desc:  "whitespace between content and tool call is trimmed",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				{input: "content \n [TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
+					ministralEventContent{content: "content"},
+					ministralEventToolCall{name: "test", args: `{}`},
+				}},
+			},
+		},
+		{
+			desc:  "tabs and newlines before tool call are trimmed",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				{input: "content\t\n\t[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
+					ministralEventContent{content: "content"},
+					ministralEventToolCall{name: "test", args: `{}`},
+				}},
+			},
+		},
+		{
+			desc:  "non-breaking space before tool call is trimmed",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				// \u00a0 is non-breaking space, which unicode.IsSpace considers whitespace
+				{input: "content\u00a0[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
+					ministralEventContent{content: "content"},
+					ministralEventToolCall{name: "test", args: `{}`},
+				}},
+			},
+		},
+		{
+			desc: "whitespace before THINK tag is trimmed",
+			steps: []step{
+				{input: "content \n [THINK]thinking[/THINK]after", wantEvents: []ministralEvent{
+					ministralEventContent{content: "content"},
+					ministralEventThinking{thinking: "thinking"},
+					ministralEventContent{content: "after"},
+				}},
+			},
+		},
+		{
+			desc: "trailing whitespace withheld then emitted",
+			steps: []step{
+				{input: "Hello ", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
+				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: " world"}}},
+			},
+		},
+		{
+			desc: "trailing newline withheld then emitted",
+			steps: []step{
+				{input: "Hello\n", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
+				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: "\nworld"}}},
+			},
+		},
+
+		// Thinking support
+		{
+			desc:  "thinking content",
+			think: true,
+			steps: []step{
+				{input: "thinking here[/THINK]", wantEvents: []ministralEvent{
+					ministralEventThinking{thinking: "thinking here"},
+				}},
+				{input: "content after", wantEvents: []ministralEvent{
+					ministralEventContent{content: "content after"},
+				}},
+			},
+		},
+		{
+			desc:  "thinking with whitespace after end tag",
+			think: true,
+			steps: []step{
+				{input: "my thoughts[/THINK]  \n  response", wantEvents: []ministralEvent{
+					ministralEventThinking{thinking: "my thoughts"},
+					ministralEventContent{content: "response"},
+				}},
+			},
+		},
+		{
+			desc:  "non-breaking space after think end tag is trimmed",
+			think: true,
+			steps: []step{
+				// \u00a0 is non-breaking space
+				{input: "thinking[/THINK]\u00a0response", wantEvents: []ministralEvent{
+					ministralEventThinking{thinking: "thinking"},
+					ministralEventContent{content: "response"},
+				}},
+			},
+		},
+		{
+			desc:  "partial think end tag",
+			think: true,
+			steps: []step{
+				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
+				{input: "NK]after", wantEvents: []ministralEvent{ministralEventContent{content: "after"}}},
+			},
+		},
+		{
+			desc:  "think tag fakeout",
+			think: true,
+			steps: []step{
+				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
+				{input: "not end tag", wantEvents: []ministralEvent{ministralEventThinking{thinking: "[/THInot end tag"}}},
+			},
+		},
+		{
+			desc:  "thinking then tool call",
+			think: true,
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
+			steps: []step{
+				{input: "let me think[/THINK][TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
+					ministralEventThinking{thinking: "let me think"},
+					ministralEventToolCall{name: "test", args: `{}`},
+				}},
+			},
+		},
+
+		// Content then THINK tag transition
+		{
+			desc: "content then think tag",
+			steps: []step{
+				{input: "content[THINK]thinking[/THINK]more", wantEvents: []ministralEvent{
+					ministralEventContent{content: "content"},
+					ministralEventThinking{thinking: "thinking"},
+					ministralEventContent{content: "more"},
+				}},
+			},
+		},
+
+		// Unicode handling
+		{
+			desc: "unicode content",
+			steps: []step{
+				{input: "你好 🌍 مرحبا", wantEvents: []ministralEvent{
+					ministralEventContent{content: "你好 🌍 مرحبا"},
+				}},
+			},
+		},
+		{
+			desc:  "unicode in tool args",
+			tools: []api.Tool{{Function: api.ToolFunction{Name: "greet"}}},
+			steps: []step{
+				{input: `[TOOL_CALLS]greet[ARGS]{"message": "你好 🌍"}`, wantEvents: []ministralEvent{
+					ministralEventToolCall{name: "greet", args: `{"message": "你好 🌍"}`},
+				}},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			parser := MinistralParser{}
+			parser.hasThinkingSupport = tc.think
+			parser.Init(tc.tools, nil, nil)
+
+			for i, step := range tc.steps {
+				parser.buffer.WriteString(step.input) // write to the buffer directly so parseEvents is checked without going through Add
+				gotEvents := parser.parseEvents()
+
+				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
+					// avoid deep equal on empty vs. nil slices
+					continue
+				}
+
+				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+func TestMinistralParser_Errors(t *testing.T) {
+	// Each case registers a single tool and feeds one complete tool call that Add must reject
+	cases := []struct {
+		desc, tool, input, failMsg string
+	}{
+		{"unknown tool returns error", "known_tool", `[TOOL_CALLS]unknown_tool[ARGS]{"a": 1}`, "expected error for unknown tool"},
+		{"invalid JSON returns error", "test", `[TOOL_CALLS]test[ARGS]{invalid json}`, "expected error for invalid JSON"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			p := &MinistralParser{}
+			p.Init([]api.Tool{{Function: api.ToolFunction{Name: tc.tool}}}, nil, nil)
+
+			// only the error result matters here
+			if _, _, _, err := p.Add(tc.input, true); err == nil {
+				t.Fatal(tc.failMsg)
+			}
+		})
+	}
+}
+
+func TestFindJSONEnd(t *testing.T) {
+	cases := []struct {
+		desc string
+		text string
+		want int
+	}{
+		{
+			desc: "simple object",
+			text: `{"a": 1}`,
+			want: 7,
+		},
+		{
+			desc: "nested object",
+			text: `{"a": {"b": 2}}`,
+			want: 14,
+		},
+		{
+			desc: "array inside object",
+			text: `{"items": [1, 2, 3]}`,
+			want: 19,
+		},
+		{
+			desc: "braces in string",
+			text: `{"template": "Hello {name}!"}`,
+			want: 28,
+		},
+		{
+			desc: "escaped quotes",
+			text: `{"msg": "say \"hi\""}`,
+			want: 20,
+		},
+		{
+			desc: "incomplete object",
+			text: `{"a": {"b": 1}`,
+			want: -1,
+		},
+		{
+			desc: "deeply nested",
+			text: `{"a": {"b": {"c": {"d": 1}}}}`,
+			want: 28,
+		},
+		{
+			desc: "object with trailing content",
+			text: `{"a": 1} extra`,
+			want: 7,
+		},
+		{
+			desc: "array",
+			text: `[{"a": 1}, {"b": 2}]`,
+			want: 19,
+		},
+		{
+			desc: "escaped backslash before quote",
+			text: `{"path": "C:\\"}`,
+			want: 15,
+		},
+		{
+			desc: "empty string",
+			text: "",
+			want: -1,
+		},
+		{
+			desc: "no opening brace",
+			text: "hello world",
+			want: -1,
+		},
+		{
+			desc: "only opening brace",
+			text: "{",
+			want: -1,
+		},
+		{
+			desc: "unclosed string",
+			text: `{"key": "unclosed`,
+			want: -1,
+		},
+		{
+			desc: "double escaped backslash then quote",
+			text: `{"path": "C:\\\\"}`,
+			want: 17,
+		},
+		{
+			desc: "unicode in key and value",
+			text: `{"키": "값"}`,
+			want: 13,
+		},
+		{
+			desc: "nested arrays",
+			text: `{"matrix": [[1, 2], [3, 4]]}`,
+			want: 27,
+		},
+		{
+			desc: "mixed nesting",
+			text: `{"a": [{"b": {"c": [1, 2, 3]}}]}`,
+			want: 31,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			// want is a byte index into text, or -1 when the JSON is not complete
+			if got := findJSONEnd(tc.text); got != tc.want {
+				t.Errorf("findJSONEnd(%q) = %d, want %d", tc.text, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestMinistralParser_HasToolSupport(t *testing.T) {
+	var parser MinistralParser
+	if supported := parser.HasToolSupport(); !supported {
+		t.Error("expected HasToolSupport to return true")
+	}
+}
+
+func TestMinistralParser_HasThinkingSupport(t *testing.T) {
+	disabled := &MinistralParser{hasThinkingSupport: false}
+	if got := disabled.HasThinkingSupport(); got {
+		t.Error("expected HasThinkingSupport to return false")
+	}
+
+	enabled := &MinistralParser{hasThinkingSupport: true}
+	if got := enabled.HasThinkingSupport(); !got {
+		t.Error("expected HasThinkingSupport to return true")
+	}
+}
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -3,6 +3,7 @@ package parsers
 import (
 	"strings"
 	"unicode"
+	"unicode/utf8"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/harmony"
@@ -70,6 +71,10 @@ func ParserForName(name string) Parser {
 		return &FunctionGemmaParser{}
 	case "glm-4.7":
 		return &GLM47Parser{}
+	case "lfm2":
+		return &LFM2Parser{hasThinkingSupport: false}
+	case "lfm2-thinking":
+		return &LFM2Parser{hasThinkingSupport: true}
 	default:
 		return nil
 	}
@@ -110,3 +115,33 @@ func splitAtTag(sb *strings.Builder, tag string, trimAfter bool) (string, string
 	sb.WriteString(after)
 	return before, after // return events
 }
+
+// overlap reports the byte length of the longest suffix of s that is also a prefix of delim (0 if none)
+func overlap(s, delim string) int {
+	// try the longest candidate first so the first match is the answer
+	for n := min(len(delim), len(s)); n > 0; n-- {
+		if strings.HasSuffix(s, delim[:n]) {
+			return n
+		}
+	}
+	return 0
+}
+
+// trailingWhitespaceLen reports how many bytes at the end of s are whitespace (per unicode.IsSpace)
+func trailingWhitespaceLen(s string) int {
+	end := len(s)
+	for end > 0 {
+		r, width := utf8.DecodeLastRuneInString(s[:end])
+		if r == utf8.RuneError && width == 1 {
+			// an invalid utf8 byte is never treated as whitespace
+			break
+		}
+		if !unicode.IsSpace(r) {
+			break
+		}
+		// step back over the whitespace rune that was just decoded
+		end -= width
+	}
+	// everything from end onwards was whitespace
+	return len(s) - end
+}
--- a/model/parsers/qwen3coder.go
+++ b/model/parsers/qwen3coder.go
@@ -11,7 +11,6 @@ import (
 	"strconv"
 	"strings"
 	"unicode"
-	"unicode/utf8"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/logutil"
@@ -194,36 +193,6 @@ func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
 	}
 }

-// TODO(drifkin): move this to a shared location
-// longest overlap between suffix of s and prefix of delim
-func overlap(s, delim string) int {
-	max := min(len(delim), len(s))
-	for i := max; i > 0; i-- {
-		if strings.HasSuffix(s, delim[:i]) {
-			return i
-		}
-	}
-	return 0
-}
-
-func trailingWhitespaceLen(s string) int {
-	remaining := s
-	total := 0
-	for len(remaining) > 0 {
-		r, size := utf8.DecodeLastRuneInString(remaining)
-		// if it's an invalid utf8 rune, assume it isn't whitespace
-		if r == utf8.RuneError && size == 1 {
-			break
-		}
-		if !unicode.IsSpace(r) {
-			break
-		}
-		total += size
-		remaining = remaining[:len(remaining)-size]
-	}
-	return total
-}
-
 type XMLFunctionCall struct {
 	XMLName    xml.Name       `xml:"function"`
 	Name       string         `xml:"name,attr"`
--- a/model/renderers/lfm2.go
+++ b/model/renderers/lfm2.go
@@ -0,0 +1,144 @@
+package renderers
+
+import (
+	"encoding/json"
+	"strings"
+
+	"github.com/ollama/ollama/api"
+)
+
+type LFM2Renderer struct {
+	IsThinking bool // with an enabled thinkValue, Render keeps </think> sections of past assistant messages
+}
+
+// Render builds the LFM2 prompt text for messages and tools.
+//
+// A leading system message is merged with the JSON tool list into a single
+// system block; each remaining system, user, assistant, or tool message is
+// wrapped in <|im_start|>role ... <|im_end|> markers. Messages with any
+// other role are skipped.
+//
+// Assistant content containing </think> keeps its thinking section only for
+// the last assistant message, or when r.IsThinking is set and thinkValue is
+// enabled; otherwise only the text after </think> is rendered.
+//
+// When messages is not empty the prompt ends with an open assistant header
+// for generation. An error is returned if a tool definition or a tool call
+// cannot be marshaled to JSON.
+func (r *LFM2Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
+	var sb strings.Builder
+
+	// Note: BOS token is added by the tokenizer (add_bos_token: true), not the renderer
+
+	// Extract first system message if present (to combine with tools)
+	var firstSystemContent string
+	startIdx := 0
+	if len(messages) > 0 && messages[0].Role == "system" {
+		firstSystemContent = messages[0].Content
+		startIdx = 1
+	}
+
+	// Output the first system block if there is system content or any tools
+	if firstSystemContent != "" || len(tools) > 0 {
+		sb.WriteString("<|im_start|>system\n")
+		sb.WriteString(firstSystemContent)
+		if len(tools) > 0 {
+			// The tool list goes on its own line after any system content
+			if firstSystemContent != "" {
+				sb.WriteString("\n")
+			}
+			sb.WriteString("List of tools: [")
+			for i, tool := range tools {
+				toolJSON, err := json.Marshal(tool)
+				if err != nil {
+					return "", err
+				}
+				if i > 0 {
+					sb.WriteString(", ")
+				}
+				sb.Write(toolJSON)
+			}
+			sb.WriteString("]")
+		}
+		sb.WriteString("<|im_end|>\n")
+	}
+
+	// Find the index of the last assistant message for thinking stripping
+	lastAssistantIndex := -1
+	for i := len(messages) - 1; i >= startIdx; i-- {
+		if messages[i].Role == "assistant" {
+			lastAssistantIndex = i
+			break
+		}
+	}
+
+	// Thinking in past assistant messages is kept only when this is a
+	// thinking renderer and the request enabled thinking
+	keepPastThinking := r.IsThinking && thinkValue != nil && thinkValue.Bool()
+
+	for i := startIdx; i < len(messages); i++ {
+		message := messages[i]
+		switch message.Role {
+		case "system":
+			// Additional system messages (after the first) are rendered normally
+			sb.WriteString("<|im_start|>system\n")
+			sb.WriteString(message.Content)
+			sb.WriteString("<|im_end|>\n")
+
+		case "user":
+			sb.WriteString("<|im_start|>user\n")
+			sb.WriteString(message.Content)
+			sb.WriteString("<|im_end|>\n")
+
+		case "assistant":
+			sb.WriteString("<|im_start|>assistant\n")
+
+			// Handle thinking tags in assistant content
+			content := message.Content
+			if thinking, rest, found := strings.Cut(content, "</think>"); found {
+				if i != lastAssistantIndex && !keepPastThinking {
+					// Strip thinking entirely for past assistant messages
+					content = strings.TrimSpace(rest)
+				} else {
+					// Preserve thinking but trim whitespace after </think>
+					content = thinking + "</think>" + strings.TrimLeft(rest, " \t\n\r")
+				}
+			}
+
+			// Content (if any is left after stripping) precedes the tool calls
+			sb.WriteString(content)
+
+			for _, toolCall := range message.ToolCalls {
+				toolCallJSON := map[string]any{
+					"name":      toolCall.Function.Name,
+					"arguments": toolCall.Function.Arguments,
+				}
+				// Fail the render rather than emit an empty tool call body
+				callJSON, err := json.Marshal(toolCallJSON)
+				if err != nil {
+					return "", err
+				}
+				sb.WriteString("<|tool_call_start|>")
+				sb.Write(callJSON)
+				sb.WriteString("<|tool_call_end|>")
+			}
+
+			sb.WriteString("<|im_end|>\n")
+
+		case "tool":
+			// Tool responses are rendered as plain messages per the chat template
+			sb.WriteString("<|im_start|>tool\n")
+			sb.WriteString(message.Content)
+			sb.WriteString("<|im_end|>\n")
+		}
+	}
+
+	// Add generation prompt whenever at least one message was supplied
+	if len(messages) > 0 {
+		sb.WriteString("<|im_start|>assistant\n")
+		// Note: Model is a "thinking-only" model - it will output <think> itself
+		// We don't add <think> tag to the prompt
+	}
+
+	return sb.String(), nil
+}
--- a/model/renderers/lfm2_test.go
+++ b/model/renderers/lfm2_test.go
@@ -0,0 +1,427 @@
+package renderers
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/ollama/ollama/api"
+)
+
+func TestLFM2Renderer(t *testing.T) {
+	tests := []struct {
+		name       string
+		messages   []api.Message
+		tools      []api.Tool
+		thinkValue *api.ThinkValue
+		expected   string
+	}{
+		{
+			name: "basic user message",
+			messages: []api.Message{
+				{Role: "user", Content: "Hello!"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "basic with system message",
+			messages: []api.Message{
+				{Role: "system", Content: "You are a helpful assistant."},
+				{Role: "user", Content: "Hello!"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "multiple system messages rendered separately",
+			messages: []api.Message{
+				{Role: "system", Content: "First instruction."},
+				{Role: "system", Content: "Second instruction."},
+				{Role: "user", Content: "Hello!"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>system\nFirst instruction.<|im_end|>\n<|im_start|>system\nSecond instruction.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "multi-turn conversation",
+			messages: []api.Message{
+				{Role: "user", Content: "What is 2+2?"},
+				{Role: "assistant", Content: "The answer is 4."},
+				{Role: "user", Content: "Thanks!"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\nThe answer is 4.<|im_end|>\n<|im_start|>user\nThanks!<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "only system message",
+			messages: []api.Message{
+				{Role: "system", Content: "You are helpful."},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>system\nYou are helpful.<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// When assistant is the LAST assistant, thinking is preserved (even with keep_past_thinking=false)
+			name: "user-assistant-user: last assistant preserves thinking",
+			messages: []api.Message{
+				{Role: "user", Content: "Q1"},
+				{Role: "assistant", Content: "<think>reasoning</think>A1"},
+				{Role: "user", Content: "Q2"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\n<think>reasoning</think>A1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// With two assistants, first is stripped (not last), second preserved (is last)
+			name: "multi-turn thinking: first stripped, second preserved",
+			messages: []api.Message{
+				{Role: "user", Content: "Q1"},
+				{Role: "assistant", Content: "<think>reason1</think>A1"},
+				{Role: "user", Content: "Q2"},
+				{Role: "assistant", Content: "<think>reason2</think>A2"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\nA1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n<think>reason2</think>A2<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// With thinking enabled (keep_past_thinking=true), both preserved
+			name: "multi-turn thinking: both preserved when thinking enabled",
+			messages: []api.Message{
+				{Role: "user", Content: "Q1"},
+				{Role: "assistant", Content: "<think>reason1</think>A1"},
+				{Role: "user", Content: "Q2"},
+				{Role: "assistant", Content: "<think>reason2</think>A2"},
+			},
+			thinkValue: &api.ThinkValue{Value: true},
+			expected:   "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\n<think>reason1</think>A1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n<think>reason2</think>A2<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "assistant with tool calls",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
+		},
+		{
+			name: "assistant with content and tool calls",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather in Paris?"},
+				{
+					Role:    "assistant",
+					Content: "Let me check.",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   `<|im_start|>user` + "\n" + `What's the weather in Paris?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `Let me check.<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
+		},
+		{
+			name: "tool response",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+				{Role: "assistant", Content: "Let me check."},
+				{Role: "tool", Content: "22C, Sunny"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\nLet me check.<|im_end|>\n<|im_start|>tool\n22C, Sunny<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "multiple tool calls",
+			messages: []api.Message{
+				{Role: "user", Content: "Get weather for Paris and London"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "London",
+								}),
+							},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   `<|im_start|>user` + "\n" + `Get weather for Paris and London<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|tool_call_start|>{"arguments":{"location":"London"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
+		},
+		{
+			name: "tools definitions with system message",
+			messages: []api.Message{
+				{Role: "system", Content: "You are helpful."},
+				{Role: "user", Content: "What's the weather?"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get current weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"location": {
+									Type:        api.PropertyType{"string"},
+									Description: "City name",
+								},
+							}),
+							Required: []string{"location"},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   `<|im_start|>system` + "\n" + `You are helpful.` + "\n" + `List of tools: [{"type":"function","function":{"name":"get_weather","description":"Get current weather","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"City name"}}}}}]<|im_end|>` + "\n" + `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
+		},
+		{
+			name: "tools definitions without system message",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get current weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"location": {
+									Type:        api.PropertyType{"string"},
+									Description: "City name",
+								},
+							}),
+							Required: []string{"location"},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   `<|im_start|>system` + "\n" + `List of tools: [{"type":"function","function":{"name":"get_weather","description":"Get current weather","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"City name"}}}}}]<|im_end|>` + "\n" + `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
+		},
+		{
+			name: "multiple tools without system message",
+			messages: []api.Message{
+				{Role: "user", Content: "Hello"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+					},
+				},
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_time",
+						Description: "Get time",
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>system\nList of tools: [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather\",\"parameters\":{\"type\":\"\",\"properties\":null}}}, {\"type\":\"function\",\"function\":{\"name\":\"get_time\",\"description\":\"Get time\",\"parameters\":{\"type\":\"\",\"properties\":null}}}]<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "user-tool sequence",
+			messages: []api.Message{
+				{Role: "user", Content: "Check weather"},
+				{Role: "tool", Content: "22C"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nCheck weather<|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "full tool call cycle",
+			messages: []api.Message{
+				{Role: "user", Content: "Check weather"},
+				{Role: "assistant", Content: "Let me check"},
+				{Role: "tool", Content: "22C"},
+				{Role: "assistant", Content: "It's 22C"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nCheck weather<|im_end|>\n<|im_start|>assistant\nLet me check<|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\nIt's 22C<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "unicode content",
+			messages: []api.Message{
+				{Role: "user", Content: "你好世界! مرحبا 🌍"},
+				{Role: "assistant", Content: "Hello! 👋"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\n你好世界! مرحبا 🌍<|im_end|>\n<|im_start|>assistant\nHello! 👋<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "newlines in content",
+			messages: []api.Message{
+				{Role: "user", Content: "Line 1\nLine 2\n\nLine 4"},
+				{Role: "assistant", Content: "Response with\nmultiple\nlines"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nLine 1\nLine 2\n\nLine 4<|im_end|>\n<|im_start|>assistant\nResponse with\nmultiple\nlines<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			name: "empty assistant content",
+			messages: []api.Message{
+				{Role: "user", Content: "Hello"},
+				{Role: "assistant", Content: ""},
+				{Role: "user", Content: "OK"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n<|im_end|>\n<|im_start|>user\nOK<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// Generation prompt does NOT include <think> - model outputs it
+			name: "generation prompt has no think tag",
+			messages: []api.Message{
+				{Role: "user", Content: "Think hard"},
+			},
+			thinkValue: &api.ThinkValue{Value: true},
+			expected:   "<|im_start|>user\nThink hard<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// Interleaved: thinking before tool call - last assistant preserves thinking
+			name: "thinking before tool call (last assistant)",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+				{
+					Role:    "assistant",
+					Content: "<think>I need to check the weather</think>",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<think>I need to check the weather</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// Two assistants with tool calls - first has thinking stripped
+			name: "two assistants with tools: first thinking stripped",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+				{
+					Role:    "assistant",
+					Content: "<think>checking</think>",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "22C"},
+				{Role: "assistant", Content: "<think>got result</think>It's 22C!"},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n<think>got result</think>It's 22C!<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// Two assistants with tools - both preserved when thinking enabled
+			name: "two assistants with tools: both preserved when thinking enabled",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+				{
+					Role:    "assistant",
+					Content: "<think>checking</think>",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "22C"},
+				{Role: "assistant", Content: "<think>got result</think>It's 22C!"},
+			},
+			thinkValue: &api.ThinkValue{Value: true},
+			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<think>checking</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n<think>got result</think>It's 22C!<|im_end|>\n<|im_start|>assistant\n",
+		},
+		{
+			// Content before thinking before tool call
+			name: "content then thinking then tool call",
+			messages: []api.Message{
+				{Role: "user", Content: "What's the weather?"},
+				{
+					Role:    "assistant",
+					Content: "Let me check.<think>Using weather API</think>",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get_weather",
+								Arguments: testArgs(map[string]any{
+									"location": "Paris",
+								}),
+							},
+						},
+					},
+				},
+			},
+			thinkValue: &api.ThinkValue{Value: false},
+			expected:   "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\nLet me check.<think>Using weather API</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>assistant\n",
+		},
+	}
+
+	renderer := &LFM2Renderer{IsThinking: true}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
+			if err != nil {
+				t.Fatalf("Render() error = %v", err)
+			}
+			if diff := cmp.Diff(tt.expected, rendered); diff != "" {
+				t.Errorf("Render() mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
--- a/model/renderers/renderer.go
+++ b/model/renderers/renderer.go
@@ -82,6 +82,10 @@ func rendererForName(name string) Renderer {
 		return &FunctionGemmaRenderer{}
 	case "glm-4.7":
 		return &GLM47Renderer{}
+	case "lfm2":
+		return &LFM2Renderer{IsThinking: false}
+	case "lfm2-thinking":
+		return &LFM2Renderer{IsThinking: true}
 	default:
 		return nil
 	}
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -794,3 +794,47 @@ func ToImageGenerationResponse(resp api.GenerateResponse) ImageGenerationRespons
 		Data:    data,
 	}
 }
+
+// ImageEditRequest is an OpenAI-compatible image edit request.
+type ImageEditRequest struct {
+	Model  string `json:"model"`          // copied to GenerateRequest.Model by FromImageEditRequest
+	Prompt string `json:"prompt"`         // copied to GenerateRequest.Prompt by FromImageEditRequest
+	Image  string `json:"image"`          // Base64-encoded image data
+	Size   string `json:"size,omitempty"` // e.g., "1024x1024"
+	Seed   *int64 `json:"seed,omitempty"` // optional; nil means no "seed" option is set
+}
+
+// FromImageEditRequest builds the Ollama GenerateRequest equivalent of an
+// OpenAI image edit request. An Image that cannot be decoded is reported as
+// an error, while a Size that fmt.Sscanf cannot read as "%dx%d" is silently
+// ignored.
+func FromImageEditRequest(r ImageEditRequest) (api.GenerateRequest, error) {
+	var req api.GenerateRequest
+	req.Model = r.Model
+	req.Prompt = r.Prompt
+
+	// Attach the source image, if one was supplied
+	if r.Image != "" {
+		decoded, err := decodeImageURL(r.Image)
+		if err != nil {
+			return api.GenerateRequest{}, fmt.Errorf("invalid image: %w", err)
+		}
+		req.Images = append(req.Images, decoded)
+	}
+
+	// Best-effort parse of the requested dimensions (e.g., "1024x768")
+	if r.Size != "" {
+		var width, height int32
+		_, err := fmt.Sscanf(r.Size, "%dx%d", &width, &height)
+		if err == nil {
+			req.Width, req.Height = width, height
+		}
+	}
+
+	// req was built above, so Options is still nil when a seed is present
+	if r.Seed != nil {
+		req.Options = map[string]any{"seed": *r.Seed}
+	}
+
+	return req, nil
+}
--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@@ -448,3 +448,86 @@ func TestFromChatRequest_TopLogprobsRange(t *testing.T) {
 		})
 	}
 }
+
+func TestFromImageEditRequest_Basic(t *testing.T) {
+	// A minimal request: model, prompt, and one encoded image
+	got, err := FromImageEditRequest(ImageEditRequest{
+		Model:  "test-model",
+		Prompt: "make it blue",
+		Image:  prefix + image,
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Model and prompt are copied through unchanged
+	if model := got.Model; model != "test-model" {
+		t.Errorf("expected model 'test-model', got %q", model)
+	}
+	if prompt := got.Prompt; prompt != "make it blue" {
+		t.Errorf("expected prompt 'make it blue', got %q", prompt)
+	}
+
+	// The single input image is decoded and attached
+	if n := len(got.Images); n != 1 {
+		t.Fatalf("expected 1 image, got %d", n)
+	}
+}
+
+func TestFromImageEditRequest_WithSize(t *testing.T) {
+	// "512x768" should be split into width 512 and height 768
+	got, err := FromImageEditRequest(ImageEditRequest{
+		Model:  "test-model",
+		Prompt: "make it blue",
+		Image:  prefix + image,
+		Size:   "512x768",
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// The number before the "x" is the width
+	if width := got.Width; width != 512 {
+		t.Errorf("expected width 512, got %d", width)
+	}
+
+	if height := got.Height; height != 768 {
+		t.Errorf("expected height 768, got %d", height)
+	}
+}
+
+func TestFromImageEditRequest_WithSeed(t *testing.T) {
+	seed := int64(12345)
+	got, err := FromImageEditRequest(ImageEditRequest{
+		Model:  "test-model",
+		Prompt: "make it blue",
+		Image:  prefix + image,
+		Seed:   &seed,
+	})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// A seed must cause the options map to be created
+	if got.Options == nil {
+		t.Fatal("expected options to be set")
+	}
+
+	// The stored value must still be an int64 for this comparison to hold
+	if stored := got.Options["seed"]; stored != seed {
+		t.Errorf("expected seed %d, got %v", seed, stored)
+	}
+}
+
+func TestFromImageEditRequest_InvalidImage(t *testing.T) {
+	// An image string that cannot be decoded must be rejected
+	_, err := FromImageEditRequest(ImageEditRequest{
+		Model:  "test-model",
+		Prompt: "make it blue",
+		Image:  "not-valid-base64",
+	})
+
+	if err == nil {
+		t.Error("expected error for invalid image")
+	}
+}
--- a/readline/readline.go
+++ b/readline/readline.go
@@ -95,7 +95,21 @@ func (i *Instance) Readline() (string, error) {

 	var currentLineBuf []rune

+	// draining tracks if we're processing buffered input from cooked mode.
+	// In cooked mode Enter sends \n, but in raw mode Ctrl+J sends \n.
+	// We treat \n from cooked mode as submit, not multiline.
+	// We check Buffered() after the first read since the bufio buffer is
+	// empty until then. This is compatible with """ multiline mode in
+	// interactive.go since each Readline() call is independent.
+	var draining, stopDraining bool
+
 	for {
+		// Apply deferred state change from previous iteration
+		if stopDraining {
+			draining = false
+			stopDraining = false
+		}
+
 		// don't show placeholder when pasting unless we're in multiline mode
 		showPlaceholder := !i.Pasting || i.Prompt.UseAlt
 		if buf.IsEmpty() && showPlaceholder {
@@ -105,6 +119,15 @@ func (i *Instance) Readline() (string, error) {

 		r, err := i.Terminal.Read()

+		// After reading, check if there's more buffered data. If so, we're
+		// processing cooked-mode input. Once buffer empties, the current
+		// char is the last buffered one (still drain it), then stop next iteration.
+		if i.Terminal.reader.Buffered() > 0 {
+			draining = true
+		} else if draining {
+			stopDraining = true
+		}
+
 		if buf.IsEmpty() {
 			fmt.Print(ClearToEOL)
 		}
@@ -232,15 +255,20 @@ func (i *Instance) Readline() (string, error) {
 			fd := os.Stdin.Fd()
 			return handleCharCtrlZ(fd, i.Terminal.termios)
 		case CharCtrlJ:
-			i.pastedLines = append(i.pastedLines, buf.String())
-			buf.Buf.Clear()
-			buf.Pos = 0
-			buf.DisplayPos = 0
-			buf.LineHasSpace.Clear()
-			fmt.Println()
-			fmt.Print(i.Prompt.AltPrompt)
-			i.Prompt.UseAlt = true
-			continue
+			// If not draining cooked-mode input, treat as multiline
+			if !draining {
+				i.pastedLines = append(i.pastedLines, buf.String())
+				buf.Buf.Clear()
+				buf.Pos = 0
+				buf.DisplayPos = 0
+				buf.LineHasSpace.Clear()
+				fmt.Println()
+				fmt.Print(i.Prompt.AltPrompt)
+				i.Prompt.UseAlt = true
+				continue
+			}
+			// Draining cooked-mode input: treat \n as submit
+			fallthrough
 		case CharEnter:
 			output := buf.String()
 			if len(i.pastedLines) > 0 {
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -14,8 +14,8 @@
 VOL_NAME=${VOL_NAME:-"Ollama"}
 export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
 export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${VERSION#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
-export CGO_CFLAGS="-mmacosx-version-min=14.0"
-export CGO_CXXFLAGS="-mmacosx-version-min=14.0"
+export CGO_CFLAGS="-O3 -mmacosx-version-min=14.0"
+export CGO_CXXFLAGS="-O3 -mmacosx-version-min=14.0"
 export CGO_LDFLAGS="-mmacosx-version-min=14.0"

 set -e
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -56,6 +56,12 @@ function checkEnv {

    $script:DIST_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
    $env:CGO_ENABLED="1"
+    if (-not $env:CGO_CFLAGS) {
+        $env:CGO_CFLAGS = "-O3"
+    }
+    if (-not $env:CGO_CXXFLAGS) {
+        $env:CGO_CXXFLAGS = "-O3"
+    }
    Write-Output "Checking version"
    if (!$env:VERSION) {
        $data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always)
--- a/server/create.go
+++ b/server/create.go
@@ -28,6 +28,7 @@ import (
 	"github.com/ollama/ollama/format"
 	ofs "github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
@@ -90,7 +91,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
 			ch <- resp
 		}

-		oldManifest, _ := ParseNamedManifest(name)
+		oldManifest, _ := manifest.ParseNamedManifest(name)

 		var baseLayers []*layerGGML
 		var err error
@@ -123,9 +124,9 @@ func (s *Server) CreateHandler(c *gin.Context) {
 				}

 				if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "") {
-					manifest, mErr := ParseNamedManifest(fromName)
-					if mErr == nil && manifest.Config.Digest != "" {
-						configPath, pErr := GetBlobsPath(manifest.Config.Digest)
+					mf, mErr := manifest.ParseNamedManifest(fromName)
+					if mErr == nil && mf.Config.Digest != "" {
+						configPath, pErr := manifest.BlobsPath(mf.Config.Digest)
 						if pErr == nil {
 							if cfgFile, fErr := os.Open(configPath); fErr == nil {
 								var baseConfig model.ConfigV2
@@ -342,7 +343,7 @@ func detectModelTypeFromFiles(files map[string]string) string {
 			return "gguf"
 		} else {
 			// try to see if we can find a gguf file even without the file extension
-			blobPath, err := GetBlobsPath(files[fn])
+			blobPath, err := manifest.BlobsPath(files[fn])
 			if err != nil {
 				slog.Error("error getting blobs path", "file", fn)
 				return ""
@@ -394,7 +395,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
 			return nil, fmt.Errorf("%w: %s: %s", errFilePath, err, fp)
 		}

-		blobPath, err := GetBlobsPath(digest)
+		blobPath, err := manifest.BlobsPath(digest)
 		if err != nil {
 			return nil, err
 		}
@@ -432,7 +433,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
 		return nil, err
 	}

-	layer, err := NewLayer(t, mediaType)
+	layer, err := manifest.NewLayer(t, mediaType)
 	if err != nil {
 		return nil, err
 	}
@@ -465,7 +466,7 @@ func kvFromLayers(baseLayers []*layerGGML) (ofs.Config, error) {
 }

 func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, config *model.ConfigV2, fn func(resp api.ProgressResponse)) (err error) {
-	var layers []Layer
+	var layers []manifest.Layer
 	for _, layer := range baseLayers {
 		if layer.GGML != nil {
 			quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization))
@@ -550,13 +551,13 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
 	}

 	for _, layer := range layers {
-		if layer.status != "" {
-			fn(api.ProgressResponse{Status: layer.status})
+		if layer.Status != "" {
+			fn(api.ProgressResponse{Status: layer.Status})
 		}
 	}

 	fn(api.ProgressResponse{Status: "writing manifest"})
-	if err := WriteManifest(name, *configLayer, layers); err != nil {
+	if err := manifest.WriteManifest(name, *configLayer, layers); err != nil {
 		return err
 	}

@@ -577,7 +578,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
 		return nil, err
 	}

-	blob, err := GetBlobsPath(layer.Digest)
+	blob, err := manifest.BlobsPath(layer.Digest)
 	if err != nil {
 		return nil, err
 	}
@@ -599,7 +600,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
 	}
 	temp.Seek(0, io.SeekStart)
 	fn(api.ProgressResponse{Status: "verifying conversion"})
-	newLayer, err := NewLayer(temp, layer.MediaType)
+	newLayer, err := manifest.NewLayer(temp, layer.MediaType)
 	if err != nil {
 		return nil, err
 	}
@@ -619,7 +620,7 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 	var layers []*layerGGML

 	fn(api.ProgressResponse{Status: "parsing GGUF"})
-	blobPath, err := GetBlobsPath(digest)
+	blobPath, err := manifest.BlobsPath(digest)
 	if err != nil {
 		return nil, err
 	}
@@ -654,7 +655,7 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 		mediatype = "application/vnd.ollama.image.projector"
 	}

-	layer, err := NewLayerFromLayer(digest, mediatype, blob.Name())
+	layer, err := manifest.NewLayerFromLayer(digest, mediatype, blob.Name())
 	if err != nil {
 		slog.Debug("could not create new layer from layer", "error", err)
 		return nil, err
@@ -665,8 +666,8 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
 	return detectChatTemplate(layers)
 }

-func removeLayer(layers []Layer, mediatype string) []Layer {
-	return slices.DeleteFunc(layers, func(layer Layer) bool {
+func removeLayer(layers []manifest.Layer, mediatype string) []manifest.Layer {
+	return slices.DeleteFunc(layers, func(layer manifest.Layer) bool {
 		if layer.MediaType != mediatype {
 			return false
 		}
@@ -680,7 +681,7 @@ func removeLayer(layers []Layer, mediatype string) []Layer {
 	})
 }

-func setTemplate(layers []Layer, t string) ([]Layer, error) {
+func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) {
 	layers = removeLayer(layers, "application/vnd.ollama.image.template")
 	if _, err := template.Parse(t); err != nil {
 		return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
@@ -690,7 +691,7 @@ func setTemplate(layers []Layer, t string) ([]Layer, error) {
 	}

 	blob := strings.NewReader(t)
-	layer, err := NewLayer(blob, "application/vnd.ollama.image.template")
+	layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.template")
 	if err != nil {
 		return nil, err
 	}
@@ -699,11 +700,11 @@ func setTemplate(layers []Layer, t string) ([]Layer, error) {
 	return layers, nil
 }

-func setSystem(layers []Layer, s string) ([]Layer, error) {
+func setSystem(layers []manifest.Layer, s string) ([]manifest.Layer, error) {
 	layers = removeLayer(layers, "application/vnd.ollama.image.system")
 	if s != "" {
 		blob := strings.NewReader(s)
-		layer, err := NewLayer(blob, "application/vnd.ollama.image.system")
+		layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.system")
 		if err != nil {
 			return nil, err
 		}
@@ -712,9 +713,9 @@ func setSystem(layers []Layer, s string) ([]Layer, error) {
 	return layers, nil
 }

-func setLicense(layers []Layer, l string) ([]Layer, error) {
+func setLicense(layers []manifest.Layer, l string) ([]manifest.Layer, error) {
 	blob := strings.NewReader(l)
-	layer, err := NewLayer(blob, "application/vnd.ollama.image.license")
+	layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.license")
 	if err != nil {
 		return nil, err
 	}
@@ -722,7 +723,7 @@ func setLicense(layers []Layer, l string) ([]Layer, error) {
 	return layers, nil
 }

-func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
+func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer, error) {
 	if p == nil {
 		p = make(map[string]any)
 	}
@@ -731,7 +732,7 @@ func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
 			continue
 		}

-		digestPath, err := GetBlobsPath(layer.Digest)
+		digestPath, err := manifest.BlobsPath(layer.Digest)
 		if err != nil {
 			return nil, err
 		}
@@ -765,7 +766,7 @@ func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
 	if err := json.NewEncoder(&b).Encode(p); err != nil {
 		return nil, err
 	}
-	layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
+	layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.params")
 	if err != nil {
 		return nil, err
 	}
@@ -773,7 +774,7 @@ func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
 	return layers, nil
 }

-func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
+func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, error) {
 	// this leaves the old messages intact if no new messages were specified
 	// which may not be the correct behaviour
 	if len(m) == 0 {
@@ -786,7 +787,7 @@ func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
 	if err := json.NewEncoder(&b).Encode(m); err != nil {
 		return nil, err
 	}
-	layer, err := NewLayer(&b, "application/vnd.ollama.image.messages")
+	layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.messages")
 	if err != nil {
 		return nil, err
 	}
@@ -794,7 +795,7 @@ func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
 	return layers, nil
 }

-func createConfigLayer(layers []Layer, config model.ConfigV2) (*Layer, error) {
+func createConfigLayer(layers []manifest.Layer, config model.ConfigV2) (*manifest.Layer, error) {
 	digests := make([]string, len(layers))
 	for i, layer := range layers {
 		digests[i] = layer.Digest
@@ -805,7 +806,7 @@ func createConfigLayer(layers []Layer, config model.ConfigV2) (*Layer, error) {
 	if err := json.NewEncoder(&b).Encode(config); err != nil {
 		return nil, err
 	}
-	layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	layer, err := manifest.NewLayer(&b, "application/vnd.docker.container.image.v1+json")
 	if err != nil {
 		return nil, err
 	}
--- a/server/create_test.go
+++ b/server/create_test.go
@@ -10,6 +10,7 @@ import (
 	"testing"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/manifest"
 )

 func TestConvertFromSafetensors(t *testing.T) {
@@ -17,7 +18,7 @@ func TestConvertFromSafetensors(t *testing.T) {

 	// Helper function to create a new layer and return its digest
 	makeTemp := func(content string) string {
-		l, err := NewLayer(strings.NewReader(content), "application/octet-stream")
+		l, err := manifest.NewLayer(strings.NewReader(content), "application/octet-stream")
 		if err != nil {
 			t.Fatalf("Failed to create layer: %v", err)
 		}
--- a/server/download.go
+++ b/server/download.go
@@ -24,6 +24,8 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/manifest"
+	"github.com/ollama/ollama/types/model"
 )

 const maxRetries = 6
@@ -456,7 +458,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
 }

 type downloadOpts struct {
-	mp      ModelPath
+	n       model.Name
 	digest  string
 	regOpts *registryOptions
 	fn      func(api.ProgressResponse)
@@ -465,10 +467,10 @@ type downloadOpts struct {
 // downloadBlob downloads a blob from the registry and stores it in the blobs directory
 func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ error) {
 	if opts.digest == "" {
-		return false, fmt.Errorf(("%s: %s"), opts.mp.GetNamespaceRepository(), "digest is empty")
+		return false, fmt.Errorf(("%s: %s"), opts.n.DisplayNamespaceModel(), "digest is empty")
 	}

-	fp, err := GetBlobsPath(opts.digest)
+	fp, err := manifest.BlobsPath(opts.digest)
 	if err != nil {
 		return false, err
 	}
@@ -492,8 +494,8 @@ func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ erro
 	data, ok := blobDownloadManager.LoadOrStore(opts.digest, &blobDownload{Name: fp, Digest: opts.digest})
 	download := data.(*blobDownload)
 	if !ok {
-		requestURL := opts.mp.BaseURL()
-		requestURL = requestURL.JoinPath("v2", opts.mp.GetNamespaceRepository(), "blobs", opts.digest)
+		requestURL := opts.n.BaseURL()
+		requestURL = requestURL.JoinPath("v2", opts.n.DisplayNamespaceModel(), "blobs", opts.digest)
 		if err := download.Prepare(ctx, requestURL, opts.regOpts); err != nil {
 			blobDownloadManager.Delete(opts.digest)
 			return false, err
--- a/server/images.go
+++ b/server/images.go
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"context"
 	"crypto/sha256"
-	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -24,6 +23,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/gguf"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
@@ -75,12 +75,6 @@ type Model struct {
 func (m *Model) Capabilities() []model.Capability {
 	capabilities := []model.Capability{}

-	// Check for image generation model via config capabilities
-	if slices.Contains(m.Config.Capabilities, "image") {
-		return []model.Capability{model.CapabilityImage}
-	}
-
-	// Check for completion capability
 	if m.ModelPath != "" {
 		f, err := gguf.Open(m.ModelPath)
 		if err == nil {
@@ -274,44 +268,22 @@ func (m *Model) String() string {
 	return modelfile.String()
 }

-func GetManifest(mp ModelPath) (*Manifest, string, error) {
-	fp, err := mp.GetManifestPath()
-	if err != nil {
-		return nil, "", err
-	}
-
-	f, err := os.Open(fp)
-	if err != nil {
-		return nil, "", err
-	}
-	defer f.Close()
-
-	sha256sum := sha256.New()
-
-	var manifest Manifest
-	if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&manifest); err != nil {
-		return nil, "", err
-	}
-
-	return &manifest, hex.EncodeToString(sha256sum.Sum(nil)), nil
-}
-
 func GetModel(name string) (*Model, error) {
-	mp := ParseModelPath(name)
-	manifest, digest, err := GetManifest(mp)
+	n := model.ParseName(name)
+	mf, err := manifest.ParseNamedManifest(n)
 	if err != nil {
 		return nil, err
 	}

-	model := &Model{
-		Name:      mp.GetFullTagname(),
-		ShortName: mp.GetShortTagname(),
-		Digest:    digest,
+	m := &Model{
+		Name:      n.String(),
+		ShortName: n.DisplayShortest(),
+		Digest:    mf.Digest(),
 		Template:  template.DefaultTemplate,
 	}

-	if manifest.Config.Digest != "" {
-		filename, err := GetBlobsPath(manifest.Config.Digest)
+	if mf.Config.Digest != "" {
+		filename, err := manifest.BlobsPath(mf.Config.Digest)
 		if err != nil {
 			return nil, err
 		}
@@ -322,29 +294,29 @@ func GetModel(name string) (*Model, error) {
 		}
 		defer configFile.Close()

-		if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
+		if err := json.NewDecoder(configFile).Decode(&m.Config); err != nil {
 			return nil, err
 		}
 	}

-	for _, layer := range manifest.Layers {
-		filename, err := GetBlobsPath(layer.Digest)
+	for _, layer := range mf.Layers {
+		filename, err := manifest.BlobsPath(layer.Digest)
 		if err != nil {
 			return nil, err
 		}

 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
-			model.ModelPath = filename
-			model.ParentModel = layer.From
+			m.ModelPath = filename
+			m.ParentModel = layer.From
 		case "application/vnd.ollama.image.embed":
 			// Deprecated in versions  > 0.1.2
 			// TODO: remove this warning in a future version
 			slog.Info("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
 		case "application/vnd.ollama.image.adapter":
-			model.AdapterPaths = append(model.AdapterPaths, filename)
+			m.AdapterPaths = append(m.AdapterPaths, filename)
 		case "application/vnd.ollama.image.projector":
-			model.ProjectorPaths = append(model.ProjectorPaths, filename)
+			m.ProjectorPaths = append(m.ProjectorPaths, filename)
 		case "application/vnd.ollama.image.prompt",
 			"application/vnd.ollama.image.template":
 			bts, err := os.ReadFile(filename)
@@ -352,7 +324,7 @@ func GetModel(name string) (*Model, error) {
 				return nil, err
 			}

-			model.Template, err = template.Parse(string(bts))
+			m.Template, err = template.Parse(string(bts))
 			if err != nil {
 				return nil, err
 			}
@@ -362,7 +334,7 @@ func GetModel(name string) (*Model, error) {
 				return nil, err
 			}

-			model.System = string(bts)
+			m.System = string(bts)
 		case "application/vnd.ollama.image.params":
 			params, err := os.Open(filename)
 			if err != nil {
@@ -371,7 +343,7 @@ func GetModel(name string) (*Model, error) {
 			defer params.Close()

 			// parse model options parameters into a map so that we can see which fields have been specified explicitly
-			if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
+			if err = json.NewDecoder(params).Decode(&m.Options); err != nil {
 				return nil, err
 			}
 		case "application/vnd.ollama.image.messages":
@@ -381,7 +353,7 @@ func GetModel(name string) (*Model, error) {
 			}
 			defer msgs.Close()

-			if err = json.NewDecoder(msgs).Decode(&model.Messages); err != nil {
+			if err = json.NewDecoder(msgs).Decode(&m.Messages); err != nil {
 				return nil, err
 			}
 		case "application/vnd.ollama.image.license":
@@ -389,11 +361,11 @@ func GetModel(name string) (*Model, error) {
 			if err != nil {
 				return nil, err
 			}
-			model.License = append(model.License, string(bts))
+			m.License = append(m.License, string(bts))
 		}
 	}

-	return model, nil
+	return m, nil
 }

 func CopyModel(src, dst model.Name) error {
@@ -408,7 +380,7 @@ func CopyModel(src, dst model.Name) error {
 		return nil
 	}

-	manifests, err := GetManifestPath()
+	manifests, err := manifest.Path()
 	if err != nil {
 		return err
 	}
@@ -437,7 +409,7 @@ func CopyModel(src, dst model.Name) error {

 func deleteUnusedLayers(deleteMap map[string]struct{}) error {
 	// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
-	manifests, err := Manifests(true)
+	manifests, err := manifest.Manifests(true)
 	if err != nil {
 		return err
 	}
@@ -452,7 +424,7 @@ func deleteUnusedLayers(deleteMap map[string]struct{}) error {

 	// only delete the files which are still in the deleteMap
 	for k := range deleteMap {
-		fp, err := GetBlobsPath(k)
+		fp, err := manifest.BlobsPath(k)
 		if err != nil {
 			slog.Info(fmt.Sprintf("couldn't get file path for '%s': %v", k, err))
 			continue
@@ -468,7 +440,7 @@ func deleteUnusedLayers(deleteMap map[string]struct{}) error {

 func PruneLayers() error {
 	deleteMap := make(map[string]struct{})
-	p, err := GetBlobsPath("")
+	p, err := manifest.BlobsPath("")
 	if err != nil {
 		return err
 	}
@@ -483,9 +455,9 @@ func PruneLayers() error {
 		name := blob.Name()
 		name = strings.ReplaceAll(name, "-", ":")

-		_, err := GetBlobsPath(name)
+		_, err := manifest.BlobsPath(name)
 		if err != nil {
-			if errors.Is(err, ErrInvalidDigestFormat) {
+			if errors.Is(err, manifest.ErrInvalidDigestFormat) {
 				// remove invalid blobs (e.g. partial downloads)
 				if err := os.Remove(filepath.Join(p, blob.Name())); err != nil {
 					slog.Error("couldn't remove blob", "blob", blob.Name(), "error", err)
@@ -510,63 +482,30 @@ func PruneLayers() error {
 	return nil
 }

-func PruneDirectory(path string) error {
-	info, err := os.Lstat(path)
-	if err != nil {
-		return err
-	}
-
-	if info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
-		entries, err := os.ReadDir(path)
-		if err != nil {
-			return err
-		}
-
-		for _, entry := range entries {
-			if err := PruneDirectory(filepath.Join(path, entry.Name())); err != nil {
-				return err
-			}
-		}
-
-		entries, err = os.ReadDir(path)
-		if err != nil {
-			return err
-		}
-
-		if len(entries) > 0 {
-			return nil
-		}
-
-		return os.Remove(path)
-	}
-
-	return nil
-}
-
 func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
-	mp := ParseModelPath(name)
+	n := model.ParseName(name)
 	fn(api.ProgressResponse{Status: "retrieving manifest"})

-	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
+	if n.ProtocolScheme == "http" && !regOpts.Insecure {
 		return errInsecureProtocol
 	}

-	manifest, _, err := GetManifest(mp)
+	mf, err := manifest.ParseNamedManifest(n)
 	if err != nil {
 		fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
 		return err
 	}

-	var layers []Layer
-	layers = append(layers, manifest.Layers...)
-	if manifest.Config.Digest != "" {
-		layers = append(layers, manifest.Config)
+	var layers []manifest.Layer
+	layers = append(layers, mf.Layers...)
+	if mf.Config.Digest != "" {
+		layers = append(layers, mf.Config)
 	}

 	// Use fast transfer for models with tensor layers (many small blobs)
 	if hasTensorLayers(layers) {
 		// Read raw manifest JSON to preserve tensor metadata fields
-		manifestPath, err := mp.GetManifestPath()
+		manifestPath, err := manifest.PathForName(n)
 		if err != nil {
 			return err
 		}
@@ -574,7 +513,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 		if err != nil {
 			return err
 		}
-		if err := pushWithTransfer(ctx, mp, layers, manifestJSON, regOpts, fn); err != nil {
+		if err := pushWithTransfer(ctx, n, layers, manifestJSON, regOpts, fn); err != nil {
 			return err
 		}
 		fn(api.ProgressResponse{Status: "success"})
@@ -582,17 +521,17 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}

 	for _, layer := range layers {
-		if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
+		if err := uploadBlob(ctx, n, layer, regOpts, fn); err != nil {
 			slog.Info(fmt.Sprintf("error uploading blob: %v", err))
 			return err
 		}
 	}

 	fn(api.ProgressResponse{Status: "pushing manifest"})
-	requestURL := mp.BaseURL()
-	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)
+	requestURL := n.BaseURL()
+	requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)

-	manifestJSON, err := json.Marshal(manifest)
+	manifestJSON, err := json.Marshal(mf)
 	if err != nil {
 		return err
 	}
@@ -611,44 +550,44 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 }

 func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
-	mp := ParseModelPath(name)
+	n := model.ParseName(name)

 	// build deleteMap to prune unused layers
 	deleteMap := make(map[string]struct{})
-	manifest, _, err := GetManifest(mp)
+	existingMf, err := manifest.ParseNamedManifest(n)
 	if errors.Is(err, os.ErrNotExist) {
 		// noop
 	} else if err != nil {
 		slog.Warn("pulling model with bad existing manifest", "name", name, "error", err)
 	} else {
-		for _, l := range manifest.Layers {
+		for _, l := range existingMf.Layers {
 			deleteMap[l.Digest] = struct{}{}
 		}
-		if manifest.Config.Digest != "" {
-			deleteMap[manifest.Config.Digest] = struct{}{}
+		if existingMf.Config.Digest != "" {
+			deleteMap[existingMf.Config.Digest] = struct{}{}
 		}
 	}

-	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
+	if n.ProtocolScheme == "http" && !regOpts.Insecure {
 		return errInsecureProtocol
 	}

 	fn(api.ProgressResponse{Status: "pulling manifest"})

-	manifest, err = pullModelManifest(ctx, mp, regOpts)
+	mf, err := pullModelManifest(ctx, n, regOpts)
 	if err != nil {
 		return fmt.Errorf("pull model manifest: %s", err)
 	}

-	var layers []Layer
-	layers = append(layers, manifest.Layers...)
-	if manifest.Config.Digest != "" {
-		layers = append(layers, manifest.Config)
+	var layers []manifest.Layer
+	layers = append(layers, mf.Layers...)
+	if mf.Config.Digest != "" {
+		layers = append(layers, mf.Config)
 	}

 	// Use fast transfer for models with tensor layers (many small blobs)
 	if hasTensorLayers(layers) {
-		if err := pullWithTransfer(ctx, mp, layers, manifest, regOpts, fn); err != nil {
+		if err := pullWithTransfer(ctx, n, layers, mf, regOpts, fn); err != nil {
 			return err
 		}
 		fn(api.ProgressResponse{Status: "success"})
@@ -658,7 +597,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	skipVerify := make(map[string]bool)
 	for _, layer := range layers {
 		cacheHit, err := downloadBlob(ctx, downloadOpts{
-			mp:      mp,
+			n:       n,
 			digest:  layer.Digest,
 			regOpts: regOpts,
 			fn:      fn,
@@ -677,7 +616,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 		}
 		if err := verifyBlob(layer.Digest); err != nil {
 			if errors.Is(err, errDigestMismatch) {
-				fp, err := GetBlobsPath(layer.Digest)
+				fp, err := manifest.BlobsPath(layer.Digest)
 				if err != nil {
 					return err
 				}
@@ -692,16 +631,16 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	for _, layer := range layers {
 		delete(deleteMap, layer.Digest)
 	}
-	delete(deleteMap, manifest.Config.Digest)
+	delete(deleteMap, mf.Config.Digest)

 	fn(api.ProgressResponse{Status: "writing manifest"})

-	manifestJSON, err := json.Marshal(manifest)
+	manifestJSON, err := json.Marshal(mf)
 	if err != nil {
 		return err
 	}

-	fp, err := mp.GetManifestPath()
+	fp, err := manifest.PathForName(n)
 	if err != nil {
 		return err
 	}
@@ -728,9 +667,9 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 }

 // hasTensorLayers checks if any layer has tensor media type.
-func hasTensorLayers(layers []Layer) bool {
+func hasTensorLayers(layers []manifest.Layer) bool {
 	for _, layer := range layers {
-		if layer.MediaType == MediaTypeImageTensor {
+		if layer.MediaType == manifest.MediaTypeImageTensor {
 			return true
 		}
 	}
@@ -738,7 +677,7 @@ func hasTensorLayers(layers []Layer) bool {
 }

 // pullWithTransfer uses the simplified x/transfer package for downloading blobs.
-func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifest *Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
+func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, mf *manifest.Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
 	blobs := make([]transfer.Blob, len(layers))
 	for i, layer := range layers {
 		blobs[i] = transfer.Blob{
@@ -747,12 +686,12 @@ func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
 		}
 	}

-	destDir, err := GetBlobsPath("")
+	destDir, err := manifest.BlobsPath("")
 	if err != nil {
 		return err
 	}

-	base := mp.BaseURL()
+	base := n.BaseURL()
 	if base.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		base.Scheme = "http"
 	}
@@ -784,7 +723,7 @@ func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
 		Blobs:      blobs,
 		BaseURL:    baseURL,
 		DestDir:    destDir,
-		Repository: mp.GetNamespaceRepository(),
+		Repository: n.DisplayNamespaceModel(),
 		Progress:   progress,
 		Token:      regOpts.Token,
 		GetToken:   getToken,
@@ -795,12 +734,12 @@ func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes

 	// Write manifest
 	fn(api.ProgressResponse{Status: "writing manifest"})
-	manifestJSON, err := json.Marshal(manifest)
+	manifestJSON, err := json.Marshal(mf)
 	if err != nil {
 		return err
 	}

-	fp, err := mp.GetManifestPath()
+	fp, err := manifest.PathForName(n)
 	if err != nil {
 		return err
 	}
@@ -812,7 +751,7 @@ func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
 }

 // pushWithTransfer uses the simplified x/transfer package for uploading blobs and manifest.
-func pushWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifestJSON []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
+func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestJSON []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
 	blobs := make([]transfer.Blob, len(layers))
 	for i, layer := range layers {
 		blobs[i] = transfer.Blob{
@@ -822,12 +761,12 @@ func pushWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
 		}
 	}

-	srcDir, err := GetBlobsPath("")
+	srcDir, err := manifest.BlobsPath("")
 	if err != nil {
 		return err
 	}

-	base := mp.BaseURL()
+	base := n.BaseURL()
 	if base.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		base.Scheme = "http"
 	}
@@ -864,13 +803,13 @@ func pushWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifes
 		GetToken:    getToken,
 		Logger:      slog.Default(),
 		Manifest:    manifestJSON,
-		ManifestRef: mp.Tag,
-		Repository:  mp.GetNamespaceRepository(),
+		ManifestRef: n.Tag,
+		Repository:  n.DisplayNamespaceModel(),
 	})
 }

-func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*Manifest, error) {
-	requestURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)
+func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptions) (*manifest.Manifest, error) {
+	requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)

 	headers := make(http.Header)
 	headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
@@ -880,7 +819,7 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptio
 	}
 	defer resp.Body.Close()

-	var m Manifest
+	var m manifest.Manifest
 	if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
 		return nil, err
 	}
@@ -1042,7 +981,7 @@ func parseRegistryChallenge(authStr string) registryChallenge {
 var errDigestMismatch = errors.New("digest mismatch, file must be downloaded again")

 func verifyBlob(digest string) error {
-	fp, err := GetBlobsPath(digest)
+	fp, err := manifest.BlobsPath(digest)
 	if err != nil {
 		return err
 	}
--- a/server/images_test.go
+++ b/server/images_test.go
@@ -56,6 +56,15 @@ func TestModelCapabilities(t *testing.T) {
 			},
 			expectedCaps: []model.Capability{model.CapabilityImage},
 		},
+		{
+			name: "model with image and vision capability (image editing)",
+			model: Model{
+				Config: model.ConfigV2{
+					Capabilities: []string{"image", "vision"},
+				},
+			},
+			expectedCaps: []model.Capability{model.CapabilityImage, model.CapabilityVision},
+		},
 		{
 			name: "model with completion capability",
 			model: Model{
--- a/server/model.go
+++ b/server/model.go
@@ -13,6 +13,7 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/model"
 )
@@ -20,19 +21,19 @@ import (
 var intermediateBlobs map[string]string = make(map[string]string)

 type layerGGML struct {
-	Layer
+	manifest.Layer
 	*ggml.GGML
 }

 func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
-	m, err := ParseNamedManifest(name)
+	m, err := manifest.ParseNamedManifest(name)
 	switch {
 	case errors.Is(err, os.ErrNotExist):
 		if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
 			return nil, err
 		}

-		m, err = ParseNamedManifest(name)
+		m, err = manifest.ParseNamedManifest(name)
 		if err != nil {
 			return nil, err
 		}
@@ -41,7 +42,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 	}

 	for _, layer := range m.Layers {
-		layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
+		layer, err := manifest.NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
 		if err != nil {
 			return nil, err
 		}
@@ -50,7 +51,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
 		case "application/vnd.ollama.image.model",
 			"application/vnd.ollama.image.projector",
 			"application/vnd.ollama.image.adapter":
-			blobpath, err := GetBlobsPath(layer.Digest)
+			blobpath, err := manifest.BlobsPath(layer.Digest)
 			if err != nil {
 				return nil, err
 			}
@@ -81,12 +82,12 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
 			if t, err := template.Named(s); err != nil {
 				slog.Debug("template detection", "error", err, "template", s)
 			} else {
-				layer, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
+				layer, err := manifest.NewLayer(t.Reader(), "application/vnd.ollama.image.template")
 				if err != nil {
 					return nil, err
 				}

-				layer.status = fmt.Sprintf("using autodetected template %s", t.Name)
+				layer.Status = fmt.Sprintf("using autodetected template %s", t.Name)
 				layers = append(layers, &layerGGML{layer, nil})

 				if t.Parameters != nil {
@@ -95,7 +96,7 @@ func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
 						return nil, err
 					}

-					layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
+					layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.params")
 					if err != nil {
 						return nil, err
 					}
--- a/server/modelpath.go
+++ b/server/modelpath.go
@@ -1,146 +0,0 @@
-package server
-
-import (
-	"errors"
-	"fmt"
-	"io/fs"
-	"net/url"
-	"os"
-	"path/filepath"
-	"regexp"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/types/model"
-)
-
-type ModelPath struct {
-	ProtocolScheme string
-	Registry       string
-	Namespace      string
-	Repository     string
-	Tag            string
-}
-
-const (
-	DefaultRegistry       = "registry.ollama.ai"
-	DefaultNamespace      = "library"
-	DefaultTag            = "latest"
-	DefaultProtocolScheme = "https"
-)
-
-var (
-	ErrInvalidImageFormat  = errors.New("invalid image format")
-	ErrInvalidDigestFormat = errors.New("invalid digest format")
-	ErrInvalidProtocol     = errors.New("invalid protocol scheme")
-	ErrInsecureProtocol    = errors.New("insecure protocol http")
-	ErrModelPathInvalid    = errors.New("invalid model path")
-)
-
-func ParseModelPath(name string) ModelPath {
-	mp := ModelPath{
-		ProtocolScheme: DefaultProtocolScheme,
-		Registry:       DefaultRegistry,
-		Namespace:      DefaultNamespace,
-		Repository:     "",
-		Tag:            DefaultTag,
-	}
-
-	before, after, found := strings.Cut(name, "://")
-	if found {
-		mp.ProtocolScheme = before
-		name = after
-	}
-
-	name = strings.ReplaceAll(name, string(os.PathSeparator), "/")
-	parts := strings.Split(name, "/")
-	switch len(parts) {
-	case 3:
-		mp.Registry = parts[0]
-		mp.Namespace = parts[1]
-		mp.Repository = parts[2]
-	case 2:
-		mp.Namespace = parts[0]
-		mp.Repository = parts[1]
-	case 1:
-		mp.Repository = parts[0]
-	}
-
-	if repo, tag, found := strings.Cut(mp.Repository, ":"); found {
-		mp.Repository = repo
-		mp.Tag = tag
-	}
-
-	return mp
-}
-
-func (mp ModelPath) GetNamespaceRepository() string {
-	return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
-}
-
-func (mp ModelPath) GetFullTagname() string {
-	return fmt.Sprintf("%s/%s/%s:%s", mp.Registry, mp.Namespace, mp.Repository, mp.Tag)
-}
-
-func (mp ModelPath) GetShortTagname() string {
-	if mp.Registry == DefaultRegistry {
-		if mp.Namespace == DefaultNamespace {
-			return fmt.Sprintf("%s:%s", mp.Repository, mp.Tag)
-		}
-		return fmt.Sprintf("%s/%s:%s", mp.Namespace, mp.Repository, mp.Tag)
-	}
-	return fmt.Sprintf("%s/%s/%s:%s", mp.Registry, mp.Namespace, mp.Repository, mp.Tag)
-}
-
-// GetManifestPath returns the path to the manifest file for the given model path, it is up to the caller to create the directory if it does not exist.
-func (mp ModelPath) GetManifestPath() (string, error) {
-	name := model.Name{
-		Host:      mp.Registry,
-		Namespace: mp.Namespace,
-		Model:     mp.Repository,
-		Tag:       mp.Tag,
-	}
-	if !name.IsValid() {
-		return "", fs.ErrNotExist
-	}
-	return filepath.Join(envconfig.Models(), "manifests", name.Filepath()), nil
-}
-
-func (mp ModelPath) BaseURL() *url.URL {
-	return &url.URL{
-		Scheme: mp.ProtocolScheme,
-		Host:   mp.Registry,
-	}
-}
-
-func GetManifestPath() (string, error) {
-	path := filepath.Join(envconfig.Models(), "manifests")
-	if err := os.MkdirAll(path, 0o755); err != nil {
-		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
-	}
-
-	return path, nil
-}
-
-func GetBlobsPath(digest string) (string, error) {
-	// only accept actual sha256 digests
-	pattern := "^sha256[:-][0-9a-fA-F]{64}$"
-	re := regexp.MustCompile(pattern)
-
-	if digest != "" && !re.MatchString(digest) {
-		return "", ErrInvalidDigestFormat
-	}
-
-	digest = strings.ReplaceAll(digest, ":", "-")
-	path := filepath.Join(envconfig.Models(), "blobs", digest)
-	dirPath := filepath.Dir(path)
-	if digest == "" {
-		dirPath = path
-	}
-
-	if err := os.MkdirAll(dirPath, 0o755); err != nil {
-		return "", fmt.Errorf("%w: ensure path elements are traversable", err)
-	}
-
-	return path, nil
-}
--- a/server/modelpath_test.go
+++ b/server/modelpath_test.go
@@ -1,153 +0,0 @@
-package server
-
-import (
-	"path/filepath"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func TestGetBlobsPath(t *testing.T) {
-	// GetBlobsPath expects an actual directory to exist
-	tempDir := t.TempDir()
-
-	tests := []struct {
-		name     string
-		digest   string
-		expected string
-		err      error
-	}{
-		{
-			"empty digest",
-			"",
-			filepath.Join(tempDir, "blobs"),
-			nil,
-		},
-		{
-			"valid with colon",
-			"sha256:456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
-			filepath.Join(tempDir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
-			nil,
-		},
-		{
-			"valid with dash",
-			"sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9",
-			filepath.Join(tempDir, "blobs", "sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9"),
-			nil,
-		},
-		{
-			"digest too short",
-			"sha256-45640291",
-			"",
-			ErrInvalidDigestFormat,
-		},
-		{
-			"digest too long",
-			"sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7aad9aaaaaaaaaa",
-			"",
-			ErrInvalidDigestFormat,
-		},
-		{
-			"digest invalid chars",
-			"../sha256-456402914e838a953e0cf80caa6adbe75383d9e63584a964f504a7bbb8f7a",
-			"",
-			ErrInvalidDigestFormat,
-		},
-	}
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			t.Setenv("OLLAMA_MODELS", tempDir)
-
-			got, err := GetBlobsPath(tc.digest)
-
-			require.ErrorIs(t, tc.err, err, tc.name)
-			assert.Equal(t, tc.expected, got, tc.name)
-		})
-	}
-}
-
-func TestParseModelPath(t *testing.T) {
-	tests := []struct {
-		name string
-		arg  string
-		want ModelPath
-	}{
-		{
-			"full path https",
-			"https://example.com/ns/repo:tag",
-			ModelPath{
-				ProtocolScheme: "https",
-				Registry:       "example.com",
-				Namespace:      "ns",
-				Repository:     "repo",
-				Tag:            "tag",
-			},
-		},
-		{
-			"full path http",
-			"http://example.com/ns/repo:tag",
-			ModelPath{
-				ProtocolScheme: "http",
-				Registry:       "example.com",
-				Namespace:      "ns",
-				Repository:     "repo",
-				Tag:            "tag",
-			},
-		},
-		{
-			"no protocol",
-			"example.com/ns/repo:tag",
-			ModelPath{
-				ProtocolScheme: "https",
-				Registry:       "example.com",
-				Namespace:      "ns",
-				Repository:     "repo",
-				Tag:            "tag",
-			},
-		},
-		{
-			"no registry",
-			"ns/repo:tag",
-			ModelPath{
-				ProtocolScheme: "https",
-				Registry:       DefaultRegistry,
-				Namespace:      "ns",
-				Repository:     "repo",
-				Tag:            "tag",
-			},
-		},
-		{
-			"no namespace",
-			"repo:tag",
-			ModelPath{
-				ProtocolScheme: "https",
-				Registry:       DefaultRegistry,
-				Namespace:      DefaultNamespace,
-				Repository:     "repo",
-				Tag:            "tag",
-			},
-		},
-		{
-			"no tag",
-			"repo",
-			ModelPath{
-				ProtocolScheme: "https",
-				Registry:       DefaultRegistry,
-				Namespace:      DefaultNamespace,
-				Repository:     "repo",
-				Tag:            DefaultTag,
-			},
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			got := ParseModelPath(tc.arg)
-
-			if got != tc.want {
-				t.Errorf("got: %q want: %q", got, tc.want)
-			}
-		})
-	}
-}
--- a/server/quantization.go
+++ b/server/quantization.go
@@ -95,6 +95,13 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 			// for the 8-expert model, bumping this to Q8_0 trades just ~128MB
 			newType = fsggml.TensorTypeQ8_0
 		}
+	} else if strings.Contains(name, "attn_k_b.weight") ||
+		strings.Contains(name, "attn_v_b.weight") ||
+		strings.Contains(name, "attn_kv_a_mqa.weight") ||
+		strings.Contains(name, "attn_q_a.weight") ||
+		strings.Contains(name, "attn_q_b.weight") {
+		// MLA tensors need higher precision to avoid quality degradation
+		newType = fsggml.TensorTypeQ8_0
 	} else if strings.Contains(name, "ffn_down") {
 		iLayer := qs.iFfnDown
 		n_layer := qs.nFfnDown
@@ -198,8 +205,8 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
 	name := t.Name
 	quantize := strings.HasSuffix(name, "weight")

-	// don't quantize vision stuff
-	quantize = quantize && (!strings.Contains(name, "v.") || strings.Contains(name, "_v."))
+	// don't quantize vision encoder tensors (named with "v." prefix)
+	quantize = quantize && !strings.HasPrefix(name, "v.")
 	quantize = quantize && !strings.Contains(name, "mm.")

 	// quantize only 2D and 3D tensors (experts)
@@ -219,6 +226,9 @@ func newType(t *fsggml.Tensor, kv fsggml.KV, qs *quantizeState, ftype fsggml.Fil
 	// NOTE: can't use LLM_TN here because the layer number is not known
 	quantize = quantize && !strings.Contains(name, "ssm_conv1d.weight")

+	// do not quantize LFM2's shortconv kernel weights
+	quantize = quantize && !strings.Contains(name, "shortconv.conv.weight")
+
 	// do not quantize RWKV's time_mix_first tensors
 	quantize = quantize && !strings.Contains(name, "time_mix_first.weight")
 	quantize = quantize && !strings.Contains(name, "time_mix_w1.weight")
--- a/server/routes.go
+++ b/server/routes.go
@@ -39,6 +39,7 @@ import (
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/logutil"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/middleware"
 	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/model/renderers"
@@ -974,7 +975,7 @@ func (s *Server) PushHandler(c *gin.Context) {
 // is.
 func getExistingName(n model.Name) (model.Name, error) {
 	var zero model.Name
-	existing, err := Manifests(true)
+	existing, err := manifest.Manifests(true)
 	if err != nil {
 		return zero, err
 	}
@@ -1018,7 +1019,7 @@ func (s *Server) DeleteHandler(c *gin.Context) {
 		return
 	}

-	m, err := ParseNamedManifest(n)
+	m, err := manifest.ParseNamedManifest(n)
 	if err != nil {
 		switch {
 		case os.IsNotExist(err):
@@ -1080,7 +1081,7 @@ func (s *Server) ShowHandler(c *gin.Context) {
 func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	name := model.ParseName(req.Model)
 	if !name.IsValid() {
-		return nil, ErrModelPathInvalid
+		return nil, model.Unqualified(name)
 	}
 	name, err := getExistingName(name)
 	if err != nil {
@@ -1112,7 +1113,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {

 	// For safetensors LLM models (experimental), populate details from config.json
 	if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
-		if info, err := xserver.GetSafetensorsLLMInfo(name.String()); err == nil {
+		if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil {
 			if arch, ok := info["general.architecture"].(string); ok && arch != "" {
 				modelDetails.Family = arch
 			}
@@ -1121,7 +1122,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 			}
 		}
 		// Get torch_dtype directly from config.json for quantization level
-		if dtype, err := xserver.GetSafetensorsDtype(name.String()); err == nil && dtype != "" {
+		if dtype, err := xserver.GetSafetensorsDtype(name); err == nil && dtype != "" {
 			modelDetails.QuantizationLevel = dtype
 		}
 	}
@@ -1135,7 +1136,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
 	}

-	manifest, err := ParseNamedManifest(name)
+	mf, err := manifest.ParseNamedManifest(name)
 	if err != nil {
 		return nil, err
 	}
@@ -1147,8 +1148,11 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 		Details:      modelDetails,
 		Messages:     msgs,
 		Capabilities: m.Capabilities(),
-		ModifiedAt:   manifest.fi.ModTime(),
+		ModifiedAt:   mf.FileInfo().ModTime(),
 		Requires:     m.Config.Requires,
+		// Several integrations crash when ModelInfo is nil, or is empty and
+		// therefore dropped by omitempty, so by default we return an empty map.
+		ModelInfo: make(map[string]any),
 	}

 	if m.Config.RemoteHost != "" {
@@ -1211,7 +1215,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	if slices.Contains(m.Capabilities(), model.CapabilityImage) {
 		// Populate tensor info if verbose
 		if req.Verbose {
-			if tensors, err := xserver.GetSafetensorsTensorInfo(name.String()); err == nil {
+			if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
 				resp.Tensors = tensors
 			}
 		}
@@ -1220,12 +1224,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {

 	// For safetensors LLM models (experimental), populate ModelInfo from config.json
 	if m.Config.ModelFormat == "safetensors" && slices.Contains(m.Config.Capabilities, "completion") {
-		if info, err := xserver.GetSafetensorsLLMInfo(name.String()); err == nil {
+		if info, err := xserver.GetSafetensorsLLMInfo(name); err == nil {
 			resp.ModelInfo = info
 		}
 		// Populate tensor info if verbose
 		if req.Verbose {
-			if tensors, err := xserver.GetSafetensorsTensorInfo(name.String()); err == nil {
+			if tensors, err := xserver.GetSafetensorsTensorInfo(name); err == nil {
 				resp.Tensors = tensors
 			}
 		}
@@ -1282,7 +1286,7 @@ func getModelData(digest string, verbose bool) (ggml.KV, ggml.Tensors, error) {
 }

 func (s *Server) ListHandler(c *gin.Context) {
-	ms, err := Manifests(true)
+	ms, err := manifest.Manifests(true)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1313,8 +1317,8 @@ func (s *Server) ListHandler(c *gin.Context) {
 			RemoteModel: cf.RemoteModel,
 			RemoteHost:  cf.RemoteHost,
 			Size:        m.Size(),
-			Digest:      m.digest,
-			ModifiedAt:  m.fi.ModTime(),
+			Digest:      m.Digest(),
+			ModifiedAt:  m.FileInfo().ModTime(),
 			Details: api.ModelDetails{
 				Format:            cf.ModelFormat,
 				Family:            cf.ModelFamily,
@@ -1373,7 +1377,7 @@ func (s *Server) CopyHandler(c *gin.Context) {
 }

 func (s *Server) HeadBlobHandler(c *gin.Context) {
-	path, err := GetBlobsPath(c.Param("digest"))
+	path, err := manifest.BlobsPath(c.Param("digest"))
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
@@ -1389,7 +1393,7 @@ func (s *Server) HeadBlobHandler(c *gin.Context) {

 func (s *Server) CreateBlobHandler(c *gin.Context) {
 	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
-		p, err := GetBlobsPath(ib)
+		p, err := manifest.BlobsPath(ib)
 		if err != nil {
 			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 			return
@@ -1407,7 +1411,7 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 		}
 	}

-	path, err := GetBlobsPath(c.Param("digest"))
+	path, err := manifest.BlobsPath(c.Param("digest"))
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
@@ -1425,7 +1429,7 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
 		return
 	}

-	layer, err := NewLayer(c.Request.Body, "")
+	layer, err := manifest.NewLayer(c.Request.Body, "")
 	if err != nil {
 		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
@@ -1600,8 +1604,9 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 	r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
 	r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
 	r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
-	// OpenAI-compatible image generation endpoint
+	// OpenAI-compatible image generation endpoints
 	r.POST("/v1/images/generations", middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
+	r.POST("/v1/images/edits", middleware.ImageEditsMiddleware(), s.GenerateHandler)

 	// Inference (Anthropic compatibility)
 	r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
@@ -1625,7 +1630,7 @@ func Serve(ln net.Listener) error {
 	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
 	slog.Info("server config", "env", envconfig.Values())

-	blobsDir, err := GetBlobsPath("")
+	blobsDir, err := manifest.BlobsPath("")
 	if err != nil {
 		return err
 	}
@@ -1634,7 +1639,7 @@ func Serve(ln net.Listener) error {
 	}

 	if !envconfig.NoPrune() {
-		if _, err := Manifests(false); err != nil {
+		if _, err := manifest.Manifests(false); err != nil {
 			slog.Warn("corrupt manifests detected, skipping prune operation.  Re-pull or delete to clear", "error", err)
 		} else {
 			// clean up unused layers and manifests
@@ -1642,12 +1647,12 @@ func Serve(ln net.Listener) error {
 				return err
 			}

-			manifestsPath, err := GetManifestPath()
+			manifestsPath, err := manifest.Path()
 			if err != nil {
 				return err
 			}

-			if err := PruneDirectory(manifestsPath); err != nil {
+			if err := manifest.PruneDirectory(manifestsPath); err != nil {
 				return err
 			}
 		}
@@ -2503,8 +2508,14 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
 		return
 	}

-	// Set headers for streaming response
-	c.Header("Content-Type", "application/x-ndjson")
+	// Stream unless the request explicitly sets stream=false
+	isStreaming := req.Stream == nil || *req.Stream
+
+	contentType := "application/x-ndjson"
+	if !isStreaming {
+		contentType = "application/json; charset=utf-8"
+	}
+	c.Header("Content-Type", contentType)

 	// Get seed from options if provided
 	var seed int64
@@ -2519,13 +2530,21 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
 		}
 	}

+	var images []llm.ImageData
+	for i, imgData := range req.Images {
+		images = append(images, llm.ImageData{ID: i, Data: imgData})
+	}
+
 	var streamStarted bool
+	var finalResponse api.GenerateResponse
+
 	if err := runner.Completion(c.Request.Context(), llm.CompletionRequest{
 		Prompt: req.Prompt,
 		Width:  req.Width,
 		Height: req.Height,
 		Steps:  req.Steps,
 		Seed:   seed,
+		Images: images,
 	}, func(cr llm.CompletionResponse) {
 		streamStarted = true
 		res := api.GenerateResponse{
@@ -2549,6 +2568,11 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
 			res.Metrics.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 		}

+		if !isStreaming {
+			finalResponse = res
+			return
+		}
+
 		data, _ := json.Marshal(res)
 		c.Writer.Write(append(data, '\n'))
 		c.Writer.Flush()
@@ -2558,5 +2582,10 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
 		if !streamStarted {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		}
+		return
+	}
+
+	if !isStreaming {
+		c.JSON(http.StatusOK, finalResponse)
 	}
 }
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -25,6 +25,7 @@ import (
 	"github.com/ollama/ollama/convert"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/ggml"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/types/model"
 )

@@ -223,15 +224,15 @@ func TestCreateFromModelInheritsRendererParser(t *testing.T) {
 		t.Fatalf("expected status code 200, actual %d", w.Code)
 	}

-	manifest, err := ParseNamedManifest(model.ParseName("child"))
+	mf, err := manifest.ParseNamedManifest(model.ParseName("child"))
 	if err != nil {
 		t.Fatalf("parse manifest: %v", err)
 	}
-	if manifest.Config.Digest == "" {
+	if mf.Config.Digest == "" {
 		t.Fatalf("unexpected empty config digest for child manifest")
 	}

-	configPath, err := GetBlobsPath(manifest.Config.Digest)
+	configPath, err := manifest.BlobsPath(mf.Config.Digest)
 	if err != nil {
 		t.Fatalf("config blob path: %v", err)
 	}
--- a/server/routes_delete_test.go
+++ b/server/routes_delete_test.go
@@ -10,6 +10,7 @@ import (
 	"github.com/gin-gonic/gin"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/types/model"
 )

@@ -93,13 +94,13 @@ func TestDeleteDuplicateLayers(t *testing.T) {
 		t.Fatal(err)
 	}

-	config, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	config, err := manifest.NewLayer(&b, "application/vnd.docker.container.image.v1+json")
 	if err != nil {
 		t.Fatal(err)
 	}

 	// create a manifest with duplicate layers
-	if err := WriteManifest(n, config, []Layer{config}); err != nil {
+	if err := manifest.WriteManifest(n, config, []manifest.Layer{config}); err != nil {
 		t.Fatal(err)
 	}

--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -19,7 +19,9 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/types/model"
 )

 // testPropsMap creates a ToolPropertiesMap from a map (convenience function for tests)
@@ -71,6 +73,8 @@ func (mockRunner) Tokenize(_ context.Context, s string) (tokens []int, err error
 	return
 }

+func (mockRunner) Ping(_ context.Context) error { return nil }
+
 func newMockServer(mock *mockRunner) func(ml.SystemInfo, []ml.DeviceInfo, string, *ggml.GGML, []string, []string, api.Options, int) (llm.LlamaServer, error) {
 	return func(_ ml.SystemInfo, _ []ml.DeviceInfo, _ string, _ *ggml.GGML, _, _ []string, _ api.Options, _ int) (llm.LlamaServer, error) {
 		return mock, nil
@@ -2193,3 +2197,246 @@ func TestGenerateUnload(t *testing.T) {
 		}
 	})
 }
+
+func TestGenerateWithImages(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	mock := mockRunner{
+		CompletionResponse: llm.CompletionResponse{
+			Done:               true,
+			DoneReason:         llm.DoneReasonStop,
+			PromptEvalCount:    1,
+			PromptEvalDuration: 1,
+			EvalCount:          1,
+			EvalDuration:       1,
+		},
+	}
+
+	s := Server{
+		sched: &Scheduler{
+			pendingReqCh:    make(chan *LlmRequest, 1),
+			finishedReqCh:   make(chan *LlmRequest, 1),
+			expiredCh:       make(chan *runnerRef, 1),
+			unloadedCh:      make(chan any, 1),
+			loaded:          make(map[string]*runnerRef),
+			newServerFn:     newMockServer(&mock),
+			getGpuFn:        getGpuFn,
+			getSystemInfoFn: getSystemInfoFn,
+			waitForRecovery: 250 * time.Millisecond,
+			loadFn: func(req *LlmRequest, _ *ggml.GGML, _ ml.SystemInfo, _ []ml.DeviceInfo, _ bool) bool {
+				time.Sleep(time.Millisecond)
+				req.successCh <- &runnerRef{
+					llama: &mock,
+				}
+				return false
+			},
+		},
+	}
+
+	go s.sched.Run(t.Context())
+
+	_, digest := createBinFile(t, ggml.KV{
+		"general.architecture":          "llama",
+		"llama.block_count":             uint32(1),
+		"llama.context_length":          uint32(8192),
+		"llama.embedding_length":        uint32(4096),
+		"llama.attention.head_count":    uint32(32),
+		"llama.attention.head_count_kv": uint32(8),
+		"tokenizer.ggml.tokens":         []string{""},
+		"tokenizer.ggml.scores":         []float32{0},
+		"tokenizer.ggml.token_type":     []int32{0},
+	}, []*ggml.Tensor{
+		{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.ffn_gate.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.ffn_up.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.ffn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.attn_k.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.attn_output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.attn_q.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "blk.0.attn_v.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+		{Name: "output.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
+	})
+
+	w := createRequest(t, s.CreateHandler, api.CreateRequest{
+		Model:  "test",
+		Files:  map[string]string{"file.gguf": digest},
+		Stream: &stream,
+	})
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected status 200, got %d", w.Code)
+	}
+
+	t.Run("images passed to completion request", func(t *testing.T) {
+		testImage := []byte("test-image-data")
+
+		mock.CompletionResponse.Content = "Image processed"
+		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+			Model:  "test",
+			Prompt: "Describe this image",
+			Images: []api.ImageData{testImage},
+			Stream: &stream,
+		})
+
+		if w.Code != http.StatusOK {
+			t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
+		}
+
+		// Verify images were passed to the completion request
+		if len(mock.CompletionRequest.Images) != 1 {
+			t.Fatalf("expected 1 image in completion request, got %d", len(mock.CompletionRequest.Images))
+		}
+
+		if !bytes.Equal(mock.CompletionRequest.Images[0].Data, testImage) {
+			t.Errorf("image data mismatch in completion request")
+		}
+
+		if mock.CompletionRequest.Images[0].ID != 0 {
+			t.Errorf("expected image ID 0, got %d", mock.CompletionRequest.Images[0].ID)
+		}
+	})
+
+	t.Run("multiple images passed to completion request", func(t *testing.T) {
+		testImage1 := []byte("test-image-1")
+		testImage2 := []byte("test-image-2")
+
+		mock.CompletionResponse.Content = "Images processed"
+		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+			Model:  "test",
+			Prompt: "Compare these images",
+			Images: []api.ImageData{testImage1, testImage2},
+			Stream: &stream,
+		})
+
+		if w.Code != http.StatusOK {
+			t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
+		}
+
+		// Verify both images were passed
+		if len(mock.CompletionRequest.Images) != 2 {
+			t.Fatalf("expected 2 images in completion request, got %d", len(mock.CompletionRequest.Images))
+		}
+
+		if !bytes.Equal(mock.CompletionRequest.Images[0].Data, testImage1) {
+			t.Errorf("first image data mismatch")
+		}
+
+		if !bytes.Equal(mock.CompletionRequest.Images[1].Data, testImage2) {
+			t.Errorf("second image data mismatch")
+		}
+
+		if mock.CompletionRequest.Images[0].ID != 0 || mock.CompletionRequest.Images[1].ID != 1 {
+			t.Errorf("expected image IDs 0 and 1, got %d and %d",
+				mock.CompletionRequest.Images[0].ID, mock.CompletionRequest.Images[1].ID)
+		}
+	})
+
+	t.Run("no images when none provided", func(t *testing.T) {
+		mock.CompletionResponse.Content = "No images"
+		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+			Model:  "test",
+			Prompt: "Hello",
+			Stream: &stream,
+		})
+
+		if w.Code != http.StatusOK {
+			t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
+		}
+
+		// Verify no images in completion request
+		if len(mock.CompletionRequest.Images) != 0 {
+			t.Fatalf("expected 0 images in completion request, got %d", len(mock.CompletionRequest.Images))
+		}
+	})
+}
+
+// TestImageGenerateStreamFalse tests that image generation respects stream=false
+// and returns a single JSON response instead of streaming ndjson.
+func TestImageGenerateStreamFalse(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	p := t.TempDir()
+	t.Setenv("OLLAMA_MODELS", p)
+
+	mock := mockRunner{}
+	mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
+		fn(llm.CompletionResponse{Step: 1, TotalSteps: 3, Done: false})
+		fn(llm.CompletionResponse{Step: 2, TotalSteps: 3, Done: false})
+		fn(llm.CompletionResponse{Step: 3, TotalSteps: 3, Done: true, DoneReason: llm.DoneReasonStop, Image: "base64image"})
+		return nil
+	}
+
+	opts := api.DefaultOptions()
+	s := Server{
+		sched: &Scheduler{
+			pendingReqCh:  make(chan *LlmRequest, 1),
+			finishedReqCh: make(chan *LlmRequest, 1),
+			expiredCh:     make(chan *runnerRef, 1),
+			unloadedCh:    make(chan any, 1),
+			loaded: map[string]*runnerRef{
+				"": {
+					llama:       &mock,
+					Options:     &opts,
+					model:       &Model{Config: model.ConfigV2{Capabilities: []string{"image"}}},
+					numParallel: 1,
+				},
+			},
+			newServerFn:     newMockServer(&mock),
+			getGpuFn:        getGpuFn,
+			getSystemInfoFn: getSystemInfoFn,
+		},
+	}
+
+	go s.sched.Run(t.Context())
+
+	// Create model manifest with image capability
+	n := model.ParseName("test-image")
+	cfg := model.ConfigV2{Capabilities: []string{"image"}}
+	var b bytes.Buffer
+	if err := json.NewEncoder(&b).Encode(&cfg); err != nil {
+		t.Fatal(err)
+	}
+	configLayer, err := manifest.NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := manifest.WriteManifest(n, configLayer, nil); err != nil {
+		t.Fatal(err)
+	}
+
+	streamFalse := false
+	w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+		Model:  "test-image",
+		Prompt: "test prompt",
+		Stream: &streamFalse,
+	})
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String())
+	}
+
+	if ct := w.Header().Get("Content-Type"); ct != "application/json; charset=utf-8" {
+		t.Errorf("expected Content-Type 'application/json; charset=utf-8', got %q", ct)
+	}
+
+	body := w.Body.String()
+	lines := strings.Split(strings.TrimSpace(body), "\n")
+	if len(lines) != 1 {
+		t.Errorf("expected 1 response line, got %d:\n%s", len(lines), body)
+	}
+
+	var resp api.GenerateResponse
+	if err := json.Unmarshal([]byte(lines[0]), &resp); err != nil {
+		t.Fatalf("failed to parse response: %v", err)
+	}
+
+	if resp.Image != "base64image" {
+		t.Errorf("expected image 'base64image', got %q", resp.Image)
+	}
+
+	if !resp.Done {
+		t.Errorf("expected done=true")
+	}
+}
--- a/server/upload.go
+++ b/server/upload.go
@@ -21,12 +21,14 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
+	"github.com/ollama/ollama/manifest"
+	"github.com/ollama/ollama/types/model"
 )

 var blobUploadManager sync.Map

 type blobUpload struct {
-	Layer
+	manifest.Layer

 	Total     int64
 	Completed atomic.Int64
@@ -51,7 +53,7 @@ const (
 )

 func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *registryOptions) error {
-	p, err := GetBlobsPath(b.Digest)
+	p, err := manifest.BlobsPath(b.Digest)
 	if err != nil {
 		return err
 	}
@@ -59,7 +61,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *reg
 	if b.From != "" {
 		values := requestURL.Query()
 		values.Add("mount", b.Digest)
-		values.Add("from", ParseModelPath(b.From).GetNamespaceRepository())
+		values.Add("from", model.ParseName(b.From).DisplayNamespaceModel())
 		requestURL.RawQuery = values.Encode()
 	}

@@ -128,7 +130,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
 	defer blobUploadManager.Delete(b.Digest)
 	ctx, b.CancelFunc = context.WithCancel(ctx)

-	p, err := GetBlobsPath(b.Digest)
+	p, err := manifest.BlobsPath(b.Digest)
 	if err != nil {
 		b.err = err
 		return
@@ -364,9 +366,9 @@ func (p *progressWriter) Rollback() {
 	p.written = 0
 }

-func uploadBlob(ctx context.Context, mp ModelPath, layer Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
-	requestURL := mp.BaseURL()
-	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs", layer.Digest)
+func uploadBlob(ctx context.Context, n model.Name, layer manifest.Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
+	requestURL := n.BaseURL()
+	requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "blobs", layer.Digest)

 	resp, err := makeRequestWithRetry(ctx, http.MethodHead, requestURL, nil, nil, opts)
 	switch {
@@ -388,8 +390,8 @@ func uploadBlob(ctx context.Context, mp ModelPath, layer Layer, opts *registryOp
 	data, ok := blobUploadManager.LoadOrStore(layer.Digest, &blobUpload{Layer: layer})
 	upload := data.(*blobUpload)
 	if !ok {
-		requestURL := mp.BaseURL()
-		requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs/uploads/")
+		requestURL := n.BaseURL()
+		requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "blobs/uploads/")
 		if err := upload.Prepare(ctx, requestURL, opts); err != nil {
 			blobUploadManager.Delete(layer.Digest)
 			return err
--- a/types/model/name.go
+++ b/types/model/name.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"fmt"
 	"log/slog"
+	"net/url"
 	"path/filepath"
 	"strings"
 )
@@ -35,22 +36,25 @@ func Unqualified(n Name) error {
 const MissingPart = "!MISSING!"

 const (
-	defaultHost      = "registry.ollama.ai"
-	defaultNamespace = "library"
-	defaultTag       = "latest"
+	defaultHost           = "registry.ollama.ai"
+	defaultNamespace      = "library"
+	defaultTag            = "latest"
+	defaultProtocolScheme = "https"
 )

 // DefaultName returns a name with the default values for the host, namespace,
-// and tag parts. The model and digest parts are empty.
+// tag, and protocol scheme parts. The model and digest parts are empty.
 //
 //   - The default host is ("registry.ollama.ai")
 //   - The default namespace is ("library")
 //   - The default tag is ("latest")
+//   - The default protocol scheme is ("https")
 func DefaultName() Name {
 	return Name{
-		Host:      defaultHost,
-		Namespace: defaultNamespace,
-		Tag:       defaultTag,
+		Host:           defaultHost,
+		Namespace:      defaultNamespace,
+		Tag:            defaultTag,
+		ProtocolScheme: defaultProtocolScheme,
 	}
 }

@@ -87,10 +91,11 @@ func (k partKind) String() string {
 // It is not guaranteed to be valid. Use [Name.IsValid] to check if the name
 // is valid.
 type Name struct {
-	Host      string
-	Namespace string
-	Model     string
-	Tag       string
+	Host           string
+	Namespace      string
+	Model          string
+	Tag            string
+	ProtocolScheme string
 }

 // ParseName parses and assembles a Name from a name string. The
@@ -160,7 +165,9 @@ func ParseNameBare(s string) Name {
 	}

 	scheme, host, ok := strings.Cut(s, "://")
-	if !ok {
+	if ok {
+		n.ProtocolScheme = scheme
+	} else {
 		host = scheme
 	}
 	n.Host = host
@@ -189,12 +196,13 @@ func ParseNameFromFilepath(s string) (n Name) {
 	return n
 }

-// Merge merges the host, namespace, and tag parts of the two names,
+// Merge merges the host, namespace, tag, and protocol scheme parts of the two names,
 // preferring the non-empty parts of a.
 func Merge(a, b Name) Name {
 	a.Host = cmp.Or(a.Host, b.Host)
 	a.Namespace = cmp.Or(a.Namespace, b.Namespace)
 	a.Tag = cmp.Or(a.Tag, b.Tag)
+	a.ProtocolScheme = cmp.Or(a.ProtocolScheme, b.ProtocolScheme)
 	return a
 }

@@ -305,6 +313,23 @@ func (n Name) EqualFold(o Name) bool {
 		strings.EqualFold(n.Tag, o.Tag)
 }

+// BaseURL returns the registry base URL built from the name's protocol scheme and host.
+func (n Name) BaseURL() *url.URL {
+	return &url.URL{
+		Scheme: n.ProtocolScheme,
+		Host:   n.Host,
+	}
+}
+
+// DisplayNamespaceModel returns the namespace and model joined by "/".
+func (n Name) DisplayNamespaceModel() string {
+	var b strings.Builder
+	b.WriteString(n.Namespace)
+	b.WriteByte('/')
+	b.WriteString(n.Model)
+	return b.String()
+}
+
 func isValidLen(kind partKind, s string) bool {
 	switch kind {
 	case kindHost:
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bruce MacDonald	e6f5a982d3	cmd: add usage cmd to chat to see token consumption Adding a `/usage` command to interactive cli chat sessions that displays the tokens used in the current session. This can be used alongside the model's context window to understand when a context shift is going to happen.	2026-01-27 17:14:25 -08:00
Gabe Goodhart	7b62c41060	cmd/config: use envconfig.Host() for base API in launch config packages (#13937 )	2026-01-27 13:30:00 -08:00
Parth Sareen	26acab64b7	docs: add clawdbot (#13925 )	2026-01-26 18:32:54 -08:00
Gyungrai Wang	e0f03790b1	parsers/ministral: fix nested tool call parsing by counting brace nesting (#13905 ) * parsers/ministral: fix nested tool call parsing by counting brace nesting * fix lint error * parsers: refactor ministral parser The old one was very tied to expecting to see only one token at a time, which I don't like to assume (who knows what the future might hold wrt speculative decoding, etc). This new one follows a similar structure to qwen3-coder's parser, which incidentally makes it easier to test as well (since we can test the individual events that come out when given particular inputs). --------- Co-authored-by: Devon Rifkin <drifkin@drifkin.net>	2026-01-26 15:03:43 -08:00
Parth Sareen	3ab842b0f5	cmd: clawdbot config fixes (#13922 )	2026-01-26 14:34:29 -08:00
Parth Sareen	b8e8ef8929	cmd: ollama launch clawdbot (#13921 )	2026-01-26 13:40:59 -08:00
Parth Sareen	465d124183	cmd: fix opencode config (#13894 )	2026-01-24 18:42:56 -08:00
Parth Sareen	d310e56fa3	cmd: add fallback for claude (#13892 )	2026-01-24 18:26:01 -08:00
Jeffrey Morgan	a1ca428c90	glm4moelite: fix attention scale calculation (#13893 ) Use the original key dimension (qkNopeHeadDim + qkRopeHeadDim = 256) for the attention scale instead of the MLA absorbed dimension (kvLoraRank + qkRopeHeadDim = 576). MLA absorption is a mathematically equivalent reorganization of the attention computation - it should not change the effective attention scale. The scale should match training, which uses 1/sqrt(256). This improves tool calling and model looping issues.	2026-01-24 17:48:09 -08:00
Jeffrey Morgan	16750865d1	glm4moelite: quantize more tensors to q8_0 and avoid double BOS token (#13891 )	2026-01-24 16:33:54 -08:00
Jeffrey Morgan	f3b476c592	build: add -O3 optimization to CGO flags (#13877 ) CGO_CFLAGS and CGO_CXXFLAGS were being set without optimization flags, which overrides Go's default -O2 and results in unoptimized C++ code. This caused significant performance degradation in release builds compared to local `go build` which uses the default optimization. - build_darwin.sh: add -O3 to CGO_CFLAGS and CGO_CXXFLAGS exports - Dockerfile: preserve CGO_CFLAGS/CGO_CXXFLAGS from build args instead of overwriting them - app/README.md: update documentation to include -O3	2026-01-24 10:55:38 -08:00
Parth Sareen	5267d31d56	docs: ollama launch (#13852 )	2026-01-23 23:18:50 -08:00
Stillhart	b44f56319f	README: Update the "Ollama for ruby" to the most popular and maintained ruby gem. (#13855 ) * update README ruby link the ollama-ai ruby gem is vastly less popular and seems unmaintained https://rubygems.org/gems/ollama-ai the de facto standard with the most downloads in the ruby ecosystem is ruby_llm https://rubygems.org/gems/ruby_llm I would link to that to avoid complication and guarantee feature compatibility with ollama. * Update gem link ruby_llm from website to GitHub ollama links mostly to github, not project websites, hence link to ruby_llm github.	2026-01-24 01:24:52 -05:00
Jeffrey Morgan	0209c268bb	llama: fix CUDA MMA errors in release build (#13874 )	2026-01-23 20:10:04 -08:00
Jeffrey Morgan	912d984346	llama: fix fattn-tile shared memory overflow on sm_50/52 (#13872 ) Use nthreads=128 for ncols=4 configurations in flash attention tile kernel to reduce shared memory usage below 48KB limit on Maxwell architectures (sm_50/52). With nthreads=256 and ncols=4, np=2 which caused shared memory to exceed 48KB. With nthreads=128 and ncols=4, np=1 keeps shared memory under the limit.	2026-01-23 19:22:32 -08:00
Parth Sareen	aae6ecbaff	cmd: rename ollama config to ollama launch (#13871 )	2026-01-23 18:40:40 -08:00
Jeffrey Morgan	64737330a4	Re-apply "model: add MLA absorption for glm4moelite" with fix (#13870 ) The nvidia_fp32 config for (576, 512) head sizes had nbatch_fa=32, which caused zero-sized arrays when computing array dimensions: nbatch_fa / (np * warp_size) = 32 / (2 * 32) = 0 This resulted in CUDA compilation failures on CUDA 12 (Windows and Linux arm64): - "static assertion failed with nbatch_fa % (np*warp_size) != 0" - "the size of an array must be greater than zero" Fix by changing nbatch_fa from 32 to 64 for all (576, 512) configs in the nvidia_fp32 function, matching the nvidia_fp16 and AMD configs.	2026-01-23 18:40:28 -08:00
Jeffrey Morgan	2eda97f1c3	Revert "model: add MLA absorption for glm4moelite (#13810 )" (#13869 ) This reverts commit `1044b0419a`.	2026-01-23 17:14:15 -08:00
Jeffrey Morgan	66831dcf70	x/imagegen: fix image editing support (#13866 ) - Fix panic in ollama show for image gen models (safe type assertion) - Add vision capability for Flux2KleinPipeline models at create time - Flatten transparent PNG images onto white background for better results	2026-01-23 15:37:17 -08:00
Jeffrey Morgan	1044b0419a	model: add MLA absorption for glm4moelite (#13810 ) * model: add MLA absorption for glm4moelite Split the combined KV_B tensor into separate K_B and V_B tensors during conversion, enabling MLA (Multi-head Latent Attention) absorption which compresses the KV cache for improved efficiency. * ggml: enable MLA flash attention for GLM-4.7-flash Add support for gqa_ratio 4 in MLA flash attention kernels. GLM-4.7-flash uses head size 576 with gqa_ratio 4, which was previously only supported for gqa_ratio 16 (DeepSeek). Metal changes: - Enable head size 576 for flash attention - Increase simdgroups to 8 for large heads (>=512) - Add case 8 kernel dispatch for 8 simdgroups CUDA changes: - Add gqa_ratio 4 support for head 576/512 - Add tile configs for (576, 512, 4) and (576, 512, 8) - Add MMA config cases for ncols 4 - Add template instances for ncols2=4 * model: add compatibility validation for glm4moelite architecture	2026-01-23 14:47:42 -08:00
Parth Sareen	771d9280ec	cmd: ollama config fix droid model name configuration (#13856 )	2026-01-23 11:44:22 -08:00
Jeffrey Morgan	862bc0a3bf	x/imagegen: respect stream=false in /api/generate (#13853 ) When stream=false is set for image generation requests, return a single JSON response instead of streaming multiple ndjson progress updates.	2026-01-22 22:16:39 -08:00
Jeffrey Morgan	c01608b6a1	x/imagegen: add image edit capabilities (#13846 )	2026-01-22 20:35:08 -08:00
Parth Sareen	199c41e16e	cmd: `ollama config` command to help configure integrations to use Ollama (#13712 )	2026-01-22 20:17:11 -08:00
Jeffrey Morgan	3b3bf6c217	x/imagegen: replace memory estimation with actual weight size (#13848 ) Remove static VRAM estimation (EstimateVRAM, CheckMemoryRequirements) which wasn't helpful. Instead, report the actual tensor weight size from the manifest for ollama ps. - Remove memory estimation check from runner startup - Remove EstimateVRAM, CheckMemoryRequirements, modelVRAMEstimates - Add TotalTensorSize() to get actual weight size from manifest - Use weight size for Server.vramSize instead of estimates Note: This is better than showing 0 or inaccurate estimates, but the weight size is a drastic underestimation of actual memory usage since it doesn't account for activations, intermediate tensors, or MLX overhead. Future work should query real-time memory from MLX (e.g., MetalGetActiveMemory) for accurate reporting.	2026-01-22 18:32:41 -08:00
Parth Sareen	f52c21f457	fix: handle Enter key pressed during model loading (#13839 )	2026-01-22 18:32:02 -08:00
Jeffrey Morgan	b5d0f72f16	x/imagegen: remove qwen_image and qwen_image_edit models (#13827 ) Remove the Qwen image generation and image editing model packages to clean up the codebase. These models will be reintroduced later. - Delete x/imagegen/models/qwen_image/ (10 files) - Delete x/imagegen/models/qwen_image_edit/ (5 files) - Remove related CLI flags and imports from cmd/engine/main.go - Update comments in cache/step.go to remove Qwen-specific references	2026-01-21 13:37:08 -08:00
Patrick Devine	148a1be0a3	Clean up the manifest and modelpath (#13807 )	2026-01-21 11:46:17 -08:00
next-n	d6dd430abd	x/imagegen: respect OLLAMA_MODELS for manifests and blobs (#13797 )	2026-01-20 13:01:52 -08:00
Daniel Hiltgen	ae78112c50	test: add lfm2.5-thinking coverage (#13802 )	2026-01-20 12:57:02 -08:00
Jeffrey Morgan	01cf7445f3	model: add lfm2 architecture and LFM2.5-1.2B-Thinking support (#13792 ) Co-Authored-By: TommyBoiss <165361500+TommyBoiss@users.noreply.github.com>	2026-01-20 12:20:53 -08:00
Jeffrey Morgan	31085d5e53	fix: use api.GenerateRequest for image generation test (#13793 ) Remove non-existent x/imagegen/api import and use the standard api.GenerateRequest/GenerateResponse with the Image field instead.	2026-01-20 03:23:31 -08:00
Daniel Hiltgen	c42e9d244f	test: add image gen test case (#13698 ) * test: fix type regression in tools test. * test: add image gen integration test	2026-01-19 16:01:31 -08:00
Devon Rifkin	e98b5e8b4e	`/api/show`: default to empty model_info (#13785 ) For `/api/show`, a fully missing `model_info` field trips up various integrators (including a recent Android Studio integration). The primary source of missing info tends to come from models with a remote that are also missing other data. It seems better to me to return an empty `model_info` than making up some other fields within `model_info` (like saying the architecture is `remote` or something like that). So this does slightly change `/api/show`'s behavior that possibly someone is relying on, but it seems more important to ensure the field is always there (from a quick sampling integrations seem to be robust to missing fields _within_ it). Fixes: https://github.com/ollama/ollama/issues/13783	2026-01-19 15:26:17 -08:00