Compare commits

..

2 Commits

Author SHA1 Message Date
Parth Sareen
6b2abfb433 server: add tests and fix isHuggingFaceURL edge case
- Add comprehensive tests for isHuggingFaceURL and getNumDownloadParts
- Fix bug where domains ending in huggingface.co (like nothuggingface.co)
  would incorrectly match as HuggingFace URLs
- Improve code comments with more detailed documentation
2026-01-18 16:45:17 -08:00
Parth Sareen
805ed4644c server: reduce download concurrency for HuggingFace URLs
Reduces concurrent download parts from 16 to 4 for HuggingFace URLs
to avoid triggering rate limits (HTTP 429 errors).

Adds OLLAMA_HF_CONCURRENCY environment variable for users who want
to customize the concurrency level.

Fixes #13297
2026-01-18 16:38:49 -08:00
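
The edge case named in the first commit above is a host-suffix check: a naive `strings.HasSuffix(host, "huggingface.co")` also matches hosts like `nothuggingface.co`. A minimal sketch of the corrected check, assuming the commit's description (the helper name comes from the commit message; the body here is illustrative, not the repository's exact code):

```go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

// isHuggingFaceURL reports whether u points at huggingface.co or one of its
// subdomains. Requiring the bare host or a ".huggingface.co" suffix (note the
// leading dot) avoids false positives such as "nothuggingface.co".
func isHuggingFaceURL(rawURL string) bool {
	u, err := url.Parse(rawURL)
	if err != nil {
		return false
	}
	host := strings.ToLower(u.Hostname())
	return host == "huggingface.co" || strings.HasSuffix(host, ".huggingface.co")
}

func main() {
	fmt.Println(isHuggingFaceURL("https://huggingface.co/org/model"))    // true
	fmt.Println(isHuggingFaceURL("https://cdn.huggingface.co/blob"))     // true
	fmt.Println(isHuggingFaceURL("https://nothuggingface.co/org/model")) // false
}
```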
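The second commit lowers the default part count for HuggingFace downloads and adds an override. A sketch of how the selection might look, assuming only what the commit message states (the 16/4 defaults and the `OLLAMA_HF_CONCURRENCY` variable); `getNumDownloadParts` is named in the first commit's test list, but its signature here is hypothetical:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

const (
	defaultNumDownloadParts     = 16 // general case
	huggingFaceNumDownloadParts = 4  // lower to avoid HTTP 429 rate limits
)

// getNumDownloadParts picks the download concurrency for a URL. HuggingFace
// hosts default to a lower value, overridable via OLLAMA_HF_CONCURRENCY.
func getNumDownloadParts(isHuggingFace bool) int {
	if !isHuggingFace {
		return defaultNumDownloadParts
	}
	if v := os.Getenv("OLLAMA_HF_CONCURRENCY"); v != "" {
		if n, err := strconv.Atoi(v); err == nil && n > 0 {
			return n
		}
	}
	return huggingFaceNumDownloadParts
}

func main() {
	fmt.Println(getNumDownloadParts(false)) // 16
	fmt.Println(getNumDownloadParts(true))  // 4 unless OLLAMA_HF_CONCURRENCY is set
}
```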
183 changed files with 10460 additions and 28527 deletions

View File

@@ -190,7 +190,7 @@ if(MLX_ENGINE)
  install(TARGETS mlx mlxc
      RUNTIME_DEPENDENCIES
          DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
-         PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc nvrtc-builtins cudnn nccl openblas gfortran
+         PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc cudnn nccl
          PRE_EXCLUDE_REGEXES ".*"
      RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
      LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX

View File

@@ -32,7 +32,7 @@ ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
# install epel-release for ccache
RUN yum install -y yum-utils epel-release \
-    && dnf install -y clang ccache git \
+    && dnf install -y clang ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
ENV CC=clang CXX=clang++
@@ -149,7 +149,6 @@ COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
COPY x/ml/backend/mlx x/ml/backend/mlx
COPY go.mod go.sum .
-COPY MLX_VERSION .
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download
@@ -157,6 +156,14 @@ RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
    && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
    && cmake --install build --component MLX --strip --parallel ${PARALLEL}
+COPY . .
+ARG GOFLAGS="'-ldflags=-w -s'"
+ENV CGO_ENABLED=1
+ARG CGO_CFLAGS
+ARG CGO_CXXFLAGS
+RUN mkdir -p dist/bin
+RUN --mount=type=cache,target=/root/.cache/go-build \
+    go build -tags mlx -trimpath -buildmode=pie -o dist/bin/ollama-mlx .
FROM base AS build
WORKDIR /go/src/github.com/ollama/ollama
@@ -165,14 +172,12 @@ RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download
COPY . .
-# Clone mlx-c headers for CGO (version from MLX_VERSION file)
-RUN git clone --depth 1 --branch "$(cat MLX_VERSION)" https://github.com/ml-explore/mlx-c.git build/_deps/mlx-c-src
ARG GOFLAGS="'-ldflags=-w -s'"
ENV CGO_ENABLED=1
-ENV CGO_CFLAGS="-I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
+ARG CGO_CFLAGS
ARG CGO_CXXFLAGS
RUN --mount=type=cache,target=/root/.cache/go-build \
-    go build -tags mlx -trimpath -buildmode=pie -o /bin/ollama .
+    go build -trimpath -buildmode=pie -o /bin/ollama .

FROM --platform=linux/amd64 scratch AS amd64
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
@@ -180,6 +185,7 @@ COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
COPY --from=vulkan dist/lib/ollama /lib/ollama/
COPY --from=mlx /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/
+COPY --from=mlx /go/src/github.com/ollama/ollama/dist/bin/ /bin/

FROM --platform=linux/arm64 scratch AS arm64
# COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/

View File

@@ -1 +0,0 @@
-v0.4.1

View File

@@ -48,7 +48,7 @@ ollama run gemma3
## Model library

-Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library "ollama model library")
+Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library')

Here are some example models that can be downloaded:
@@ -79,7 +79,7 @@ Here are some example models that can be downloaded:
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Granite-3.3 | 8B | 4.9GB | `ollama run granite3.3` |

> [!NOTE]
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -260,38 +260,6 @@ Finally, in a separate shell, run a model:
./ollama run llama3.2
```

-## Building with MLX (experimental)
-
-First build the MLX libraries:
-
-```shell
-cmake --preset MLX
-cmake --build --preset MLX --parallel
-cmake --install build --component MLX
-```
-
-When building with the `-tags mlx` flag, the main `ollama` binary includes MLX support for experimental features like image generation:
-
-```shell
-go build -tags mlx .
-```
-
-Finally, start the server:
-
-```
-./ollama serve
-```
-
-### Building MLX with CUDA
-
-When building with CUDA, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with default architectures:
-
-```shell
-cmake --preset 'MLX CUDA 13'
-cmake --build --preset 'MLX CUDA 13' --parallel
-cmake --install build --component MLX
-```
-
## REST API

Ollama has a REST API for running and managing models.
@@ -322,7 +290,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
### Web & Desktop ### Web & Desktop
- [Onyx](https://github.com/onyx-dot-app/onyx)
- [Open WebUI](https://github.com/open-webui/open-webui) - [Open WebUI](https://github.com/open-webui/open-webui)
- [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat) - [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat)
- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted) - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
@@ -454,7 +421,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable)
- [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)
- [Tiny Notepad](https://pypi.org/project/tiny-notepad) (A lightweight, notepad-like interface to chat with ollama available on PyPI)
- [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.)
- [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.)
- [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.)
- [ai-hub](https://github.com/Aj-Seven/ai-hub) (AI Hub supports multiple models via API keys and Chat support via Ollama API.)
@@ -526,7 +493,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
### Database

- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
  - [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
@@ -669,7 +636,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.

### Observability

- [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration to Ollama.
- [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
@@ -678,5 +644,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.

### Security

- [Ollama Fortress](https://github.com/ParisNeo/ollama_proxy_server)

View File

@@ -127,20 +127,6 @@ type GenerateRequest struct {
    // each with an associated log probability. Only applies when Logprobs is true.
    // Valid values are 0-20. Default is 0 (only return the selected token's logprob).
    TopLogprobs int `json:"top_logprobs,omitempty"`
-
-   // Experimental: Image generation fields (may change or be removed)
-
-   // Width is the width of the generated image in pixels.
-   // Only used for image generation models.
-   Width int32 `json:"width,omitempty"`
-
-   // Height is the height of the generated image in pixels.
-   // Only used for image generation models.
-   Height int32 `json:"height,omitempty"`
-
-   // Steps is the number of diffusion steps for image generation.
-   // Only used for image generation models.
-   Steps int32 `json:"steps,omitempty"`
}

// ChatRequest describes a request sent by [Client.Chat].
@@ -749,7 +735,7 @@ type ShowResponse struct {
    Messages []Message `json:"messages,omitempty"`
    RemoteModel string `json:"remote_model,omitempty"`
    RemoteHost string `json:"remote_host,omitempty"`
-   ModelInfo map[string]any `json:"model_info"`
+   ModelInfo map[string]any `json:"model_info,omitempty"`
    ProjectorInfo map[string]any `json:"projector_info,omitempty"`
    Tensors []Tensor `json:"tensors,omitempty"`
    Capabilities []model.Capability `json:"capabilities,omitempty"`
@@ -874,20 +860,6 @@ type GenerateResponse struct {
    // Logprobs contains log probability information for the generated tokens,
    // if requested via the Logprobs parameter.
    Logprobs []Logprob `json:"logprobs,omitempty"`
-
-   // Experimental: Image generation fields (may change or be removed)
-
-   // Image contains a base64-encoded generated image.
-   // Only present for image generation models.
-   Image string `json:"image,omitempty"`
-
-   // Completed is the number of completed steps in image generation.
-   // Only present for image generation models during streaming.
-   Completed int64 `json:"completed,omitempty"`
-
-   // Total is the total number of steps for image generation.
-   // Only present for image generation models during streaming.
-   Total int64 `json:"total,omitempty"`
}

// ModelDetails provides details about a model.

View File

@@ -14,7 +14,6 @@ extern NSString *SystemWidePath;
@interface AppDelegate () <NSWindowDelegate, WKNavigationDelegate, WKUIDelegate>
@property(strong, nonatomic) NSStatusItem *statusItem;
@property(assign, nonatomic) BOOL updateAvailable;
-@property(assign, nonatomic) BOOL systemShutdownInProgress;
@end

@implementation AppDelegate
@@ -41,13 +40,6 @@ bool firstTimeRun,startHidden; // Set in run before initialization
}

- (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
-  // Register for system shutdown/restart notification so we can allow termination
-  [[[NSWorkspace sharedWorkspace] notificationCenter]
-      addObserver:self
-         selector:@selector(systemWillPowerOff:)
-             name:NSWorkspaceWillPowerOffNotification
-           object:nil];
-
  // if we're in development mode, set the app icon
  NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
  if (![bundlePath hasSuffix:@".app"]) {
@@ -286,18 +278,7 @@ bool firstTimeRun,startHidden; // Set in run before initialization
  [NSApp activateIgnoringOtherApps:YES];
}

-- (void)systemWillPowerOff:(NSNotification *)notification {
-  // Set flag so applicationShouldTerminate: knows to allow termination.
-  // The system will call applicationShouldTerminate: after posting this notification.
-  self.systemShutdownInProgress = YES;
-}
-
- (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication *)sender {
-  // Allow termination if the system is shutting down or restarting
-  if (self.systemShutdownInProgress) {
-    return NSTerminateNow;
-  }
-  // Otherwise just hide the app (for Cmd+Q, close button, etc.)
  [NSApp hide:nil];
  [NSApp setActivationPolicy:NSApplicationActivationPolicyAccessory];
  return NSTerminateCancel;

View File

@@ -35,7 +35,6 @@ import (
"golang.org/x/term" "golang.org/x/term"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/cmd/config"
"github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/parser" "github.com/ollama/ollama/parser"
@@ -47,9 +46,8 @@ import (
"github.com/ollama/ollama/types/syncmap" "github.com/ollama/ollama/types/syncmap"
"github.com/ollama/ollama/version" "github.com/ollama/ollama/version"
xcmd "github.com/ollama/ollama/x/cmd" xcmd "github.com/ollama/ollama/x/cmd"
"github.com/ollama/ollama/x/create"
xcreateclient "github.com/ollama/ollama/x/create/client"
"github.com/ollama/ollama/x/imagegen" "github.com/ollama/ollama/x/imagegen"
imagegenclient "github.com/ollama/ollama/x/imagegen/client"
) )
const ConnectInstructions = "To sign in, navigate to:\n %s\n\n" const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"
@@ -95,87 +93,15 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
    p := progress.NewProgress(os.Stderr)
    defer p.Stop()

-   // Validate model name early to fail fast
-   modelName := args[0]
-   name := model.ParseName(modelName)
-   if !name.IsValid() {
-       return fmt.Errorf("invalid model name: %s", modelName)
-   }
-
-   // Check for --experimental flag for safetensors model creation
-   experimental, _ := cmd.Flags().GetBool("experimental")
-   if experimental {
-       // Get Modelfile content - either from -f flag or default to "FROM ."
-       var reader io.Reader
-
-       filename, err := getModelfileName(cmd)
-       if os.IsNotExist(err) || filename == "" {
-           // No Modelfile specified or found - use default
-           reader = strings.NewReader("FROM .\n")
-       } else if err != nil {
-           return err
-       } else {
-           f, err := os.Open(filename)
-           if err != nil {
-               return err
-           }
-           defer f.Close()
-           reader = f
-       }
-
-       // Parse the Modelfile
-       modelfile, err := parser.ParseFile(reader)
-       if err != nil {
-           return fmt.Errorf("failed to parse Modelfile: %w", err)
-       }
-
-       // Extract FROM path and configuration
-       var modelDir string
-       mfConfig := &xcreateclient.ModelfileConfig{}
-       for _, cmd := range modelfile.Commands {
-           switch cmd.Name {
-           case "model":
-               modelDir = cmd.Args
-           case "template":
-               mfConfig.Template = cmd.Args
-           case "system":
-               mfConfig.System = cmd.Args
-           case "license":
-               mfConfig.License = cmd.Args
-           }
-       }
-       if modelDir == "" {
-           modelDir = "."
-       }
-
-       // Resolve relative paths based on Modelfile location
-       if !filepath.IsAbs(modelDir) && filename != "" {
-           modelDir = filepath.Join(filepath.Dir(filename), modelDir)
-       }
-
-       quantize, _ := cmd.Flags().GetString("quantize")
-       return xcreateclient.CreateModel(xcreateclient.CreateOptions{
-           ModelName: modelName,
-           ModelDir:  modelDir,
-           Quantize:  quantize,
-           Modelfile: mfConfig,
-       }, p)
-   }
-
    var reader io.Reader
    filename, err := getModelfileName(cmd)
    if os.IsNotExist(err) {
        if filename == "" {
            // No Modelfile found - check if current directory is an image gen model
-           if create.IsTensorModelDir(".") {
+           if imagegen.IsTensorModelDir(".") {
                quantize, _ := cmd.Flags().GetString("quantize")
-               return xcreateclient.CreateModel(xcreateclient.CreateOptions{
-                   ModelName: modelName,
-                   ModelDir:  ".",
-                   Quantize:  quantize,
-               }, p)
+               return imagegenclient.CreateModel(args[0], ".", quantize, p)
            }
            reader = strings.NewReader("FROM .\n")
        } else {
@@ -208,7 +134,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
    }
    spinner.Stop()

-   req.Model = modelName
+   req.Model = args[0]
    quantize, _ := cmd.Flags().GetString("quantize")
    if quantize != "" {
        req.Quantize = quantize
@@ -601,7 +527,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
    }

    // Check if this is an image generation model
-   if slices.Contains(info.Capabilities, model.CapabilityImage) {
+   if slices.Contains(info.Capabilities, model.CapabilityImageGeneration) {
        if opts.Prompt == "" && !interactive {
            return errors.New("image generation models require a prompt. Usage: ollama run " + name + " \"your prompt here\"")
        }
@@ -900,11 +826,11 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
    for _, arg := range args {
        // Unload the model if it's running before deletion
        if err := loadOrUnloadModel(cmd, &runOptions{
-           Model:     arg,
+           Model:     args[0],
            KeepAlive: &api.Duration{Duration: 0},
        }); err != nil {
            if !strings.Contains(strings.ToLower(err.Error()), "not found") {
-               fmt.Fprintf(os.Stderr, "Warning: unable to stop model '%s'\n", arg)
+               fmt.Fprintf(os.Stderr, "Warning: unable to stop model '%s'\n", args[0])
            }
        }
@@ -1816,22 +1742,15 @@ func NewCLI() *cobra.Command {
    rootCmd.Flags().BoolP("version", "v", false, "Show version information")

    createCmd := &cobra.Command{
        Use:   "create MODEL",
        Short: "Create a model",
        Args:  cobra.ExactArgs(1),
-       PreRunE: func(cmd *cobra.Command, args []string) error {
-           // Skip server check for experimental mode (writes directly to disk)
-           if experimental, _ := cmd.Flags().GetBool("experimental"); experimental {
-               return nil
-           }
-           return checkServerHeartbeat(cmd, args)
-       },
-       RunE: CreateHandler,
+       PreRunE: checkServerHeartbeat,
+       RunE:    CreateHandler,
    }

    createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
    createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
-   createCmd.Flags().Bool("experimental", false, "Enable experimental safetensors model creation")

    showCmd := &cobra.Command{
        Use:   "show MODEL",
@@ -1986,7 +1905,6 @@ func NewCLI() *cobra.Command {
    } {
        switch cmd {
        case runCmd:
-           imagegen.AppendFlagsDocs(cmd)
            appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
        case serveCmd:
            appendEnvDocs(cmd, []envconfig.EnvVar{
@@ -2027,7 +1945,6 @@ func NewCLI() *cobra.Command {
        copyCmd,
        deleteCmd,
        runnerCmd,
-       config.ConfigCmd(checkServerHeartbeat),
    )

    return rootCmd

View File

@@ -1555,7 +1555,7 @@ func TestShowInfoImageGen(t *testing.T) {
            ParameterSize:     "10.3B",
            QuantizationLevel: "FP8",
        },
-       Capabilities: []model.Capability{model.CapabilityImage},
+       Capabilities: []model.Capability{model.CapabilityImageGeneration},
        Requires: "0.14.0",
    }, false, &b)
    if err != nil {

View File

@@ -1,36 +0,0 @@
package config

import (
    "fmt"
    "os"
    "os/exec"
)

// Claude implements Runner for Claude Code integration
type Claude struct{}

func (c *Claude) String() string { return "Claude Code" }

func (c *Claude) args(model string) []string {
    if model != "" {
        return []string{"--model", model}
    }
    return nil
}

func (c *Claude) Run(model string) error {
    if _, err := exec.LookPath("claude"); err != nil {
        return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
    }

    cmd := exec.Command("claude", c.args(model)...)
    cmd.Stdin = os.Stdin
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    cmd.Env = append(os.Environ(),
        "ANTHROPIC_BASE_URL=http://localhost:11434",
        "ANTHROPIC_API_KEY=",
        "ANTHROPIC_AUTH_TOKEN=ollama",
    )
    return cmd.Run()
}

View File

@@ -1,42 +0,0 @@
package config

import (
    "slices"
    "testing"
)

func TestClaudeIntegration(t *testing.T) {
    c := &Claude{}

    t.Run("String", func(t *testing.T) {
        if got := c.String(); got != "Claude Code" {
            t.Errorf("String() = %q, want %q", got, "Claude Code")
        }
    })

    t.Run("implements Runner", func(t *testing.T) {
        var _ Runner = c
    })
}

func TestClaudeArgs(t *testing.T) {
    c := &Claude{}

    tests := []struct {
        name  string
        model string
        want  []string
    }{
        {"with model", "llama3.2", []string{"--model", "llama3.2"}},
        {"empty model", "", nil},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got := c.args(tt.model)
            if !slices.Equal(got, tt.want) {
                t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
            }
        })
    }
}

View File

@@ -1,61 +0,0 @@
package config

import (
    "fmt"
    "os"
    "os/exec"
    "strings"

    "golang.org/x/mod/semver"
)

// Codex implements Runner for Codex integration
type Codex struct{}

func (c *Codex) String() string { return "Codex" }

func (c *Codex) args(model string) []string {
    args := []string{"--oss"}
    if model != "" {
        args = append(args, "-m", model)
    }
    return args
}

func (c *Codex) Run(model string) error {
    if err := checkCodexVersion(); err != nil {
        return err
    }

    cmd := exec.Command("codex", c.args(model)...)
    cmd.Stdin = os.Stdin
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    return cmd.Run()
}

func checkCodexVersion() error {
    if _, err := exec.LookPath("codex"); err != nil {
        return fmt.Errorf("codex is not installed, install with: npm install -g @openai/codex")
    }

    out, err := exec.Command("codex", "--version").Output()
    if err != nil {
        return fmt.Errorf("failed to get codex version: %w", err)
    }

    // Parse output like "codex-cli 0.87.0"
    fields := strings.Fields(strings.TrimSpace(string(out)))
    if len(fields) < 2 {
        return fmt.Errorf("unexpected codex version output: %s", string(out))
    }

    version := "v" + fields[len(fields)-1]
    minVersion := "v0.81.0"
    if semver.Compare(version, minVersion) < 0 {
        return fmt.Errorf("codex version %s is too old, minimum required is %s, update with: npm update -g @openai/codex", fields[len(fields)-1], "0.81.0")
    }

    return nil
}

View File

@@ -1,28 +0,0 @@
package config

import (
    "slices"
    "testing"
)

func TestCodexArgs(t *testing.T) {
    c := &Codex{}

    tests := []struct {
        name  string
        model string
        want  []string
    }{
        {"with model", "llama3.2", []string{"--oss", "-m", "llama3.2"}},
        {"empty model", "", []string{"--oss"}},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got := c.args(tt.model)
            if !slices.Equal(got, tt.want) {
                t.Errorf("args(%q) = %v, want %v", tt.model, got, tt.want)
            }
        })
    }
}

View File

@@ -1,115 +0,0 @@
// Package config provides integration configuration for external coding tools
// (Claude Code, Codex, Droid, OpenCode) to use Ollama models.
package config

import (
    "encoding/json"
    "errors"
    "fmt"
    "os"
    "path/filepath"
    "strings"
)

type integration struct {
    Models []string `json:"models"`
}

type config struct {
    Integrations map[string]*integration `json:"integrations"`
}

func configPath() (string, error) {
    home, err := os.UserHomeDir()
    if err != nil {
        return "", err
    }
    return filepath.Join(home, ".ollama", "config", "config.json"), nil
}

func load() (*config, error) {
    path, err := configPath()
    if err != nil {
        return nil, err
    }

    data, err := os.ReadFile(path)
    if err != nil {
        if os.IsNotExist(err) {
            return &config{Integrations: make(map[string]*integration)}, nil
        }
        return nil, err
    }

    var cfg config
    if err := json.Unmarshal(data, &cfg); err != nil {
        return nil, fmt.Errorf("failed to parse config: %w, at: %s", err, path)
    }
    if cfg.Integrations == nil {
        cfg.Integrations = make(map[string]*integration)
    }
    return &cfg, nil
}

func save(cfg *config) error {
    path, err := configPath()
    if err != nil {
        return err
    }
    if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
        return err
    }

    data, err := json.MarshalIndent(cfg, "", " ")
    if err != nil {
        return err
    }

    return writeWithBackup(path, data)
}

func saveIntegration(appName string, models []string) error {
    if appName == "" {
        return errors.New("app name cannot be empty")
    }

    cfg, err := load()
    if err != nil {
        return err
    }

    cfg.Integrations[strings.ToLower(appName)] = &integration{
        Models: models,
    }

    return save(cfg)
}

func loadIntegration(appName string) (*integration, error) {
    cfg, err := load()
    if err != nil {
        return nil, err
    }

    ic, ok := cfg.Integrations[strings.ToLower(appName)]
    if !ok {
        return nil, os.ErrNotExist
    }
    return ic, nil
}

func listIntegrations() ([]integration, error) {
    cfg, err := load()
    if err != nil {
        return nil, err
    }

    result := make([]integration, 0, len(cfg.Integrations))
    for _, ic := range cfg.Integrations {
        result = append(result, *ic)
    }
    return result, nil
}

View File

@@ -1,373 +0,0 @@
package config

import (
    "os"
    "path/filepath"
    "strings"
    "testing"
)

// setTestHome sets both HOME (Unix) and USERPROFILE (Windows) for cross-platform tests
func setTestHome(t *testing.T, dir string) {
    t.Setenv("HOME", dir)
    t.Setenv("USERPROFILE", dir)
}

// editorPaths is a test helper that safely calls Paths if the runner implements Editor
func editorPaths(r Runner) []string {
    if editor, ok := r.(Editor); ok {
        return editor.Paths()
    }
    return nil
}

func TestIntegrationConfig(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    t.Run("save and load round-trip", func(t *testing.T) {
        models := []string{"llama3.2", "mistral", "qwen2.5"}
        if err := saveIntegration("claude", models); err != nil {
            t.Fatal(err)
        }

        config, err := loadIntegration("claude")
        if err != nil {
            t.Fatal(err)
        }
        if len(config.Models) != len(models) {
            t.Errorf("expected %d models, got %d", len(models), len(config.Models))
        }
        for i, m := range models {
            if config.Models[i] != m {
                t.Errorf("model %d: expected %s, got %s", i, m, config.Models[i])
            }
        }
    })

    t.Run("defaultModel returns first model", func(t *testing.T) {
        saveIntegration("codex", []string{"model-a", "model-b"})
        config, _ := loadIntegration("codex")
        defaultModel := ""
        if len(config.Models) > 0 {
            defaultModel = config.Models[0]
        }
        if defaultModel != "model-a" {
            t.Errorf("expected model-a, got %s", defaultModel)
        }
    })

    t.Run("defaultModel returns empty for no models", func(t *testing.T) {
        config := &integration{Models: []string{}}
        defaultModel := ""
        if len(config.Models) > 0 {
            defaultModel = config.Models[0]
        }
        if defaultModel != "" {
            t.Errorf("expected empty string, got %s", defaultModel)
        }
    })

    t.Run("app name is case-insensitive", func(t *testing.T) {
        saveIntegration("Claude", []string{"model-x"})
        config, err := loadIntegration("claude")
        if err != nil {
            t.Fatal(err)
        }
        defaultModel := ""
        if len(config.Models) > 0 {
            defaultModel = config.Models[0]
        }
        if defaultModel != "model-x" {
            t.Errorf("expected model-x, got %s", defaultModel)
        }
    })

    t.Run("multiple integrations in single file", func(t *testing.T) {
        saveIntegration("app1", []string{"model-1"})
        saveIntegration("app2", []string{"model-2"})

        config1, _ := loadIntegration("app1")
        config2, _ := loadIntegration("app2")
        defaultModel1 := ""
        if len(config1.Models) > 0 {
            defaultModel1 = config1.Models[0]
        }
        defaultModel2 := ""
        if len(config2.Models) > 0 {
            defaultModel2 = config2.Models[0]
        }
        if defaultModel1 != "model-1" {
            t.Errorf("expected model-1, got %s", defaultModel1)
        }
        if defaultModel2 != "model-2" {
            t.Errorf("expected model-2, got %s", defaultModel2)
        }
    })
}

func TestListIntegrations(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    t.Run("returns empty when no integrations", func(t *testing.T) {
        configs, err := listIntegrations()
        if err != nil {
            t.Fatal(err)
        }
        if len(configs) != 0 {
            t.Errorf("expected 0 integrations, got %d", len(configs))
        }
    })

    t.Run("returns all saved integrations", func(t *testing.T) {
        saveIntegration("claude", []string{"model-1"})
        saveIntegration("droid", []string{"model-2"})

        configs, err := listIntegrations()
        if err != nil {
            t.Fatal(err)
        }
        if len(configs) != 2 {
            t.Errorf("expected 2 integrations, got %d", len(configs))
        }
    })
}

func TestEditorPaths(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    t.Run("returns empty for claude (no Editor)", func(t *testing.T) {
        r := integrations["claude"]
        paths := editorPaths(r)
        if len(paths) != 0 {
            t.Errorf("expected no paths for claude, got %v", paths)
        }
    })

    t.Run("returns empty for codex (no Editor)", func(t *testing.T) {
        r := integrations["codex"]
        paths := editorPaths(r)
        if len(paths) != 0 {
            t.Errorf("expected no paths for codex, got %v", paths)
        }
    })

    t.Run("returns empty for droid when no config exists", func(t *testing.T) {
        r := integrations["droid"]
        paths := editorPaths(r)
        if len(paths) != 0 {
            t.Errorf("expected no paths, got %v", paths)
        }
    })

    t.Run("returns path for droid when config exists", func(t *testing.T) {
        settingsDir, _ := os.UserHomeDir()
        settingsDir = filepath.Join(settingsDir, ".factory")
        os.MkdirAll(settingsDir, 0o755)
        os.WriteFile(filepath.Join(settingsDir, "settings.json"), []byte(`{}`), 0o644)

        r := integrations["droid"]
        paths := editorPaths(r)
        if len(paths) != 1 {
            t.Errorf("expected 1 path, got %d", len(paths))
        }
    })

    t.Run("returns paths for opencode when configs exist", func(t *testing.T) {
        home, _ := os.UserHomeDir()
        configDir := filepath.Join(home, ".config", "opencode")
        stateDir := filepath.Join(home, ".local", "state", "opencode")
        os.MkdirAll(configDir, 0o755)
        os.MkdirAll(stateDir, 0o755)
        os.WriteFile(filepath.Join(configDir, "opencode.json"), []byte(`{}`), 0o644)
        os.WriteFile(filepath.Join(stateDir, "model.json"), []byte(`{}`), 0o644)

        r := integrations["opencode"]
        paths := editorPaths(r)
        if len(paths) != 2 {
            t.Errorf("expected 2 paths, got %d: %v", len(paths), paths)
        }
    })
}

func TestLoadIntegration_CorruptedJSON(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    // Create corrupted config.json file
    dir := filepath.Join(tmpDir, ".ollama", "config")
    os.MkdirAll(dir, 0o755)
    os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{corrupted json`), 0o644)

    // Corrupted file is treated as empty, so loadIntegration returns not found
    _, err := loadIntegration("test")
    if err == nil {
        t.Error("expected error for nonexistent integration in corrupted file")
    }
}

func TestSaveIntegration_NilModels(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    if err := saveIntegration("test", nil); err != nil {
        t.Fatalf("saveIntegration with nil models failed: %v", err)
    }

    config, err := loadIntegration("test")
    if err != nil {
        t.Fatalf("loadIntegration failed: %v", err)
    }
    if config.Models == nil {
        // nil is acceptable
    } else if len(config.Models) != 0 {
        t.Errorf("expected empty or nil models, got %v", config.Models)
    }
}

func TestSaveIntegration_EmptyAppName(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    err := saveIntegration("", []string{"model"})
    if err == nil {
        t.Error("expected error for empty app name, got nil")
    }
    if err != nil && !strings.Contains(err.Error(), "app name cannot be empty") {
        t.Errorf("expected 'app name cannot be empty' error, got: %v", err)
    }
}

func TestLoadIntegration_NonexistentIntegration(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    _, err := loadIntegration("nonexistent")
    if err == nil {
        t.Error("expected error for nonexistent integration, got nil")
    }
    if !os.IsNotExist(err) {
        t.Logf("error type is os.ErrNotExist as expected: %v", err)
    }
}

func TestConfigPath(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    path, err := configPath()
    if err != nil {
        t.Fatal(err)
    }
    expected := filepath.Join(tmpDir, ".ollama", "config", "config.json")
    if path != expected {
        t.Errorf("expected %s, got %s", expected, path)
    }
}

func TestLoad(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    t.Run("returns empty config when file does not exist", func(t *testing.T) {
        cfg, err := load()
        if err != nil {
            t.Fatal(err)
        }
        if cfg == nil {
            t.Fatal("expected non-nil config")
        }
        if cfg.Integrations == nil {
            t.Error("expected non-nil Integrations map")
        }
        if len(cfg.Integrations) != 0 {
            t.Errorf("expected empty Integrations, got %d", len(cfg.Integrations))
        }
    })

    t.Run("loads existing config", func(t *testing.T) {
        path, _ := configPath()
        os.MkdirAll(filepath.Dir(path), 0o755)
        os.WriteFile(path, []byte(`{"integrations":{"test":{"models":["model-a"]}}}`), 0o644)

        cfg, err := load()
        if err != nil {
            t.Fatal(err)
        }
        if cfg.Integrations["test"] == nil {
            t.Fatal("expected test integration")
        }
        if len(cfg.Integrations["test"].Models) != 1 {
            t.Errorf("expected 1 model, got %d", len(cfg.Integrations["test"].Models))
        }
    })

    t.Run("returns error for corrupted JSON", func(t *testing.T) {
        path, _ := configPath()
        os.MkdirAll(filepath.Dir(path), 0o755)
        os.WriteFile(path, []byte(`{corrupted`), 0o644)

        _, err := load()
        if err == nil {
            t.Error("expected error for corrupted JSON")
        }
    })
}

func TestSave(t *testing.T) {
    tmpDir := t.TempDir()
    setTestHome(t, tmpDir)

    t.Run("creates config file", func(t *testing.T) {
        cfg := &config{
            Integrations: map[string]*integration{
                "test": {Models: []string{"model-a", "model-b"}},
            },
        }
        if err := save(cfg); err != nil {
            t.Fatal(err)
        }

        path, _ := configPath()
        if _, err := os.Stat(path); os.IsNotExist(err) {
            t.Error("config file was not created")
        }
    })

    t.Run("round-trip preserves data", func(t *testing.T) {
        cfg := &config{
            Integrations: map[string]*integration{
                "claude": {Models: []string{"llama3.2", "mistral"}},
                "codex":  {Models: []string{"qwen2.5"}},
            },
        }
        if err := save(cfg); err != nil {
            t.Fatal(err)
        }

        loaded, err := load()
        if err != nil {
            t.Fatal(err)
        }
        if len(loaded.Integrations) != 2 {
            t.Errorf("expected 2 integrations, got %d", len(loaded.Integrations))
        }
        if loaded.Integrations["claude"] == nil {
            t.Error("missing claude integration")
        }
        if len(loaded.Integrations["claude"].Models) != 2 {
            t.Errorf("expected 2 models for claude, got %d", len(loaded.Integrations["claude"].Models))
        }
    })
}

View File

@@ -1,164 +0,0 @@
package config

import (
    "encoding/json"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "slices"
)

// Droid implements Runner and Editor for Droid integration
type Droid struct{}

// droidModelEntry represents a custom model entry in Droid's settings.json
type droidModelEntry struct {
    Model           string `json:"model"`
    DisplayName     string `json:"displayName"`
    BaseURL         string `json:"baseUrl"`
    APIKey          string `json:"apiKey"`
    Provider        string `json:"provider"`
    MaxOutputTokens int    `json:"maxOutputTokens"`
    SupportsImages  bool   `json:"supportsImages"`
    ID              string `json:"id"`
    Index           int    `json:"index"`
}

func (d *Droid) String() string { return "Droid" }

func (d *Droid) Run(model string) error {
    if _, err := exec.LookPath("droid"); err != nil {
        return fmt.Errorf("droid is not installed, install from https://docs.factory.ai/cli/getting-started/quickstart")
    }

    // Call Edit() to ensure config is up-to-date before launch
    models := []string{model}
    if config, err := loadIntegration("droid"); err == nil && len(config.Models) > 0 {
        models = config.Models
    }
    if err := d.Edit(models); err != nil {
        return fmt.Errorf("setup failed: %w", err)
    }

    cmd := exec.Command("droid")
    cmd.Stdin = os.Stdin
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    return cmd.Run()
}

func (d *Droid) Paths() []string {
    home, err := os.UserHomeDir()
    if err != nil {
        return nil
    }
    p := filepath.Join(home, ".factory", "settings.json")
    if _, err := os.Stat(p); err == nil {
        return []string{p}
    }
    return nil
}

func (d *Droid) Edit(models []string) error {
    if len(models) == 0 {
        return nil
    }

    home, err := os.UserHomeDir()
    if err != nil {
        return err
    }
    settingsPath := filepath.Join(home, ".factory", "settings.json")
    if err := os.MkdirAll(filepath.Dir(settingsPath), 0o755); err != nil {
        return err
    }

    settings := make(map[string]any)
    if data, err := os.ReadFile(settingsPath); err == nil {
        if err := json.Unmarshal(data, &settings); err != nil {
            return fmt.Errorf("failed to parse settings file: %w, at: %s", err, settingsPath)
        }
    }

    customModels, _ := settings["customModels"].([]any)

    // Keep only non-Ollama models (we'll rebuild Ollama models fresh)
    nonOllamaModels := slices.DeleteFunc(slices.Clone(customModels), func(m any) bool {
        entry, ok := m.(droidModelEntry)
        if !ok {
            return false
        }
        return entry.APIKey != "ollama"
    })

    // Build new Ollama model entries with sequential indices (0, 1, 2, ...)
    var ollamaModels []any
    var defaultModelID string
    for i, model := range models {
        modelID := fmt.Sprintf("custom:%s-%d", model, i)
        ollamaModels = append(ollamaModels, droidModelEntry{
            Model:           model,
            DisplayName:     model,
            BaseURL:         "http://localhost:11434/v1",
            APIKey:          "ollama",
            Provider:        "generic-chat-completion-api",
            MaxOutputTokens: 64000,
            SupportsImages:  false,
            ID:              modelID,
            Index:           i,
        })
        if i == 0 {
            defaultModelID = modelID
        }
    }

    settings["customModels"] = append(ollamaModels, nonOllamaModels...)

    sessionSettings, ok := settings["sessionDefaultSettings"].(map[string]any)
    if !ok {
        sessionSettings = make(map[string]any)
    }
    sessionSettings["model"] = defaultModelID
    if effort, ok := sessionSettings["reasoningEffort"].(string); !ok || !isValidReasoningEffort(effort) {
        sessionSettings["reasoningEffort"] = "none"
    }
    settings["sessionDefaultSettings"] = sessionSettings

    data, err := json.MarshalIndent(settings, "", " ")
    if err != nil {
        return err
    }

    return writeWithBackup(settingsPath, data)
}

func (d *Droid) Models() []string {
    home, err := os.UserHomeDir()
    if err != nil {
        return nil
    }
    settings, err := readJSONFile(filepath.Join(home, ".factory", "settings.json"))
    if err != nil {
        return nil
    }

    customModels, _ := settings["customModels"].([]droidModelEntry)
    var result []string
    for _, m := range customModels {
        if m.APIKey != "ollama" {
            continue
        }
        result = append(result, m.Model)
    }
    return result
}

var validReasoningEfforts = []string{"high", "medium", "low", "none"}

func isValidReasoningEffort(effort string) bool {
    return slices.Contains(validReasoningEfforts, effort)
}

View File

@@ -1,454 +0,0 @@
package config
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
func TestDroidIntegration(t *testing.T) {
d := &Droid{}
t.Run("String", func(t *testing.T) {
if got := d.String(); got != "Droid" {
t.Errorf("String() = %q, want %q", got, "Droid")
}
})
t.Run("implements Runner", func(t *testing.T) {
var _ Runner = d
})
t.Run("implements Editor", func(t *testing.T) {
var _ Editor = d
})
}
func TestDroidEdit(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
cleanup := func() {
os.RemoveAll(settingsDir)
}
readSettings := func() map[string]any {
data, _ := os.ReadFile(settingsPath)
var settings map[string]any
json.Unmarshal(data, &settings)
return settings
}
getCustomModels := func(settings map[string]any) []map[string]any {
models, ok := settings["customModels"].([]any)
if !ok {
return nil
}
var result []map[string]any
for _, m := range models {
if entry, ok := m.(map[string]any); ok {
result = append(result, entry)
}
}
return result
}
t.Run("fresh install creates models with sequential indices", func(t *testing.T) {
cleanup()
if err := d.Edit([]string{"model-a", "model-b"}); err != nil {
t.Fatal(err)
}
settings := readSettings()
models := getCustomModels(settings)
if len(models) != 2 {
t.Fatalf("expected 2 models, got %d", len(models))
}
// Check first model
if models[0]["model"] != "model-a" {
t.Errorf("expected model-a, got %s", models[0]["model"])
}
if models[0]["id"] != "custom:model-a-[Ollama]-0" {
t.Errorf("expected custom:model-a-[Ollama]-0, got %s", models[0]["id"])
}
if models[0]["index"] != float64(0) {
t.Errorf("expected index 0, got %v", models[0]["index"])
}
// Check second model
if models[1]["model"] != "model-b" {
t.Errorf("expected model-b, got %s", models[1]["model"])
}
if models[1]["id"] != "custom:model-b-[Ollama]-1" {
t.Errorf("expected custom:model-b-[Ollama]-1, got %s", models[1]["id"])
}
if models[1]["index"] != float64(1) {
t.Errorf("expected index 1, got %v", models[1]["index"])
}
})
t.Run("sets sessionDefaultSettings.model to first model ID", func(t *testing.T) {
cleanup()
if err := d.Edit([]string{"model-a", "model-b"}); err != nil {
t.Fatal(err)
}
settings := readSettings()
session, ok := settings["sessionDefaultSettings"].(map[string]any)
if !ok {
t.Fatal("sessionDefaultSettings not found")
}
if session["model"] != "custom:model-a-[Ollama]-0" {
t.Errorf("expected custom:model-a-[Ollama]-0, got %s", session["model"])
}
})
t.Run("re-indexes when models removed", func(t *testing.T) {
cleanup()
// Add three models
d.Edit([]string{"model-a", "model-b", "model-c"})
// Remove middle model
d.Edit([]string{"model-a", "model-c"})
settings := readSettings()
models := getCustomModels(settings)
if len(models) != 2 {
t.Fatalf("expected 2 models, got %d", len(models))
}
// Check indices are sequential 0, 1
if models[0]["index"] != float64(0) {
t.Errorf("expected index 0, got %v", models[0]["index"])
}
if models[1]["index"] != float64(1) {
t.Errorf("expected index 1, got %v", models[1]["index"])
}
// Check IDs match new indices
if models[0]["id"] != "custom:model-a-[Ollama]-0" {
t.Errorf("expected custom:model-a-[Ollama]-0, got %s", models[0]["id"])
}
if models[1]["id"] != "custom:model-c-[Ollama]-1" {
t.Errorf("expected custom:model-c-[Ollama]-1, got %s", models[1]["id"])
}
})
t.Run("preserves non-Ollama custom models", func(t *testing.T) {
cleanup()
os.MkdirAll(settingsDir, 0o755)
// Pre-existing non-Ollama model
os.WriteFile(settingsPath, []byte(`{
"customModels": [
{"model": "gpt-4", "displayName": "GPT-4", "provider": "openai"}
]
}`), 0o644)
d.Edit([]string{"model-a"})
settings := readSettings()
models := getCustomModels(settings)
if len(models) != 2 {
t.Fatalf("expected 2 models (1 Ollama + 1 non-Ollama), got %d", len(models))
}
// Ollama model should be first
if models[0]["model"] != "model-a" {
t.Errorf("expected Ollama model first, got %s", models[0]["model"])
}
// Non-Ollama model should be preserved at end
if models[1]["model"] != "gpt-4" {
t.Errorf("expected gpt-4 preserved, got %s", models[1]["model"])
}
})
t.Run("preserves other settings", func(t *testing.T) {
cleanup()
os.MkdirAll(settingsDir, 0o755)
os.WriteFile(settingsPath, []byte(`{
"theme": "dark",
"enableHooks": true,
"sessionDefaultSettings": {"autonomyMode": "auto-high"}
}`), 0o644)
d.Edit([]string{"model-a"})
settings := readSettings()
if settings["theme"] != "dark" {
t.Error("theme was not preserved")
}
if settings["enableHooks"] != true {
t.Error("enableHooks was not preserved")
}
session := settings["sessionDefaultSettings"].(map[string]any)
if session["autonomyMode"] != "auto-high" {
t.Error("autonomyMode was not preserved")
}
})
t.Run("required fields present", func(t *testing.T) {
cleanup()
d.Edit([]string{"test-model"})
settings := readSettings()
models := getCustomModels(settings)
if len(models) != 1 {
t.Fatal("expected 1 model")
}
model := models[0]
requiredFields := []string{"model", "displayName", "baseUrl", "apiKey", "provider", "maxOutputTokens", "id", "index"}
for _, field := range requiredFields {
if model[field] == nil {
t.Errorf("missing required field: %s", field)
}
}
if model["baseUrl"] != "http://localhost:11434/v1" {
t.Errorf("unexpected baseUrl: %s", model["baseUrl"])
}
if model["apiKey"] != "ollama" {
t.Errorf("unexpected apiKey: %s", model["apiKey"])
}
if model["provider"] != "generic-chat-completion-api" {
t.Errorf("unexpected provider: %s", model["provider"])
}
})
t.Run("fixes invalid reasoningEffort", func(t *testing.T) {
cleanup()
os.MkdirAll(settingsDir, 0o755)
// Pre-existing settings with invalid reasoningEffort
os.WriteFile(settingsPath, []byte(`{
"sessionDefaultSettings": {"reasoningEffort": "off"}
}`), 0o644)
d.Edit([]string{"model-a"})
settings := readSettings()
session := settings["sessionDefaultSettings"].(map[string]any)
if session["reasoningEffort"] != "none" {
t.Errorf("expected reasoningEffort to be fixed to 'none', got %s", session["reasoningEffort"])
}
})
t.Run("preserves valid reasoningEffort", func(t *testing.T) {
cleanup()
os.MkdirAll(settingsDir, 0o755)
os.WriteFile(settingsPath, []byte(`{
"sessionDefaultSettings": {"reasoningEffort": "high"}
}`), 0o644)
d.Edit([]string{"model-a"})
settings := readSettings()
session := settings["sessionDefaultSettings"].(map[string]any)
if session["reasoningEffort"] != "high" {
t.Errorf("expected reasoningEffort to remain 'high', got %s", session["reasoningEffort"])
}
})
}
// Edge case tests for droid.go
func TestDroidEdit_CorruptedJSON(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
os.MkdirAll(settingsDir, 0o755)
os.WriteFile(settingsPath, []byte(`{corrupted json content`), 0o644)
// Corrupted JSON should return an error so user knows something is wrong
err := d.Edit([]string{"model-a"})
if err == nil {
t.Fatal("expected error for corrupted JSON, got nil")
}
// Original corrupted file should be preserved (not overwritten)
data, _ := os.ReadFile(settingsPath)
if string(data) != `{corrupted json content` {
t.Errorf("corrupted file was modified: got %s", string(data))
}
}
func TestDroidEdit_WrongTypeCustomModels(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
os.MkdirAll(settingsDir, 0o755)
// customModels is a string instead of array
os.WriteFile(settingsPath, []byte(`{"customModels": "not an array"}`), 0o644)
// Should not panic - wrong type should be handled gracefully
err := d.Edit([]string{"model-a"})
if err != nil {
t.Fatalf("Edit failed with wrong type customModels: %v", err)
}
// Verify models were added correctly
data, _ := os.ReadFile(settingsPath)
var settings map[string]any
json.Unmarshal(data, &settings)
customModels, ok := settings["customModels"].([]any)
if !ok {
t.Fatalf("customModels should be array after setup, got %T", settings["customModels"])
}
if len(customModels) != 1 {
t.Errorf("expected 1 model, got %d", len(customModels))
}
}
func TestDroidEdit_EmptyModels(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
os.MkdirAll(settingsDir, 0o755)
originalContent := `{"customModels": [{"model": "existing"}]}`
os.WriteFile(settingsPath, []byte(originalContent), 0o644)
// Empty models should be no-op
err := d.Edit([]string{})
if err != nil {
t.Fatalf("Edit with empty models failed: %v", err)
}
// Original content should be preserved (file not modified)
data, _ := os.ReadFile(settingsPath)
if string(data) != originalContent {
t.Errorf("empty models should not modify file, but content changed")
}
}
func TestDroidEdit_DuplicateModels(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
// Add same model twice
err := d.Edit([]string{"model-a", "model-a"})
if err != nil {
t.Fatalf("Edit with duplicates failed: %v", err)
}
settings, err := readJSONFile(settingsPath)
if err != nil {
t.Fatalf("readJSONFile failed: %v", err)
}
customModels, _ := settings["customModels"].([]any)
// Document current behavior: duplicates are kept as separate entries
if len(customModels) != 2 {
t.Logf("Note: duplicates result in %d entries (documenting behavior)", len(customModels))
}
}
func TestDroidEdit_MalformedModelEntry(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
os.MkdirAll(settingsDir, 0o755)
// Model entry is a string instead of a map
os.WriteFile(settingsPath, []byte(`{"customModels": ["not a map", 123]}`), 0o644)
err := d.Edit([]string{"model-a"})
if err != nil {
t.Fatalf("Edit with malformed entries failed: %v", err)
}
// Malformed entries should be preserved in nonOllamaModels
settings, _ := readJSONFile(settingsPath)
customModels, _ := settings["customModels"].([]any)
// Should have: 1 new Ollama model + 2 preserved malformed entries
if len(customModels) != 3 {
t.Errorf("expected 3 entries (1 new + 2 preserved malformed), got %d", len(customModels))
}
}
func TestDroidEdit_WrongTypeSessionSettings(t *testing.T) {
d := &Droid{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
settingsDir := filepath.Join(tmpDir, ".factory")
settingsPath := filepath.Join(settingsDir, "settings.json")
os.MkdirAll(settingsDir, 0o755)
// sessionDefaultSettings is a string instead of map
os.WriteFile(settingsPath, []byte(`{"sessionDefaultSettings": "not a map"}`), 0o644)
err := d.Edit([]string{"model-a"})
if err != nil {
t.Fatalf("Edit with wrong type sessionDefaultSettings failed: %v", err)
}
// Should create proper sessionDefaultSettings
settings, _ := readJSONFile(settingsPath)
session, ok := settings["sessionDefaultSettings"].(map[string]any)
if !ok {
t.Fatalf("sessionDefaultSettings should be map after setup, got %T", settings["sessionDefaultSettings"])
}
if session["model"] == nil {
t.Error("expected model to be set in sessionDefaultSettings")
}
}
func TestIsValidReasoningEffort(t *testing.T) {
tests := []struct {
effort string
valid bool
}{
{"high", true},
{"medium", true},
{"low", true},
{"none", true},
{"off", false},
{"", false},
{"HIGH", false}, // case sensitive
{"max", false},
}
for _, tt := range tests {
t.Run(tt.effort, func(t *testing.T) {
got := isValidReasoningEffort(tt.effort)
if got != tt.valid {
t.Errorf("isValidReasoningEffort(%q) = %v, want %v", tt.effort, got, tt.valid)
}
})
}
}
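// A minimal implementation consistent with this table (a sketch, not
// necessarily the actual source) is a case-sensitive membership check:
//
//	func isValidReasoningEffort(effort string) bool {
//		switch effort {
//		case "high", "medium", "low", "none":
//			return true
//		}
//		return false
//	}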

View File

@@ -1,99 +0,0 @@
package config
import (
"bytes"
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
)
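// readJSONFile reads the file at path and unmarshals it into a generic map.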
func readJSONFile(path string) (map[string]any, error) {
data, err := os.ReadFile(path)
if err != nil {
return nil, err
}
var result map[string]any
if err := json.Unmarshal(data, &result); err != nil {
return nil, err
}
return result, nil
}
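// copyFile copies src to dst, preserving the source's permission bits.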
func copyFile(src, dst string) error {
info, err := os.Stat(src)
if err != nil {
return err
}
data, err := os.ReadFile(src)
if err != nil {
return err
}
return os.WriteFile(dst, data, info.Mode().Perm())
}
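// backupDir returns the directory where timestamped backups are kept,
// e.g. /tmp/ollama-backups on Linux (the base varies with os.TempDir).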
func backupDir() string {
return filepath.Join(os.TempDir(), "ollama-backups")
}
// backupToTmp copies srcPath into backupDir as <base>.<unix-timestamp> and
// returns the path of the backup.
func backupToTmp(srcPath string) (string, error) {
dir := backupDir()
if err := os.MkdirAll(dir, 0o755); err != nil {
return "", err
}
backupPath := filepath.Join(dir, fmt.Sprintf("%s.%d", filepath.Base(srcPath), time.Now().Unix()))
if err := copyFile(srcPath, backupPath); err != nil {
return "", err
}
return backupPath, nil
}
// writeWithBackup writes data to path via temp file + rename, backing up any existing file first
func writeWithBackup(path string, data []byte) error {
var backupPath string
// backup must be created before any writes to the target file
if existingContent, err := os.ReadFile(path); err == nil {
if !bytes.Equal(existingContent, data) {
backupPath, err = backupToTmp(path)
if err != nil {
return fmt.Errorf("backup failed: %w", err)
}
}
} else if !os.IsNotExist(err) {
return fmt.Errorf("read existing file: %w", err)
}
dir := filepath.Dir(path)
tmp, err := os.CreateTemp(dir, ".tmp-*")
if err != nil {
return fmt.Errorf("create temp failed: %w", err)
}
tmpPath := tmp.Name()
if _, err := tmp.Write(data); err != nil {
_ = tmp.Close()
_ = os.Remove(tmpPath)
return fmt.Errorf("write failed: %w", err)
}
if err := tmp.Sync(); err != nil {
_ = tmp.Close()
_ = os.Remove(tmpPath)
return fmt.Errorf("sync failed: %w", err)
}
if err := tmp.Close(); err != nil {
_ = os.Remove(tmpPath)
return fmt.Errorf("close failed: %w", err)
}
if err := os.Rename(tmpPath, path); err != nil {
_ = os.Remove(tmpPath)
if backupPath != "" {
_ = copyFile(backupPath, path)
}
return fmt.Errorf("rename failed: %w", err)
}
return nil
}
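// Usage sketch (illustrative; mirrors how OpenCode.Edit in this package
// calls it): marshal the config and hand the bytes to writeWithBackup,
// which handles both the backup and the atomic temp-file + rename:
//
//	data, err := json.MarshalIndent(config, "", "  ")
//	if err != nil {
//		return err
//	}
//	return writeWithBackup(configPath, data)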

View File

@@ -1,502 +0,0 @@
package config
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"runtime"
"testing"
)
func mustMarshal(t *testing.T, v any) []byte {
t.Helper()
data, err := json.MarshalIndent(v, "", " ")
if err != nil {
t.Fatal(err)
}
return data
}
func TestWriteWithBackup(t *testing.T) {
tmpDir := t.TempDir()
t.Run("creates file", func(t *testing.T) {
path := filepath.Join(tmpDir, "new.json")
data := mustMarshal(t, map[string]string{"key": "value"})
if err := writeWithBackup(path, data); err != nil {
t.Fatal(err)
}
content, err := os.ReadFile(path)
if err != nil {
t.Fatal(err)
}
var result map[string]string
if err := json.Unmarshal(content, &result); err != nil {
t.Fatal(err)
}
if result["key"] != "value" {
t.Errorf("expected value, got %s", result["key"])
}
})
t.Run("creates backup in /tmp/ollama-backups", func(t *testing.T) {
path := filepath.Join(tmpDir, "backup.json")
os.WriteFile(path, []byte(`{"original": true}`), 0o644)
data := mustMarshal(t, map[string]bool{"updated": true})
if err := writeWithBackup(path, data); err != nil {
t.Fatal(err)
}
entries, err := os.ReadDir(backupDir())
if err != nil {
t.Fatal("backup directory not created")
}
var foundBackup bool
for _, entry := range entries {
name := entry.Name()
if len(name) > len("backup.json.") && name[:len("backup.json.")] == "backup.json." {
backupPath := filepath.Join(backupDir(), name)
backup, err := os.ReadFile(backupPath)
if err == nil {
var backupData map[string]bool
json.Unmarshal(backup, &backupData)
if backupData["original"] {
foundBackup = true
os.Remove(backupPath)
break
}
}
}
}
if !foundBackup {
t.Error("backup file not created in /tmp/ollama-backups")
}
current, _ := os.ReadFile(path)
var currentData map[string]bool
json.Unmarshal(current, &currentData)
if !currentData["updated"] {
t.Error("file doesn't contain updated data")
}
})
t.Run("no backup for new file", func(t *testing.T) {
path := filepath.Join(tmpDir, "nobak.json")
data := mustMarshal(t, map[string]string{"new": "file"})
if err := writeWithBackup(path, data); err != nil {
t.Fatal(err)
}
entries, _ := os.ReadDir(backupDir())
for _, entry := range entries {
if len(entry.Name()) > len("nobak.json.") && entry.Name()[:len("nobak.json.")] == "nobak.json." {
t.Error("backup should not exist for new file")
}
}
})
t.Run("no backup when content unchanged", func(t *testing.T) {
path := filepath.Join(tmpDir, "unchanged.json")
data := mustMarshal(t, map[string]string{"key": "value"})
if err := writeWithBackup(path, data); err != nil {
t.Fatal(err)
}
entries1, _ := os.ReadDir(backupDir())
countBefore := 0
for _, e := range entries1 {
if len(e.Name()) > len("unchanged.json.") && e.Name()[:len("unchanged.json.")] == "unchanged.json." {
countBefore++
}
}
if err := writeWithBackup(path, data); err != nil {
t.Fatal(err)
}
entries2, _ := os.ReadDir(backupDir())
countAfter := 0
for _, e := range entries2 {
if len(e.Name()) > len("unchanged.json.") && e.Name()[:len("unchanged.json.")] == "unchanged.json." {
countAfter++
}
}
if countAfter != countBefore {
t.Errorf("backup was created when content unchanged (before=%d, after=%d)", countBefore, countAfter)
}
})
t.Run("backup filename contains unix timestamp", func(t *testing.T) {
path := filepath.Join(tmpDir, "timestamped.json")
os.WriteFile(path, []byte(`{"v": 1}`), 0o644)
data := mustMarshal(t, map[string]int{"v": 2})
if err := writeWithBackup(path, data); err != nil {
t.Fatal(err)
}
entries, _ := os.ReadDir(backupDir())
var found bool
for _, entry := range entries {
name := entry.Name()
if len(name) > len("timestamped.json.") && name[:len("timestamped.json.")] == "timestamped.json." {
timestamp := name[len("timestamped.json."):]
for _, c := range timestamp {
if c < '0' || c > '9' {
t.Errorf("backup filename timestamp contains non-numeric character: %s", name)
}
}
found = true
os.Remove(filepath.Join(backupDir(), name))
break
}
}
if !found {
t.Error("backup file with timestamp not found")
}
})
}
// Edge case tests for files.go
// TestWriteWithBackup_FailsIfBackupFails documents critical behavior: if the backup fails, we must not proceed,
// since the user could otherwise lose their config with no way to recover.
func TestWriteWithBackup_FailsIfBackupFails(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission tests unreliable on Windows")
}
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "config.json")
// Create original file
originalContent := []byte(`{"original": true}`)
os.WriteFile(path, originalContent, 0o644)
// Make backup directory read-only to force backup failure
backupDir := backupDir()
os.MkdirAll(backupDir, 0o755)
os.Chmod(backupDir, 0o444) // Read-only
defer os.Chmod(backupDir, 0o755)
newContent := []byte(`{"updated": true}`)
err := writeWithBackup(path, newContent)
// Should fail because backup couldn't be created
if err == nil {
t.Error("expected error when backup fails, got nil")
}
// Original file should be preserved
current, _ := os.ReadFile(path)
if string(current) != string(originalContent) {
t.Errorf("original file was modified despite backup failure: got %s", string(current))
}
}
// TestWriteWithBackup_PermissionDenied verifies clear error when target file has wrong permissions.
// Common issue when config owned by root or wrong perms.
func TestWriteWithBackup_PermissionDenied(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission tests unreliable on Windows")
}
tmpDir := t.TempDir()
// Create a read-only directory
readOnlyDir := filepath.Join(tmpDir, "readonly")
os.MkdirAll(readOnlyDir, 0o755)
os.Chmod(readOnlyDir, 0o444)
defer os.Chmod(readOnlyDir, 0o755)
path := filepath.Join(readOnlyDir, "config.json")
err := writeWithBackup(path, []byte(`{"test": true}`))
if err == nil {
t.Error("expected permission error, got nil")
}
}
// TestWriteWithBackup_DirectoryDoesNotExist verifies behavior when target directory doesn't exist.
// writeWithBackup doesn't create directories - caller is responsible.
func TestWriteWithBackup_DirectoryDoesNotExist(t *testing.T) {
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "nonexistent", "subdir", "config.json")
err := writeWithBackup(path, []byte(`{"test": true}`))
// Should fail because directory doesn't exist
if err == nil {
t.Error("expected error for nonexistent directory, got nil")
}
}
// TestWriteWithBackup_SymlinkTarget documents behavior when target is a symlink.
// Documents what happens if user symlinks their config file.
func TestWriteWithBackup_SymlinkTarget(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("symlink tests may require admin on Windows")
}
tmpDir := t.TempDir()
realFile := filepath.Join(tmpDir, "real.json")
symlink := filepath.Join(tmpDir, "link.json")
// Create real file and symlink
os.WriteFile(realFile, []byte(`{"v": 1}`), 0o644)
os.Symlink(realFile, symlink)
// Write through symlink
err := writeWithBackup(symlink, []byte(`{"v": 2}`))
if err != nil {
t.Fatalf("writeWithBackup through symlink failed: %v", err)
}
// The real file should be updated (symlink followed for temp file creation)
content, _ := os.ReadFile(symlink)
if string(content) != `{"v": 2}` {
t.Errorf("symlink target not updated correctly: got %s", string(content))
}
}
// TestBackupToTmp_SpecialCharsInFilename verifies backup works with special characters.
// User may have config files with unusual names.
func TestBackupToTmp_SpecialCharsInFilename(t *testing.T) {
tmpDir := t.TempDir()
// File with spaces and special chars
path := filepath.Join(tmpDir, "my config (backup).json")
os.WriteFile(path, []byte(`{"test": true}`), 0o644)
backupPath, err := backupToTmp(path)
if err != nil {
t.Fatalf("backupToTmp with special chars failed: %v", err)
}
// Verify backup exists and has correct content
content, err := os.ReadFile(backupPath)
if err != nil {
t.Fatalf("could not read backup: %v", err)
}
if string(content) != `{"test": true}` {
t.Errorf("backup content mismatch: got %s", string(content))
}
os.Remove(backupPath)
}
// TestCopyFile_PreservesPermissions verifies that copyFile preserves file permissions.
func TestCopyFile_PreservesPermissions(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission preservation tests unreliable on Windows")
}
tmpDir := t.TempDir()
src := filepath.Join(tmpDir, "src.json")
dst := filepath.Join(tmpDir, "dst.json")
// Create source with specific permissions
os.WriteFile(src, []byte(`{"test": true}`), 0o600)
err := copyFile(src, dst)
if err != nil {
t.Fatalf("copyFile failed: %v", err)
}
srcInfo, _ := os.Stat(src)
dstInfo, _ := os.Stat(dst)
if srcInfo.Mode().Perm() != dstInfo.Mode().Perm() {
t.Errorf("permissions not preserved: src=%v, dst=%v", srcInfo.Mode().Perm(), dstInfo.Mode().Perm())
}
}
// TestCopyFile_SourceNotFound verifies clear error when source doesn't exist.
func TestCopyFile_SourceNotFound(t *testing.T) {
tmpDir := t.TempDir()
src := filepath.Join(tmpDir, "nonexistent.json")
dst := filepath.Join(tmpDir, "dst.json")
err := copyFile(src, dst)
if err == nil {
t.Error("expected error for nonexistent source, got nil")
}
}
// TestWriteWithBackup_TargetIsDirectory verifies error when path points to a directory.
func TestWriteWithBackup_TargetIsDirectory(t *testing.T) {
tmpDir := t.TempDir()
dirPath := filepath.Join(tmpDir, "actualdir")
os.MkdirAll(dirPath, 0o755)
err := writeWithBackup(dirPath, []byte(`{"test": true}`))
if err == nil {
t.Error("expected error when target is a directory, got nil")
}
}
// TestWriteWithBackup_EmptyData verifies writing zero bytes works correctly.
func TestWriteWithBackup_EmptyData(t *testing.T) {
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "empty.json")
err := writeWithBackup(path, []byte{})
if err != nil {
t.Fatalf("writeWithBackup with empty data failed: %v", err)
}
content, err := os.ReadFile(path)
if err != nil {
t.Fatalf("could not read file: %v", err)
}
if len(content) != 0 {
t.Errorf("expected empty file, got %d bytes", len(content))
}
}
// TestWriteWithBackup_FileUnreadableButDirWritable verifies behavior when existing file
// cannot be read (for backup comparison) but directory is writable.
func TestWriteWithBackup_FileUnreadableButDirWritable(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission tests unreliable on Windows")
}
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "unreadable.json")
// Create file and make it unreadable
os.WriteFile(path, []byte(`{"original": true}`), 0o644)
os.Chmod(path, 0o000)
defer os.Chmod(path, 0o644)
// Should fail because we can't read the file to compare/backup
err := writeWithBackup(path, []byte(`{"updated": true}`))
if err == nil {
t.Error("expected error when file is unreadable, got nil")
}
}
// TestWriteWithBackup_RapidSuccessiveWrites verifies backup works with multiple writes
// within the same second (timestamp collision scenario).
func TestWriteWithBackup_RapidSuccessiveWrites(t *testing.T) {
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "rapid.json")
// Create initial file
os.WriteFile(path, []byte(`{"v": 0}`), 0o644)
// Rapid successive writes
for i := 1; i <= 3; i++ {
data := []byte(fmt.Sprintf(`{"v": %d}`, i))
if err := writeWithBackup(path, data); err != nil {
t.Fatalf("write %d failed: %v", i, err)
}
}
// Verify final content
content, _ := os.ReadFile(path)
if string(content) != `{"v": 3}` {
t.Errorf("expected final content {\"v\": 3}, got %s", string(content))
}
// Verify at least one backup exists
entries, _ := os.ReadDir(backupDir())
var backupCount int
for _, e := range entries {
if len(e.Name()) > len("rapid.json.") && e.Name()[:len("rapid.json.")] == "rapid.json." {
backupCount++
}
}
if backupCount == 0 {
t.Error("expected at least one backup file from rapid writes")
}
}
// TestWriteWithBackup_BackupDirIsFile verifies error when backup directory path is a file.
func TestWriteWithBackup_BackupDirIsFile(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("test modifies system temp directory")
}
// Create a file at the backup directory path
backupPath := backupDir()
// Clean up any existing directory first
os.RemoveAll(backupPath)
// Create a file instead of directory
os.WriteFile(backupPath, []byte("not a directory"), 0o644)
defer func() {
os.Remove(backupPath)
os.MkdirAll(backupPath, 0o755)
}()
tmpDir := t.TempDir()
path := filepath.Join(tmpDir, "test.json")
os.WriteFile(path, []byte(`{"original": true}`), 0o644)
err := writeWithBackup(path, []byte(`{"updated": true}`))
if err == nil {
t.Error("expected error when backup dir is a file, got nil")
}
}
// TestWriteWithBackup_NoOrphanTempFiles verifies temp files are cleaned up on failure.
func TestWriteWithBackup_NoOrphanTempFiles(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission tests unreliable on Windows")
}
tmpDir := t.TempDir()
// Count existing temp files
countTempFiles := func() int {
entries, _ := os.ReadDir(tmpDir)
count := 0
for _, e := range entries {
if len(e.Name()) > 4 && e.Name()[:4] == ".tmp" {
count++
}
}
return count
}
before := countTempFiles()
// Forcing a mid-write failure is awkward here: the temp file is created in
// the same directory as the target, so a directory that would reject the
// rename would also have rejected the temp file. Instead, force an early
// failure by making the target itself a directory, then verify cleanup.
badPath := filepath.Join(tmpDir, "isdir")
os.MkdirAll(badPath, 0o755)
_ = writeWithBackup(badPath, []byte(`{"test": true}`))
after := countTempFiles()
if after > before {
t.Errorf("orphan temp files left behind: before=%d, after=%d", before, after)
}
}

View File

@@ -1,361 +0,0 @@
package config
import (
"context"
"errors"
"fmt"
"maps"
"os"
"os/exec"
"runtime"
"slices"
"strings"
"time"
"github.com/ollama/ollama/api"
"github.com/spf13/cobra"
)
// Runners launch a model with an integration (claude, codex).
// Editors edit the integration's config files and support multi-model selection (opencode, droid).
// The interfaces are composable: an Editor may also be a Runner (opencode, droid).
// Runner can run an integration with a model.
type Runner interface {
Run(model string) error
// String returns the human-readable name of the integration
String() string
}
// Editor can edit config files (supports multi-model selection)
type Editor interface {
// Paths returns the paths to the config files for the integration
Paths() []string
// Edit updates the config files for the integration with the given models
Edit(models []string) error
// Models returns the models currently configured for the integration
Models() []string
}
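// Since a single type may implement both interfaces, the tests pin the
// composition down with compile-time assertions, e.g. (sketch):
//
//	var _ Runner = (*OpenCode)(nil)
//	var _ Editor = (*OpenCode)(nil)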
// integrations is the registry of available integrations.
var integrations = map[string]Runner{
"claude": &Claude{},
"codex": &Codex{},
"droid": &Droid{},
"opencode": &OpenCode{},
}
func selectIntegration() (string, error) {
if len(integrations) == 0 {
return "", fmt.Errorf("no integrations available")
}
names := slices.Sorted(maps.Keys(integrations))
var items []selectItem
for _, name := range names {
r := integrations[name]
description := r.String()
if conn, err := loadIntegration(name); err == nil && len(conn.Models) > 0 {
description = fmt.Sprintf("%s (%s)", r.String(), conn.Models[0])
}
items = append(items, selectItem{Name: name, Description: description})
}
return selectPrompt("Select integration:", items)
}
// selectModels lets the user select models for an integration
func selectModels(ctx context.Context, name, current string) ([]string, error) {
r, ok := integrations[name]
if !ok {
return nil, fmt.Errorf("unknown integration: %s", name)
}
client, err := api.ClientFromEnvironment()
if err != nil {
return nil, err
}
models, err := client.List(ctx)
if err != nil {
return nil, err
}
if len(models.Models) == 0 {
return nil, fmt.Errorf("no models available, run 'ollama pull <model>' first")
}
var items []selectItem
cloudModels := make(map[string]bool)
for _, m := range models.Models {
if m.RemoteModel != "" {
cloudModels[m.Name] = true
}
items = append(items, selectItem{Name: m.Name})
}
if len(items) == 0 {
return nil, fmt.Errorf("no local models available, run 'ollama pull <model>' first")
}
// Get previously configured models (saved config takes precedence)
var preChecked []string
if saved, err := loadIntegration(name); err == nil {
preChecked = saved.Models
} else if editor, ok := r.(Editor); ok {
preChecked = editor.Models()
}
checked := make(map[string]bool, len(preChecked))
for _, n := range preChecked {
checked[n] = true
}
// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
for _, item := range items {
if item.Name == current || strings.HasPrefix(item.Name, current+":") {
current = item.Name
break
}
}
// If current model is configured, move to front of preChecked
if checked[current] {
preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
}
// Sort: checked first, then alphabetical
slices.SortFunc(items, func(a, b selectItem) int {
ac, bc := checked[a.Name], checked[b.Name]
if ac != bc {
if ac {
return -1
}
return 1
}
return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
})
var selected []string
// only editors support multi-model selection
if _, ok := r.(Editor); ok {
selected, err = multiSelectPrompt(fmt.Sprintf("Select models for %s:", r), items, preChecked)
if err != nil {
return nil, err
}
} else {
model, err := selectPrompt(fmt.Sprintf("Select model for %s:", r), items)
if err != nil {
return nil, err
}
selected = []string{model}
}
// if any model in selected is a cloud model, ensure signed in
var selectedCloudModels []string
for _, m := range selected {
if cloudModels[m] {
selectedCloudModels = append(selectedCloudModels, m)
}
}
if len(selectedCloudModels) > 0 {
// ensure user is signed in
user, err := client.Whoami(ctx)
if err == nil && user != nil && user.Name != "" {
return selected, nil
}
var aErr api.AuthorizationError
if !errors.As(err, &aErr) || aErr.SigninURL == "" {
return nil, err
}
modelList := strings.Join(selectedCloudModels, ", ")
yes, err := confirmPrompt(fmt.Sprintf("sign in to use %s?", modelList))
if err != nil || !yes {
return nil, fmt.Errorf("%s requires sign in", modelList)
}
fmt.Fprintf(os.Stderr, "\nTo sign in, navigate to:\n %s\n\n", aErr.SigninURL)
// TODO(parthsareen): extract into auth package for cmd
// Auto-open browser (best effort, fail silently)
switch runtime.GOOS {
case "darwin":
_ = exec.Command("open", aErr.SigninURL).Start()
case "linux":
_ = exec.Command("xdg-open", aErr.SigninURL).Start()
case "windows":
_ = exec.Command("rundll32", "url.dll,FileProtocolHandler", aErr.SigninURL).Start()
}
spinnerFrames := []string{"|", "/", "-", "\\"}
frame := 0
fmt.Fprintf(os.Stderr, "\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[0])
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
fmt.Fprintf(os.Stderr, "\r\033[K")
return nil, ctx.Err()
case <-ticker.C:
frame++
fmt.Fprintf(os.Stderr, "\r\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
// poll every 10th frame (~2 seconds)
if frame%10 == 0 {
u, err := client.Whoami(ctx)
if err == nil && u != nil && u.Name != "" {
fmt.Fprintf(os.Stderr, "\r\033[K\033[A\r\033[K\033[1msigned in:\033[0m %s\n", u.Name)
return selected, nil
}
}
}
}
}
return selected, nil
}
func runIntegration(name, modelName string) error {
r, ok := integrations[name]
if !ok {
return fmt.Errorf("unknown integration: %s", name)
}
fmt.Fprintf(os.Stderr, "\nLaunching %s with %s...\n", r, modelName)
return r.Run(modelName)
}
// ConfigCmd returns the cobra command for configuring integrations.
func ConfigCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) error) *cobra.Command {
var modelFlag string
var launchFlag bool
cmd := &cobra.Command{
Use: "config [INTEGRATION]",
Short: "Configure an external integration to use Ollama",
Long: `Configure an external application to use Ollama models.
Supported integrations:
claude Claude Code
codex Codex
droid Droid
opencode OpenCode
Examples:
ollama config
ollama config claude
ollama config droid --launch`,
Args: cobra.MaximumNArgs(1),
PreRunE: checkServerHeartbeat,
RunE: func(cmd *cobra.Command, args []string) error {
var name string
if len(args) > 0 {
name = args[0]
} else {
var err error
name, err = selectIntegration()
if errors.Is(err, errCancelled) {
return nil
}
if err != nil {
return err
}
}
r, ok := integrations[strings.ToLower(name)]
if !ok {
return fmt.Errorf("unknown integration: %s", name)
}
// If --launch without --model, use saved config if available
if launchFlag && modelFlag == "" {
if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
return runIntegration(name, config.Models[0])
}
}
var models []string
if modelFlag != "" {
// When --model is specified, merge with existing models (new model becomes default)
models = []string{modelFlag}
if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
for _, m := range existing.Models {
if m != modelFlag {
models = append(models, m)
}
}
}
} else {
var err error
models, err = selectModels(cmd.Context(), name, "")
if errors.Is(err, errCancelled) {
return nil
}
if err != nil {
return err
}
}
if editor, isEditor := r.(Editor); isEditor {
paths := editor.Paths()
if len(paths) > 0 {
fmt.Fprintf(os.Stderr, "This will modify your %s configuration:\n", r)
for _, p := range paths {
fmt.Fprintf(os.Stderr, " %s\n", p)
}
fmt.Fprintf(os.Stderr, "Backups will be saved to %s/\n\n", backupDir())
if ok, _ := confirmPrompt("Proceed?"); !ok {
return nil
}
}
}
if err := saveIntegration(name, models); err != nil {
return fmt.Errorf("failed to save: %w", err)
}
if editor, isEditor := r.(Editor); isEditor {
if err := editor.Edit(models); err != nil {
return fmt.Errorf("setup failed: %w", err)
}
}
if _, isEditor := r.(Editor); isEditor {
if len(models) == 1 {
fmt.Fprintf(os.Stderr, "Added %s to %s\n", models[0], r)
} else {
fmt.Fprintf(os.Stderr, "Added %d models to %s (default: %s)\n", len(models), r, models[0])
}
}
if slices.ContainsFunc(models, func(m string) bool {
return !strings.HasSuffix(m, "cloud")
}) {
fmt.Fprintln(os.Stderr)
fmt.Fprintln(os.Stderr, "Coding agents work best with at least 64k context. Either:")
fmt.Fprintln(os.Stderr, " - Set the context slider in Ollama app settings")
fmt.Fprintln(os.Stderr, " - Run: OLLAMA_CONTEXT_LENGTH=64000 ollama serve")
}
if launchFlag {
return runIntegration(name, models[0])
}
if launch, _ := confirmPrompt(fmt.Sprintf("\nLaunch %s now?", r)); launch {
return runIntegration(name, models[0])
}
fmt.Fprintf(os.Stderr, "Run 'ollama config %s --launch' to start with %s\n", strings.ToLower(name), models[0])
return nil
},
}
cmd.Flags().StringVar(&modelFlag, "model", "", "Model to use")
cmd.Flags().BoolVar(&launchFlag, "launch", false, "Launch the integration after configuring")
return cmd
}

View File

@@ -1,188 +0,0 @@
package config
import (
"slices"
"strings"
"testing"
"github.com/spf13/cobra"
)
func TestIntegrationLookup(t *testing.T) {
tests := []struct {
name string
input string
wantFound bool
wantName string
}{
{"claude lowercase", "claude", true, "Claude Code"},
{"claude uppercase", "CLAUDE", true, "Claude Code"},
{"claude mixed case", "Claude", true, "Claude Code"},
{"codex", "codex", true, "Codex"},
{"droid", "droid", true, "Droid"},
{"opencode", "opencode", true, "OpenCode"},
{"unknown integration", "unknown", false, ""},
{"empty string", "", false, ""},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
r, found := integrations[strings.ToLower(tt.input)]
if found != tt.wantFound {
t.Errorf("integrations[%q] found = %v, want %v", tt.input, found, tt.wantFound)
}
if found && r.String() != tt.wantName {
t.Errorf("integrations[%q].String() = %q, want %q", tt.input, r.String(), tt.wantName)
}
})
}
}
func TestIntegrationRegistry(t *testing.T) {
expectedIntegrations := []string{"claude", "codex", "droid", "opencode"}
for _, name := range expectedIntegrations {
t.Run(name, func(t *testing.T) {
r, ok := integrations[name]
if !ok {
t.Fatalf("integration %q not found in registry", name)
}
if r.String() == "" {
t.Error("integration.String() should not be empty")
}
})
}
}
func TestHasLocalModel(t *testing.T) {
tests := []struct {
name string
models []string
want bool
}{
{"empty list", []string{}, false},
{"single local model", []string{"llama3.2"}, true},
{"single cloud model", []string{"cloud-model"}, false},
{"mixed models", []string{"cloud-model", "llama3.2"}, true},
{"multiple local models", []string{"llama3.2", "qwen2.5"}, true},
{"multiple cloud models", []string{"cloud-a", "cloud-b"}, false},
{"local model first", []string{"llama3.2", "cloud-model"}, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := slices.ContainsFunc(tt.models, func(m string) bool {
return !strings.Contains(m, "cloud")
})
if got != tt.want {
t.Errorf("hasLocalModel(%v) = %v, want %v", tt.models, got, tt.want)
}
})
}
}
func TestConfigCmd(t *testing.T) {
// Mock checkServerHeartbeat that always succeeds
mockCheck := func(cmd *cobra.Command, args []string) error {
return nil
}
cmd := ConfigCmd(mockCheck)
t.Run("command structure", func(t *testing.T) {
if cmd.Use != "config [INTEGRATION]" {
t.Errorf("Use = %q, want %q", cmd.Use, "config [INTEGRATION]")
}
if cmd.Short == "" {
t.Error("Short description should not be empty")
}
if cmd.Long == "" {
t.Error("Long description should not be empty")
}
})
t.Run("flags exist", func(t *testing.T) {
modelFlag := cmd.Flags().Lookup("model")
if modelFlag == nil {
t.Error("--model flag should exist")
}
launchFlag := cmd.Flags().Lookup("launch")
if launchFlag == nil {
t.Error("--launch flag should exist")
}
})
t.Run("PreRunE is set", func(t *testing.T) {
if cmd.PreRunE == nil {
t.Error("PreRunE should be set to checkServerHeartbeat")
}
})
}
func TestRunIntegration_UnknownIntegration(t *testing.T) {
err := runIntegration("unknown-integration", "model")
if err == nil {
t.Error("expected error for unknown integration, got nil")
}
if !strings.Contains(err.Error(), "unknown integration") {
t.Errorf("error should mention 'unknown integration', got: %v", err)
}
}
func TestHasLocalModel_DocumentsHeuristic(t *testing.T) {
tests := []struct {
name string
models []string
want bool
reason string
}{
{"empty list", []string{}, false, "empty list has no local models"},
{"contains-cloud-substring", []string{"deepseek-r1:cloud"}, false, "model with 'cloud' substring is considered cloud"},
{"cloud-in-name", []string{"my-cloud-model"}, false, "'cloud' anywhere in name = cloud model"},
{"cloudless", []string{"cloudless-model"}, false, "'cloudless' still contains 'cloud'"},
{"local-model", []string{"llama3.2"}, true, "no 'cloud' = local"},
{"mixed", []string{"cloud-model", "llama3.2"}, true, "one local model = hasLocalModel true"},
{"all-cloud", []string{"cloud-a", "cloud-b"}, false, "all contain 'cloud'"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Note: the production check in integration.go uses strings.HasSuffix(m, "cloud");
// this test documents the stricter substring variant of the heuristic.
got := slices.ContainsFunc(tt.models, func(m string) bool {
return !strings.Contains(m, "cloud")
})
if got != tt.want {
t.Errorf("hasLocalModel(%v) = %v, want %v (%s)", tt.models, got, tt.want, tt.reason)
}
})
}
}
func TestConfigCmd_NilHeartbeat(t *testing.T) {
// This should not panic - cmd creation should work even with nil
cmd := ConfigCmd(nil)
if cmd == nil {
t.Fatal("ConfigCmd returned nil")
}
// PreRunE should be nil when passed nil
if cmd.PreRunE != nil {
t.Log("Note: PreRunE is set even when nil is passed (acceptable)")
}
}
func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
for name, r := range integrations {
t.Run(name, func(t *testing.T) {
// Test String() doesn't panic and returns non-empty
displayName := r.String()
if displayName == "" {
t.Error("String() should not return empty")
}
// Test Run() exists (we can't call it without actually running the command)
// Just verify the method is available
var _ func(string) error = r.Run
})
}
}

View File

@@ -1,203 +0,0 @@
package config
import (
"encoding/json"
"fmt"
"maps"
"os"
"os/exec"
"path/filepath"
"slices"
"strings"
)
// OpenCode implements Runner and Editor for OpenCode integration
type OpenCode struct{}
func (o *OpenCode) String() string { return "OpenCode" }
func (o *OpenCode) Run(model string) error {
if _, err := exec.LookPath("opencode"); err != nil {
return fmt.Errorf("opencode is not installed, install from https://opencode.ai")
}
// Call Edit() to ensure config is up-to-date before launch
models := []string{model}
if config, err := loadIntegration("opencode"); err == nil && len(config.Models) > 0 {
models = config.Models
}
if err := o.Edit(models); err != nil {
return fmt.Errorf("setup failed: %w", err)
}
cmd := exec.Command("opencode")
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
func (o *OpenCode) Paths() []string {
home, err := os.UserHomeDir()
if err != nil {
return nil
}
var paths []string
p := filepath.Join(home, ".config", "opencode", "opencode.json")
if _, err := os.Stat(p); err == nil {
paths = append(paths, p)
}
sp := filepath.Join(home, ".local", "state", "opencode", "model.json")
if _, err := os.Stat(sp); err == nil {
paths = append(paths, sp)
}
return paths
}
func (o *OpenCode) Edit(modelList []string) error {
if len(modelList) == 0 {
return nil
}
home, err := os.UserHomeDir()
if err != nil {
return err
}
configPath := filepath.Join(home, ".config", "opencode", "opencode.json")
if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
return err
}
config := make(map[string]any)
if data, err := os.ReadFile(configPath); err == nil {
_ = json.Unmarshal(data, &config) // Ignore parse errors; treat missing/corrupt files as empty
}
config["$schema"] = "https://opencode.ai/config.json"
provider, ok := config["provider"].(map[string]any)
if !ok {
provider = make(map[string]any)
}
ollama, ok := provider["ollama"].(map[string]any)
if !ok {
ollama = map[string]any{
"npm": "@ai-sdk/openai-compatible",
"name": "Ollama (local)",
"options": map[string]any{
"baseURL": "http://localhost:11434/v1",
},
}
}
models, ok := ollama["models"].(map[string]any)
if !ok {
models = make(map[string]any)
}
selectedSet := make(map[string]bool)
for _, m := range modelList {
selectedSet[m] = true
}
for name, cfg := range models {
if cfgMap, ok := cfg.(map[string]any); ok {
if displayName, ok := cfgMap["name"].(string); ok {
if strings.HasSuffix(displayName, "[Ollama]") && !selectedSet[name] {
delete(models, name)
}
}
}
}
for _, model := range modelList {
models[model] = map[string]any{
"name": fmt.Sprintf("%s [Ollama]", model),
}
}
ollama["models"] = models
provider["ollama"] = ollama
config["provider"] = provider
configData, err := json.MarshalIndent(config, "", " ")
if err != nil {
return err
}
if err := writeWithBackup(configPath, configData); err != nil {
return err
}
statePath := filepath.Join(home, ".local", "state", "opencode", "model.json")
if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil {
return err
}
state := map[string]any{
"recent": []any{},
"favorite": []any{},
"variant": map[string]any{},
}
if data, err := os.ReadFile(statePath); err == nil {
_ = json.Unmarshal(data, &state) // Ignore parse errors; use defaults
}
recent, _ := state["recent"].([]any)
modelSet := make(map[string]bool)
for _, m := range modelList {
modelSet[m] = true
}
// Filter out existing Ollama models we're about to re-add
newRecent := slices.DeleteFunc(slices.Clone(recent), func(entry any) bool {
e, ok := entry.(map[string]any)
if !ok || e["providerID"] != "ollama" {
return false
}
modelID, _ := e["modelID"].(string)
return modelSet[modelID]
})
// Prepend models in reverse order so first model ends up first
for _, model := range slices.Backward(modelList) {
newRecent = slices.Insert(newRecent, 0, any(map[string]any{
"providerID": "ollama",
"modelID": model,
}))
}
const maxRecentModels = 10
newRecent = newRecent[:min(len(newRecent), maxRecentModels)]
state["recent"] = newRecent
stateData, err := json.MarshalIndent(state, "", " ")
if err != nil {
return err
}
return writeWithBackup(statePath, stateData)
}
func (o *OpenCode) Models() []string {
home, err := os.UserHomeDir()
if err != nil {
return nil
}
config, err := readJSONFile(filepath.Join(home, ".config", "opencode", "opencode.json"))
if err != nil {
return nil
}
provider, _ := config["provider"].(map[string]any)
ollama, _ := provider["ollama"].(map[string]any)
models, _ := ollama["models"].(map[string]any)
if len(models) == 0 {
return nil
}
keys := slices.Collect(maps.Keys(models))
slices.Sort(keys)
return keys
}
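// For reference, a fresh Edit([]string{"llama3.2"}) writes an opencode.json
// shaped like this (key order approximate):
//
//	{
//	  "$schema": "https://opencode.ai/config.json",
//	  "provider": {
//	    "ollama": {
//	      "npm": "@ai-sdk/openai-compatible",
//	      "name": "Ollama (local)",
//	      "options": {"baseURL": "http://localhost:11434/v1"},
//	      "models": {"llama3.2": {"name": "llama3.2 [Ollama]"}}
//	    }
//	  }
//	}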

View File

@@ -1,437 +0,0 @@
package config
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
func TestOpenCodeIntegration(t *testing.T) {
o := &OpenCode{}
t.Run("String", func(t *testing.T) {
if got := o.String(); got != "OpenCode" {
t.Errorf("String() = %q, want %q", got, "OpenCode")
}
})
t.Run("implements Runner", func(t *testing.T) {
var _ Runner = o
})
t.Run("implements Editor", func(t *testing.T) {
var _ Editor = o
})
}
func TestOpenCodeEdit(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".config", "opencode")
configPath := filepath.Join(configDir, "opencode.json")
stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
statePath := filepath.Join(stateDir, "model.json")
cleanup := func() {
os.RemoveAll(configDir)
os.RemoveAll(stateDir)
}
t.Run("fresh install", func(t *testing.T) {
cleanup()
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
})
t.Run("preserve other providers", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
os.WriteFile(configPath, []byte(`{"provider":{"anthropic":{"apiKey":"xxx"}}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
data, _ := os.ReadFile(configPath)
var cfg map[string]any
json.Unmarshal(data, &cfg)
provider := cfg["provider"].(map[string]any)
if provider["anthropic"] == nil {
t.Error("anthropic provider was removed")
}
assertOpenCodeModelExists(t, configPath, "llama3.2")
})
t.Run("preserve other models", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"mistral":{"name":"Mistral"}}}}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
assertOpenCodeModelExists(t, configPath, "mistral")
assertOpenCodeModelExists(t, configPath, "llama3.2")
})
t.Run("update existing model", func(t *testing.T) {
cleanup()
o.Edit([]string{"llama3.2"})
o.Edit([]string{"llama3.2"})
assertOpenCodeModelExists(t, configPath, "llama3.2")
})
t.Run("preserve top-level keys", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
os.WriteFile(configPath, []byte(`{"theme":"dark","keybindings":{}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
data, _ := os.ReadFile(configPath)
var cfg map[string]any
json.Unmarshal(data, &cfg)
if cfg["theme"] != "dark" {
t.Error("theme was removed")
}
if cfg["keybindings"] == nil {
t.Error("keybindings was removed")
}
})
t.Run("model state - insert at index 0", func(t *testing.T) {
cleanup()
os.MkdirAll(stateDir, 0o755)
os.WriteFile(statePath, []byte(`{"recent":[{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
assertOpenCodeRecentModel(t, statePath, 1, "anthropic", "claude")
})
t.Run("model state - preserve favorites and variants", func(t *testing.T) {
cleanup()
os.MkdirAll(stateDir, 0o755)
os.WriteFile(statePath, []byte(`{"recent":[],"favorite":[{"providerID":"x","modelID":"y"}],"variant":{"a":"b"}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
data, _ := os.ReadFile(statePath)
var state map[string]any
json.Unmarshal(data, &state)
if len(state["favorite"].([]any)) != 1 {
t.Error("favorite was modified")
}
if state["variant"].(map[string]any)["a"] != "b" {
t.Error("variant was modified")
}
})
t.Run("model state - deduplicate on re-add", func(t *testing.T) {
cleanup()
os.MkdirAll(stateDir, 0o755)
os.WriteFile(statePath, []byte(`{"recent":[{"providerID":"ollama","modelID":"llama3.2"},{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
data, _ := os.ReadFile(statePath)
var state map[string]any
json.Unmarshal(data, &state)
recent := state["recent"].([]any)
if len(recent) != 2 {
t.Errorf("expected 2 recent entries, got %d", len(recent))
}
assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
})
t.Run("remove model", func(t *testing.T) {
cleanup()
// First add two models
o.Edit([]string{"llama3.2", "mistral"})
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeModelExists(t, configPath, "mistral")
// Then remove one by only selecting the other
o.Edit([]string{"llama3.2"})
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeModelNotExists(t, configPath, "mistral")
})
t.Run("remove model preserves non-ollama models", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
// Add a non-Ollama model manually
os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"external":{"name":"External Model"}}}}}`), 0o644)
o.Edit([]string{"llama3.2"})
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeModelExists(t, configPath, "external") // Should be preserved
})
}
func assertOpenCodeModelExists(t *testing.T, path, model string) {
t.Helper()
data, err := os.ReadFile(path)
if err != nil {
t.Fatal(err)
}
var cfg map[string]any
if err := json.Unmarshal(data, &cfg); err != nil {
t.Fatal(err)
}
provider, ok := cfg["provider"].(map[string]any)
if !ok {
t.Fatal("provider not found")
}
ollama, ok := provider["ollama"].(map[string]any)
if !ok {
t.Fatal("ollama provider not found")
}
models, ok := ollama["models"].(map[string]any)
if !ok {
t.Fatal("models not found")
}
if models[model] == nil {
t.Errorf("model %s not found", model)
}
}
func assertOpenCodeModelNotExists(t *testing.T, path, model string) {
t.Helper()
data, err := os.ReadFile(path)
if err != nil {
t.Fatal(err)
}
var cfg map[string]any
if err := json.Unmarshal(data, &cfg); err != nil {
t.Fatal(err)
}
provider, ok := cfg["provider"].(map[string]any)
if !ok {
return // No provider means no model
}
ollama, ok := provider["ollama"].(map[string]any)
if !ok {
return // No ollama means no model
}
models, ok := ollama["models"].(map[string]any)
if !ok {
return // No models means no model
}
if models[model] != nil {
t.Errorf("model %s should not exist but was found", model)
}
}
func assertOpenCodeRecentModel(t *testing.T, path string, index int, providerID, modelID string) {
t.Helper()
data, err := os.ReadFile(path)
if err != nil {
t.Fatal(err)
}
var state map[string]any
if err := json.Unmarshal(data, &state); err != nil {
t.Fatal(err)
}
recent, ok := state["recent"].([]any)
if !ok {
t.Fatal("recent not found")
}
if index >= len(recent) {
t.Fatalf("index %d out of range (len=%d)", index, len(recent))
}
entry, ok := recent[index].(map[string]any)
if !ok {
t.Fatal("entry is not a map")
}
if entry["providerID"] != providerID {
t.Errorf("expected providerID %s, got %s", providerID, entry["providerID"])
}
if entry["modelID"] != modelID {
t.Errorf("expected modelID %s, got %s", modelID, entry["modelID"])
}
}
// Edge case tests for opencode.go
func TestOpenCodeEdit_CorruptedConfigJSON(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".config", "opencode")
configPath := filepath.Join(configDir, "opencode.json")
os.MkdirAll(configDir, 0o755)
os.WriteFile(configPath, []byte(`{corrupted json content`), 0o644)
// Should not panic - corrupted JSON should be treated as empty
err := o.Edit([]string{"llama3.2"})
if err != nil {
t.Fatalf("Edit failed with corrupted config: %v", err)
}
// Verify valid JSON was created
data, _ := os.ReadFile(configPath)
var cfg map[string]any
if err := json.Unmarshal(data, &cfg); err != nil {
t.Errorf("resulting config is not valid JSON: %v", err)
}
}
func TestOpenCodeEdit_CorruptedStateJSON(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
statePath := filepath.Join(stateDir, "model.json")
os.MkdirAll(stateDir, 0o755)
os.WriteFile(statePath, []byte(`{corrupted state`), 0o644)
err := o.Edit([]string{"llama3.2"})
if err != nil {
t.Fatalf("Edit failed with corrupted state: %v", err)
}
// Verify valid state was created
data, _ := os.ReadFile(statePath)
var state map[string]any
if err := json.Unmarshal(data, &state); err != nil {
t.Errorf("resulting state is not valid JSON: %v", err)
}
}
func TestOpenCodeEdit_WrongTypeProvider(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".config", "opencode")
configPath := filepath.Join(configDir, "opencode.json")
os.MkdirAll(configDir, 0o755)
os.WriteFile(configPath, []byte(`{"provider": "not a map"}`), 0o644)
err := o.Edit([]string{"llama3.2"})
if err != nil {
t.Fatalf("Edit with wrong type provider failed: %v", err)
}
// Verify provider is now correct type
data, _ := os.ReadFile(configPath)
var cfg map[string]any
json.Unmarshal(data, &cfg)
provider, ok := cfg["provider"].(map[string]any)
if !ok {
t.Fatalf("provider should be map after setup, got %T", cfg["provider"])
}
if provider["ollama"] == nil {
t.Error("ollama provider should be created")
}
}
func TestOpenCodeEdit_WrongTypeRecent(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
statePath := filepath.Join(stateDir, "model.json")
os.MkdirAll(stateDir, 0o755)
os.WriteFile(statePath, []byte(`{"recent": "not an array", "favorite": [], "variant": {}}`), 0o644)
err := o.Edit([]string{"llama3.2"})
if err != nil {
t.Fatalf("Edit with wrong type recent failed: %v", err)
}
// The function should handle this gracefully
data, _ := os.ReadFile(statePath)
var state map[string]any
json.Unmarshal(data, &state)
// recent should be properly set after setup
recent, ok := state["recent"].([]any)
if !ok {
t.Logf("Note: recent type after setup is %T (documenting behavior)", state["recent"])
} else if len(recent) == 0 {
t.Logf("Note: recent is empty (documenting behavior)")
}
}
func TestOpenCodeEdit_EmptyModels(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".config", "opencode")
configPath := filepath.Join(configDir, "opencode.json")
os.MkdirAll(configDir, 0o755)
originalContent := `{"provider":{"ollama":{"models":{"existing":{}}}}}`
os.WriteFile(configPath, []byte(originalContent), 0o644)
// Empty models should be no-op
err := o.Edit([]string{})
if err != nil {
t.Fatalf("Edit with empty models failed: %v", err)
}
// Original content should be preserved (file not modified)
data, _ := os.ReadFile(configPath)
if string(data) != originalContent {
t.Errorf("empty models should not modify file, but content changed")
}
}
func TestOpenCodeEdit_SpecialCharsInModelName(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
// Model name with special characters (though unusual)
specialModel := `model-with-"quotes"`
err := o.Edit([]string{specialModel})
if err != nil {
t.Fatalf("Edit with special chars failed: %v", err)
}
// Verify it was stored correctly
configDir := filepath.Join(tmpDir, ".config", "opencode")
configPath := filepath.Join(configDir, "opencode.json")
data, _ := os.ReadFile(configPath)
var cfg map[string]any
if err := json.Unmarshal(data, &cfg); err != nil {
t.Fatalf("resulting config is invalid JSON: %v", err)
}
// Model should be accessible
provider, _ := cfg["provider"].(map[string]any)
ollama, _ := provider["ollama"].(map[string]any)
models, _ := ollama["models"].(map[string]any)
if models[specialModel] == nil {
t.Errorf("model with special chars not found in config")
}
}
func TestOpenCodeModels_NoConfig(t *testing.T) {
o := &OpenCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
models := o.Models()
if len(models) > 0 {
t.Errorf("expected nil/empty for missing config, got %v", models)
}
}

View File

@@ -1,499 +0,0 @@
package config
import (
"errors"
"fmt"
"io"
"os"
"strings"
"golang.org/x/term"
)
// ANSI escape sequences for terminal formatting.
const (
ansiHideCursor = "\033[?25l"
ansiShowCursor = "\033[?25h"
ansiBold = "\033[1m"
ansiReset = "\033[0m"
ansiGray = "\033[37m"
ansiClearDown = "\033[J"
)
const maxDisplayedItems = 10
var errCancelled = errors.New("cancelled")
type selectItem struct {
Name string
Description string
}
type inputEvent int
const (
eventNone inputEvent = iota
eventEnter
eventEscape
eventUp
eventDown
eventTab
eventBackspace
eventChar
)
type selectState struct {
items []selectItem
filter string
selected int
scrollOffset int
}
func newSelectState(items []selectItem) *selectState {
return &selectState{items: items}
}
func (s *selectState) filtered() []selectItem {
return filterItems(s.items, s.filter)
}
func (s *selectState) handleInput(event inputEvent, char byte) (done bool, result string, err error) {
filtered := s.filtered()
switch event {
case eventEnter:
if len(filtered) > 0 && s.selected < len(filtered) {
return true, filtered[s.selected].Name, nil
}
case eventEscape:
return true, "", errCancelled
case eventBackspace:
if len(s.filter) > 0 {
s.filter = s.filter[:len(s.filter)-1]
s.selected = 0
s.scrollOffset = 0
}
case eventUp:
if s.selected > 0 {
s.selected--
if s.selected < s.scrollOffset {
s.scrollOffset = s.selected
}
}
case eventDown:
if s.selected < len(filtered)-1 {
s.selected++
if s.selected >= s.scrollOffset+maxDisplayedItems {
s.scrollOffset = s.selected - maxDisplayedItems + 1
}
}
case eventChar:
s.filter += string(char)
s.selected = 0
s.scrollOffset = 0
}
return false, "", nil
}
type multiSelectState struct {
items []selectItem
itemIndex map[string]int
filter string
highlighted int
scrollOffset int
checked map[int]bool
checkOrder []int
focusOnButton bool
}
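// Note: checkOrder preserves the order in which items were toggled on; its
// first entry is treated as the default model and rendered with a
// "(default)" suffix in renderMultiSelect.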
func newMultiSelectState(items []selectItem, preChecked []string) *multiSelectState {
s := &multiSelectState{
items: items,
itemIndex: make(map[string]int, len(items)),
checked: make(map[int]bool),
}
for i, item := range items {
s.itemIndex[item.Name] = i
}
for _, name := range preChecked {
if idx, ok := s.itemIndex[name]; ok {
s.checked[idx] = true
s.checkOrder = append(s.checkOrder, idx)
}
}
return s
}
func (s *multiSelectState) filtered() []selectItem {
return filterItems(s.items, s.filter)
}
func (s *multiSelectState) toggleItem() {
filtered := s.filtered()
if len(filtered) == 0 || s.highlighted >= len(filtered) {
return
}
item := filtered[s.highlighted]
origIdx := s.itemIndex[item.Name]
if s.checked[origIdx] {
delete(s.checked, origIdx)
for i, idx := range s.checkOrder {
if idx == origIdx {
s.checkOrder = append(s.checkOrder[:i], s.checkOrder[i+1:]...)
break
}
}
} else {
s.checked[origIdx] = true
s.checkOrder = append(s.checkOrder, origIdx)
}
}
func (s *multiSelectState) handleInput(event inputEvent, char byte) (done bool, result []string, err error) {
filtered := s.filtered()
switch event {
case eventEnter:
if s.focusOnButton && len(s.checkOrder) > 0 {
var res []string
for _, idx := range s.checkOrder {
res = append(res, s.items[idx].Name)
}
return true, res, nil
} else if !s.focusOnButton {
s.toggleItem()
}
case eventTab:
if len(s.checkOrder) > 0 {
s.focusOnButton = !s.focusOnButton
}
case eventEscape:
return true, nil, errCancelled
case eventBackspace:
if len(s.filter) > 0 {
s.filter = s.filter[:len(s.filter)-1]
s.highlighted = 0
s.scrollOffset = 0
s.focusOnButton = false
}
case eventUp:
if s.focusOnButton {
s.focusOnButton = false
} else if s.highlighted > 0 {
s.highlighted--
if s.highlighted < s.scrollOffset {
s.scrollOffset = s.highlighted
}
}
case eventDown:
if s.focusOnButton {
s.focusOnButton = false
} else if s.highlighted < len(filtered)-1 {
s.highlighted++
if s.highlighted >= s.scrollOffset+maxDisplayedItems {
s.scrollOffset = s.highlighted - maxDisplayedItems + 1
}
}
case eventChar:
s.filter += string(char)
s.highlighted = 0
s.scrollOffset = 0
s.focusOnButton = false
}
return false, nil, nil
}
func (s *multiSelectState) selectedCount() int {
return len(s.checkOrder)
}
// Terminal I/O handling
type terminalState struct {
fd int
oldState *term.State
}
func enterRawMode() (*terminalState, error) {
fd := int(os.Stdin.Fd())
oldState, err := term.MakeRaw(fd)
if err != nil {
return nil, err
}
fmt.Fprint(os.Stderr, ansiHideCursor)
return &terminalState{fd: fd, oldState: oldState}, nil
}
func (t *terminalState) restore() {
fmt.Fprint(os.Stderr, ansiShowCursor)
term.Restore(t.fd, t.oldState)
}
func clearLines(n int) {
if n > 0 {
fmt.Fprintf(os.Stderr, "\033[%dA", n)
fmt.Fprint(os.Stderr, ansiClearDown)
}
}
func parseInput(r io.Reader) (inputEvent, byte, error) {
buf := make([]byte, 3)
n, err := r.Read(buf)
if err != nil {
return 0, 0, err
}
switch {
case n == 1 && buf[0] == 13: // carriage return (Enter)
return eventEnter, 0, nil
case n == 1 && (buf[0] == 3 || buf[0] == 27): // Ctrl-C or bare Escape
return eventEscape, 0, nil
case n == 1 && buf[0] == 9: // Tab
return eventTab, 0, nil
case n == 1 && buf[0] == 127: // DEL (Backspace)
return eventBackspace, 0, nil
case n == 3 && buf[0] == 27 && buf[1] == 91 && buf[2] == 65: // ESC [ A: up arrow
return eventUp, 0, nil
case n == 3 && buf[0] == 27 && buf[1] == 91 && buf[2] == 66: // ESC [ B: down arrow
return eventDown, 0, nil
case n == 1 && buf[0] >= 32 && buf[0] < 127: // printable ASCII
return eventChar, buf[0], nil
}
return eventNone, 0, nil
}
// Rendering
func renderSelect(w io.Writer, prompt string, s *selectState) int {
filtered := s.filtered()
fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
lineCount := 1
if len(filtered) == 0 {
fmt.Fprintf(w, " %s(no matches)%s\r\n", ansiGray, ansiReset)
lineCount++
} else {
displayCount := min(len(filtered), maxDisplayedItems)
for i := range displayCount {
idx := s.scrollOffset + i
if idx >= len(filtered) {
break
}
item := filtered[idx]
prefix := " "
if idx == s.selected {
prefix = " " + ansiBold + "> "
}
if item.Description != "" {
fmt.Fprintf(w, "%s%s%s %s- %s%s\r\n", prefix, item.Name, ansiReset, ansiGray, item.Description, ansiReset)
} else {
fmt.Fprintf(w, "%s%s%s\r\n", prefix, item.Name, ansiReset)
}
lineCount++
}
if remaining := len(filtered) - s.scrollOffset - displayCount; remaining > 0 {
fmt.Fprintf(w, " %s... and %d more%s\r\n", ansiGray, remaining, ansiReset)
lineCount++
}
}
return lineCount
}
func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
filtered := s.filtered()
fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
lineCount := 1
if len(filtered) == 0 {
fmt.Fprintf(w, " %s(no matches)%s\r\n", ansiGray, ansiReset)
lineCount++
} else {
displayCount := min(len(filtered), maxDisplayedItems)
for i := range displayCount {
idx := s.scrollOffset + i
if idx >= len(filtered) {
break
}
item := filtered[idx]
origIdx := s.itemIndex[item.Name]
checkbox := "[ ]"
if s.checked[origIdx] {
checkbox = "[x]"
}
prefix := " "
suffix := ""
if idx == s.highlighted && !s.focusOnButton {
prefix = "> "
}
if len(s.checkOrder) > 0 && s.checkOrder[0] == origIdx {
suffix = " " + ansiGray + "(default)" + ansiReset
}
if idx == s.highlighted && !s.focusOnButton {
fmt.Fprintf(w, " %s%s %s %s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, suffix)
} else {
fmt.Fprintf(w, " %s %s %s%s\r\n", prefix, checkbox, item.Name, suffix)
}
lineCount++
}
if remaining := len(filtered) - s.scrollOffset - displayCount; remaining > 0 {
fmt.Fprintf(w, " %s... and %d more%s\r\n", ansiGray, remaining, ansiReset)
lineCount++
}
}
fmt.Fprintf(w, "\r\n")
lineCount++
count := s.selectedCount()
switch {
case count == 0:
fmt.Fprintf(w, " %sSelect at least one model.%s\r\n", ansiGray, ansiReset)
case s.focusOnButton:
fmt.Fprintf(w, " %s> [ Continue ]%s %s(%d selected)%s\r\n", ansiBold, ansiReset, ansiGray, count, ansiReset)
default:
fmt.Fprintf(w, " %s[ Continue ] (%d selected) - press Tab%s\r\n", ansiGray, count, ansiReset)
}
lineCount++
return lineCount
}
// selectPrompt prompts the user to select a single item from a list.
func selectPrompt(prompt string, items []selectItem) (string, error) {
if len(items) == 0 {
return "", fmt.Errorf("no items to select from")
}
ts, err := enterRawMode()
if err != nil {
return "", err
}
defer ts.restore()
state := newSelectState(items)
var lastLineCount int
render := func() {
clearLines(lastLineCount)
lastLineCount = renderSelect(os.Stderr, prompt, state)
}
render()
for {
event, char, err := parseInput(os.Stdin)
if err != nil {
return "", err
}
done, result, err := state.handleInput(event, char)
if done {
clearLines(lastLineCount)
if err != nil {
return "", err
}
return result, nil
}
render()
}
}
// multiSelectPrompt prompts the user to select multiple items from a list.
func multiSelectPrompt(prompt string, items []selectItem, preChecked []string) ([]string, error) {
if len(items) == 0 {
return nil, fmt.Errorf("no items to select from")
}
ts, err := enterRawMode()
if err != nil {
return nil, err
}
defer ts.restore()
state := newMultiSelectState(items, preChecked)
var lastLineCount int
render := func() {
clearLines(lastLineCount)
lastLineCount = renderMultiSelect(os.Stderr, prompt, state)
}
render()
for {
event, char, err := parseInput(os.Stdin)
if err != nil {
return nil, err
}
done, result, err := state.handleInput(event, char)
if done {
clearLines(lastLineCount)
if err != nil {
return nil, err
}
return result, nil
}
render()
}
}
func confirmPrompt(prompt string) (bool, error) {
fd := int(os.Stdin.Fd())
oldState, err := term.MakeRaw(fd)
if err != nil {
return false, err
}
defer term.Restore(fd, oldState)
fmt.Fprintf(os.Stderr, "%s [y/n] ", prompt)
buf := make([]byte, 1)
for {
if _, err := os.Stdin.Read(buf); err != nil {
return false, err
}
switch buf[0] {
case 'Y', 'y', 13: // 13 = Enter (carriage return)
fmt.Fprintf(os.Stderr, "yes\r\n")
return true, nil
case 'N', 'n', 27, 3: // 27 = Escape, 3 = Ctrl+C
fmt.Fprintf(os.Stderr, "no\r\n")
return false, nil
}
}
}
func filterItems(items []selectItem, filter string) []selectItem {
if filter == "" {
return items
}
var result []selectItem
filterLower := strings.ToLower(filter)
for _, item := range items {
if strings.Contains(strings.ToLower(item.Name), filterLower) {
result = append(result, item)
}
}
return result
}
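// Taken together, these helpers form a small raw-mode TUI toolkit: selectPrompt
// for a single choice, multiSelectPrompt for an ordered multi-choice (the first
// checked item acts as the default), and confirmPrompt for yes/no. The sketch
// below is illustrative only (not part of the original file); the model names
// are placeholders.
func exampleConfigFlow() error {
	items := []selectItem{
		{Name: "qwen3-coder", Description: "coding"},
		{Name: "gpt-oss:20b", Description: "general purpose"},
	}

	// Single choice; returns errCancelled if the user presses Escape or Ctrl+C.
	def, err := selectPrompt("Default model:", items)
	if err != nil {
		return err
	}

	// Multi choice, pre-checking the default; the result preserves check order.
	enabled, err := multiSelectPrompt("Enabled models:", items, []string{def})
	if err != nil {
		return err
	}

	ok, err := confirmPrompt(fmt.Sprintf("Save %d model(s)?", len(enabled)))
	if err != nil || !ok {
		return err
	}
	return nil // persist `def` and `enabled` here
}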

View File

@@ -1,913 +0,0 @@
package config
import (
"bytes"
"strings"
"testing"
)
func TestFilterItems(t *testing.T) {
items := []selectItem{
{Name: "llama3.2:latest"},
{Name: "qwen2.5:7b"},
{Name: "deepseek-v3:cloud"},
{Name: "GPT-OSS:20b"},
}
t.Run("EmptyFilter_ReturnsAllItems", func(t *testing.T) {
result := filterItems(items, "")
if len(result) != len(items) {
t.Errorf("expected %d items, got %d", len(items), len(result))
}
})
t.Run("CaseInsensitive_UppercaseFilterMatchesLowercase", func(t *testing.T) {
result := filterItems(items, "LLAMA")
if len(result) != 1 || result[0].Name != "llama3.2:latest" {
t.Errorf("expected llama3.2:latest, got %v", result)
}
})
t.Run("CaseInsensitive_LowercaseFilterMatchesUppercase", func(t *testing.T) {
result := filterItems(items, "gpt")
if len(result) != 1 || result[0].Name != "GPT-OSS:20b" {
t.Errorf("expected GPT-OSS:20b, got %v", result)
}
})
t.Run("PartialMatch", func(t *testing.T) {
result := filterItems(items, "deep")
if len(result) != 1 || result[0].Name != "deepseek-v3:cloud" {
t.Errorf("expected deepseek-v3:cloud, got %v", result)
}
})
t.Run("NoMatch_ReturnsEmpty", func(t *testing.T) {
result := filterItems(items, "nonexistent")
if len(result) != 0 {
t.Errorf("expected 0 items, got %d", len(result))
}
})
}
func TestSelectState(t *testing.T) {
items := []selectItem{
{Name: "item1"},
{Name: "item2"},
{Name: "item3"},
}
t.Run("InitialState", func(t *testing.T) {
s := newSelectState(items)
if s.selected != 0 {
t.Errorf("expected selected=0, got %d", s.selected)
}
if s.filter != "" {
t.Errorf("expected empty filter, got %q", s.filter)
}
if s.scrollOffset != 0 {
t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
}
})
t.Run("Enter_SelectsCurrentItem", func(t *testing.T) {
s := newSelectState(items)
done, result, err := s.handleInput(eventEnter, 0)
if !done || result != "item1" || err != nil {
t.Errorf("expected (true, item1, nil), got (%v, %v, %v)", done, result, err)
}
})
t.Run("Enter_WithFilter_SelectsFilteredItem", func(t *testing.T) {
s := newSelectState(items)
s.filter = "item3"
done, result, err := s.handleInput(eventEnter, 0)
if !done || result != "item3" || err != nil {
t.Errorf("expected (true, item3, nil), got (%v, %v, %v)", done, result, err)
}
})
t.Run("Enter_EmptyFilteredList_DoesNothing", func(t *testing.T) {
s := newSelectState(items)
s.filter = "nonexistent"
done, result, err := s.handleInput(eventEnter, 0)
if done || result != "" || err != nil {
t.Errorf("expected (false, '', nil), got (%v, %v, %v)", done, result, err)
}
})
t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
s := newSelectState(items)
done, result, err := s.handleInput(eventEscape, 0)
if !done || result != "" || err != errCancelled {
t.Errorf("expected (true, '', errCancelled), got (%v, %v, %v)", done, result, err)
}
})
t.Run("Down_MovesSelection", func(t *testing.T) {
s := newSelectState(items)
s.handleInput(eventDown, 0)
if s.selected != 1 {
t.Errorf("expected selected=1, got %d", s.selected)
}
})
t.Run("Down_AtBottom_StaysAtBottom", func(t *testing.T) {
s := newSelectState(items)
s.selected = 2
s.handleInput(eventDown, 0)
if s.selected != 2 {
t.Errorf("expected selected=2 (stayed at bottom), got %d", s.selected)
}
})
t.Run("Up_MovesSelection", func(t *testing.T) {
s := newSelectState(items)
s.selected = 2
s.handleInput(eventUp, 0)
if s.selected != 1 {
t.Errorf("expected selected=1, got %d", s.selected)
}
})
t.Run("Up_AtTop_StaysAtTop", func(t *testing.T) {
s := newSelectState(items)
s.handleInput(eventUp, 0)
if s.selected != 0 {
t.Errorf("expected selected=0 (stayed at top), got %d", s.selected)
}
})
t.Run("Char_AppendsToFilter", func(t *testing.T) {
s := newSelectState(items)
s.handleInput(eventChar, 'i')
s.handleInput(eventChar, 't')
s.handleInput(eventChar, 'e')
s.handleInput(eventChar, 'm')
s.handleInput(eventChar, '2')
if s.filter != "item2" {
t.Errorf("expected filter='item2', got %q", s.filter)
}
filtered := s.filtered()
if len(filtered) != 1 || filtered[0].Name != "item2" {
t.Errorf("expected [item2], got %v", filtered)
}
})
t.Run("Char_ResetsSelectionToZero", func(t *testing.T) {
s := newSelectState(items)
s.selected = 2
s.handleInput(eventChar, 'x')
if s.selected != 0 {
t.Errorf("expected selected=0 after typing, got %d", s.selected)
}
})
t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
s := newSelectState(items)
s.filter = "test"
s.handleInput(eventBackspace, 0)
if s.filter != "tes" {
t.Errorf("expected filter='tes', got %q", s.filter)
}
})
t.Run("Backspace_EmptyFilter_DoesNothing", func(t *testing.T) {
s := newSelectState(items)
s.handleInput(eventBackspace, 0)
if s.filter != "" {
t.Errorf("expected filter='', got %q", s.filter)
}
})
t.Run("Backspace_ResetsSelectionToZero", func(t *testing.T) {
s := newSelectState(items)
s.filter = "test"
s.selected = 2
s.handleInput(eventBackspace, 0)
if s.selected != 0 {
t.Errorf("expected selected=0 after backspace, got %d", s.selected)
}
})
t.Run("Scroll_DownPastVisibleItems_ScrollsViewport", func(t *testing.T) {
// maxDisplayedItems is 10, so with 15 items we need to scroll
manyItems := make([]selectItem, 15)
for i := range manyItems {
manyItems[i] = selectItem{Name: string(rune('a' + i))}
}
s := newSelectState(manyItems)
// move down 12 times (past the 10-item viewport)
for range 12 {
s.handleInput(eventDown, 0)
}
if s.selected != 12 {
t.Errorf("expected selected=12, got %d", s.selected)
}
if s.scrollOffset != 3 {
t.Errorf("expected scrollOffset=3 (12-10+1), got %d", s.scrollOffset)
}
})
t.Run("Scroll_UpPastScrollOffset_ScrollsViewport", func(t *testing.T) {
manyItems := make([]selectItem, 15)
for i := range manyItems {
manyItems[i] = selectItem{Name: string(rune('a' + i))}
}
s := newSelectState(manyItems)
s.selected = 5
s.scrollOffset = 5
s.handleInput(eventUp, 0)
if s.selected != 4 {
t.Errorf("expected selected=4, got %d", s.selected)
}
if s.scrollOffset != 4 {
t.Errorf("expected scrollOffset=4, got %d", s.scrollOffset)
}
})
}
func TestMultiSelectState(t *testing.T) {
items := []selectItem{
{Name: "item1"},
{Name: "item2"},
{Name: "item3"},
}
t.Run("InitialState_NoPrechecked", func(t *testing.T) {
s := newMultiSelectState(items, nil)
if s.highlighted != 0 {
t.Errorf("expected highlighted=0, got %d", s.highlighted)
}
if s.selectedCount() != 0 {
t.Errorf("expected 0 selected, got %d", s.selectedCount())
}
if s.focusOnButton {
t.Error("expected focusOnButton=false initially")
}
})
t.Run("InitialState_WithPrechecked", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item2", "item3"})
if s.selectedCount() != 2 {
t.Errorf("expected 2 selected, got %d", s.selectedCount())
}
if !s.checked[1] || !s.checked[2] {
t.Error("expected item2 and item3 to be checked")
}
})
t.Run("Prechecked_PreservesSelectionOrder", func(t *testing.T) {
// order matters: first checked = default model
s := newMultiSelectState(items, []string{"item3", "item1"})
if len(s.checkOrder) != 2 {
t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
}
if s.checkOrder[0] != 2 || s.checkOrder[1] != 0 {
t.Errorf("expected checkOrder=[2,0] (item3 first), got %v", s.checkOrder)
}
})
t.Run("Prechecked_IgnoresInvalidNames", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1", "nonexistent"})
if s.selectedCount() != 1 {
t.Errorf("expected 1 selected (nonexistent ignored), got %d", s.selectedCount())
}
})
t.Run("Toggle_ChecksUncheckedItem", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.toggleItem()
if !s.checked[0] {
t.Error("expected item1 to be checked after toggle")
}
})
t.Run("Toggle_UnchecksCheckedItem", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
s.toggleItem()
if s.checked[0] {
t.Error("expected item1 to be unchecked after toggle")
}
})
t.Run("Toggle_RemovesFromCheckOrder", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1", "item2", "item3"})
s.highlighted = 1 // toggle item2
s.toggleItem()
if len(s.checkOrder) != 2 {
t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
}
// should be [0, 2] (item1, item3) with item2 removed
if s.checkOrder[0] != 0 || s.checkOrder[1] != 2 {
t.Errorf("expected checkOrder=[0,2], got %v", s.checkOrder)
}
})
t.Run("Enter_TogglesWhenNotOnButton", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.handleInput(eventEnter, 0)
if !s.checked[0] {
t.Error("expected item1 to be checked after enter")
}
})
t.Run("Enter_OnButton_ReturnsSelection", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item2", "item1"})
s.focusOnButton = true
done, result, err := s.handleInput(eventEnter, 0)
if !done || err != nil {
t.Errorf("expected done=true, err=nil, got done=%v, err=%v", done, err)
}
// result should preserve selection order
if len(result) != 2 || result[0] != "item2" || result[1] != "item1" {
t.Errorf("expected [item2, item1], got %v", result)
}
})
t.Run("Enter_OnButton_EmptySelection_DoesNothing", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.focusOnButton = true
done, result, err := s.handleInput(eventEnter, 0)
if done || result != nil || err != nil {
t.Errorf("expected (false, nil, nil), got (%v, %v, %v)", done, result, err)
}
})
t.Run("Tab_SwitchesToButton_WhenHasSelection", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
s.handleInput(eventTab, 0)
if !s.focusOnButton {
t.Error("expected focus on button after tab")
}
})
t.Run("Tab_DoesNothing_WhenNoSelection", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.handleInput(eventTab, 0)
if s.focusOnButton {
t.Error("tab should not focus button when nothing selected")
}
})
t.Run("Tab_TogglesButtonFocus", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
s.handleInput(eventTab, 0)
if !s.focusOnButton {
t.Error("expected focus on button after first tab")
}
s.handleInput(eventTab, 0)
if s.focusOnButton {
t.Error("expected focus back on list after second tab")
}
})
t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
done, result, err := s.handleInput(eventEscape, 0)
if !done || result != nil || err != errCancelled {
t.Errorf("expected (true, nil, errCancelled), got (%v, %v, %v)", done, result, err)
}
})
t.Run("IsDefault_TrueForFirstChecked", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item2", "item1"})
if !(len(s.checkOrder) > 0 && s.checkOrder[0] == 1) {
t.Error("expected item2 (idx 1) to be default (first checked)")
}
if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
t.Error("expected item1 (idx 0) to NOT be default")
}
})
t.Run("IsDefault_FalseWhenNothingChecked", func(t *testing.T) {
s := newMultiSelectState(items, nil)
if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
t.Error("expected isDefault=false when nothing checked")
}
})
t.Run("Down_MovesHighlight", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.handleInput(eventDown, 0)
if s.highlighted != 1 {
t.Errorf("expected highlighted=1, got %d", s.highlighted)
}
})
t.Run("Up_MovesHighlight", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.highlighted = 1
s.handleInput(eventUp, 0)
if s.highlighted != 0 {
t.Errorf("expected highlighted=0, got %d", s.highlighted)
}
})
t.Run("Arrow_ReturnsFocusFromButton", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
s.focusOnButton = true
s.handleInput(eventDown, 0)
if s.focusOnButton {
t.Error("expected focus to return to list on arrow key")
}
})
t.Run("Char_AppendsToFilter", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.handleInput(eventChar, 'x')
if s.filter != "x" {
t.Errorf("expected filter='x', got %q", s.filter)
}
})
t.Run("Char_ResetsHighlightAndScroll", func(t *testing.T) {
manyItems := make([]selectItem, 15)
for i := range manyItems {
manyItems[i] = selectItem{Name: string(rune('a' + i))}
}
s := newMultiSelectState(manyItems, nil)
s.highlighted = 10
s.scrollOffset = 5
s.handleInput(eventChar, 'x')
if s.highlighted != 0 {
t.Errorf("expected highlighted=0, got %d", s.highlighted)
}
if s.scrollOffset != 0 {
t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
}
})
t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
s := newMultiSelectState(items, nil)
s.filter = "test"
s.handleInput(eventBackspace, 0)
if s.filter != "tes" {
t.Errorf("expected filter='tes', got %q", s.filter)
}
})
t.Run("Backspace_RemovesFocusFromButton", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
s.filter = "x"
s.focusOnButton = true
s.handleInput(eventBackspace, 0)
if s.focusOnButton {
t.Error("expected focusOnButton=false after backspace")
}
})
}
func TestParseInput(t *testing.T) {
t.Run("Enter", func(t *testing.T) {
event, char, err := parseInput(bytes.NewReader([]byte{13}))
if err != nil || event != eventEnter || char != 0 {
t.Errorf("expected (eventEnter, 0, nil), got (%v, %v, %v)", event, char, err)
}
})
t.Run("Escape", func(t *testing.T) {
event, _, err := parseInput(bytes.NewReader([]byte{27}))
if err != nil || event != eventEscape {
t.Errorf("expected eventEscape, got %v", event)
}
})
t.Run("CtrlC_TreatedAsEscape", func(t *testing.T) {
event, _, err := parseInput(bytes.NewReader([]byte{3}))
if err != nil || event != eventEscape {
t.Errorf("expected eventEscape for Ctrl+C, got %v", event)
}
})
t.Run("Tab", func(t *testing.T) {
event, _, err := parseInput(bytes.NewReader([]byte{9}))
if err != nil || event != eventTab {
t.Errorf("expected eventTab, got %v", event)
}
})
t.Run("Backspace", func(t *testing.T) {
event, _, err := parseInput(bytes.NewReader([]byte{127}))
if err != nil || event != eventBackspace {
t.Errorf("expected eventBackspace, got %v", event)
}
})
t.Run("UpArrow", func(t *testing.T) {
event, _, err := parseInput(bytes.NewReader([]byte{27, 91, 65}))
if err != nil || event != eventUp {
t.Errorf("expected eventUp, got %v", event)
}
})
t.Run("DownArrow", func(t *testing.T) {
event, _, err := parseInput(bytes.NewReader([]byte{27, 91, 66}))
if err != nil || event != eventDown {
t.Errorf("expected eventDown, got %v", event)
}
})
t.Run("PrintableChars", func(t *testing.T) {
tests := []struct {
name string
char byte
}{
{"lowercase", 'a'},
{"uppercase", 'Z'},
{"digit", '5'},
{"space", ' '},
{"tilde", '~'},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
event, char, err := parseInput(bytes.NewReader([]byte{tt.char}))
if err != nil || event != eventChar || char != tt.char {
t.Errorf("expected (eventChar, %q), got (%v, %q)", tt.char, event, char)
}
})
}
})
}
func TestRenderSelect(t *testing.T) {
items := []selectItem{
{Name: "item1", Description: "first item"},
{Name: "item2"},
}
t.Run("ShowsPromptAndItems", func(t *testing.T) {
s := newSelectState(items)
var buf bytes.Buffer
lineCount := renderSelect(&buf, "Select:", s)
output := buf.String()
if !strings.Contains(output, "Select:") {
t.Error("expected prompt in output")
}
if !strings.Contains(output, "item1") {
t.Error("expected item1 in output")
}
if !strings.Contains(output, "first item") {
t.Error("expected description in output")
}
if !strings.Contains(output, "item2") {
t.Error("expected item2 in output")
}
if lineCount != 3 { // 1 prompt + 2 items
t.Errorf("expected 3 lines, got %d", lineCount)
}
})
t.Run("EmptyFilteredList_ShowsNoMatches", func(t *testing.T) {
s := newSelectState(items)
s.filter = "xyz"
var buf bytes.Buffer
renderSelect(&buf, "Select:", s)
if !strings.Contains(buf.String(), "no matches") {
t.Error("expected 'no matches' message")
}
})
t.Run("LongList_ShowsRemainingCount", func(t *testing.T) {
manyItems := make([]selectItem, 15)
for i := range manyItems {
manyItems[i] = selectItem{Name: string(rune('a' + i))}
}
s := newSelectState(manyItems)
var buf bytes.Buffer
renderSelect(&buf, "Select:", s)
// 15 items - 10 displayed = 5 more
if !strings.Contains(buf.String(), "5 more") {
t.Error("expected '5 more' indicator")
}
})
}
func TestRenderMultiSelect(t *testing.T) {
items := []selectItem{
{Name: "item1"},
{Name: "item2"},
}
t.Run("ShowsCheckboxes", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
var buf bytes.Buffer
renderMultiSelect(&buf, "Select:", s)
output := buf.String()
if !strings.Contains(output, "[x]") {
t.Error("expected checked checkbox [x]")
}
if !strings.Contains(output, "[ ]") {
t.Error("expected unchecked checkbox [ ]")
}
})
t.Run("ShowsDefaultMarker", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1"})
var buf bytes.Buffer
renderMultiSelect(&buf, "Select:", s)
if !strings.Contains(buf.String(), "(default)") {
t.Error("expected (default) marker for first checked item")
}
})
t.Run("ShowsSelectedCount", func(t *testing.T) {
s := newMultiSelectState(items, []string{"item1", "item2"})
var buf bytes.Buffer
renderMultiSelect(&buf, "Select:", s)
if !strings.Contains(buf.String(), "2 selected") {
t.Error("expected '2 selected' in output")
}
})
t.Run("NoSelection_ShowsHelperText", func(t *testing.T) {
s := newMultiSelectState(items, nil)
var buf bytes.Buffer
renderMultiSelect(&buf, "Select:", s)
if !strings.Contains(buf.String(), "Select at least one") {
t.Error("expected 'Select at least one' helper text")
}
})
}
func TestErrCancelled(t *testing.T) {
t.Run("NotNil", func(t *testing.T) {
if errCancelled == nil {
t.Error("errCancelled should not be nil")
}
})
t.Run("Message", func(t *testing.T) {
if errCancelled.Error() != "cancelled" {
t.Errorf("expected 'cancelled', got %q", errCancelled.Error())
}
})
}
// Edge case tests for selector.go
// TestSelectState_SingleItem verifies that single item list works without crash.
// List with only one item should still work.
func TestSelectState_SingleItem(t *testing.T) {
items := []selectItem{{Name: "only-one"}}
s := newSelectState(items)
// Down should do nothing (already at bottom)
s.handleInput(eventDown, 0)
if s.selected != 0 {
t.Errorf("down on single item: expected selected=0, got %d", s.selected)
}
// Up should do nothing (already at top)
s.handleInput(eventUp, 0)
if s.selected != 0 {
t.Errorf("up on single item: expected selected=0, got %d", s.selected)
}
// Enter should select the only item
done, result, err := s.handleInput(eventEnter, 0)
if !done || result != "only-one" || err != nil {
t.Errorf("enter on single item: expected (true, 'only-one', nil), got (%v, %q, %v)", done, result, err)
}
}
// TestSelectState_ExactlyMaxItems verifies boundary condition at maxDisplayedItems.
// List with exactly maxDisplayedItems items should not scroll.
func TestSelectState_ExactlyMaxItems(t *testing.T) {
items := make([]selectItem, maxDisplayedItems)
for i := range items {
items[i] = selectItem{Name: string(rune('a' + i))}
}
s := newSelectState(items)
// Move to last item
for range maxDisplayedItems - 1 {
s.handleInput(eventDown, 0)
}
if s.selected != maxDisplayedItems-1 {
t.Errorf("expected selected=%d, got %d", maxDisplayedItems-1, s.selected)
}
// Should not scroll when exactly at max
if s.scrollOffset != 0 {
t.Errorf("expected scrollOffset=0 for exactly maxDisplayedItems, got %d", s.scrollOffset)
}
// One more down should do nothing
s.handleInput(eventDown, 0)
if s.selected != maxDisplayedItems-1 {
t.Errorf("down at max: expected selected=%d, got %d", maxDisplayedItems-1, s.selected)
}
}
// TestFilterItems_RegexSpecialChars verifies that filter is literal, not regex.
// User typing "model.v1" shouldn't match "modelsv1".
func TestFilterItems_RegexSpecialChars(t *testing.T) {
items := []selectItem{
{Name: "model.v1"},
{Name: "modelsv1"},
{Name: "model-v1"},
}
// Filter with dot should only match literal dot
result := filterItems(items, "model.v1")
if len(result) != 1 {
t.Errorf("expected 1 exact match, got %d", len(result))
}
if len(result) > 0 && result[0].Name != "model.v1" {
t.Errorf("expected 'model.v1', got %s", result[0].Name)
}
// Other regex special chars should be literal too
items2 := []selectItem{
{Name: "test[0]"},
{Name: "test0"},
{Name: "test(1)"},
}
result2 := filterItems(items2, "test[0]")
if len(result2) != 1 || result2[0].Name != "test[0]" {
t.Errorf("expected only 'test[0]', got %v", result2)
}
}
// TestMultiSelectState_DuplicateNames documents handling of duplicate item names.
// itemIndex uses name as key, so duplicates collide. This documents
// the current behavior: the last index for a duplicate name is stored.
func TestMultiSelectState_DuplicateNames(t *testing.T) {
// Duplicate names - this is an edge case that shouldn't happen in practice
items := []selectItem{
{Name: "duplicate"},
{Name: "duplicate"},
{Name: "unique"},
}
s := newMultiSelectState(items, nil)
// DOCUMENTED BEHAVIOR: itemIndex maps name to LAST index
// When there are duplicates, only the last occurrence's index is stored
if s.itemIndex["duplicate"] != 1 {
t.Errorf("itemIndex should map 'duplicate' to last index (1), got %d", s.itemIndex["duplicate"])
}
// Toggle item at highlighted=0 (first "duplicate")
// Due to name collision, toggleItem uses itemIndex["duplicate"] = 1
// So it actually toggles the SECOND duplicate item, not the first
s.toggleItem()
// This documents the potentially surprising behavior:
// We toggled at highlighted=0, but itemIndex lookup returned 1
if !s.checked[1] {
t.Error("toggle should check index 1 (due to name collision in itemIndex)")
}
if s.checked[0] {
t.Log("Note: index 0 is NOT checked, even though highlighted=0 (name collision behavior)")
}
}
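// A defensive variant (hypothetical, not part of this package) would sidestep
// the collision by resolving the highlighted entry positionally rather than
// through the name-keyed itemIndex map, e.g.:
//
//	type indexedItem struct {
//		selectItem
//		origIdx int // position in the unfiltered slice, unique even for duplicate names
//	}
//
// filtered() would then return []indexedItem, and toggleItem would use
// filtered[s.highlighted].origIdx directly, so two items with the same name
// could be checked independently.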
// TestSelectState_FilterReducesBelowSelection verifies selection resets when filter reduces list.
// Prevents index-out-of-bounds on next keystroke
func TestSelectState_FilterReducesBelowSelection(t *testing.T) {
items := []selectItem{
{Name: "apple"},
{Name: "banana"},
{Name: "cherry"},
}
s := newSelectState(items)
s.selected = 2 // Select "cherry"
// Type a filter that removes cherry from results
s.handleInput(eventChar, 'a') // Filter to "a" - matches "apple" and "banana"
// Selection should reset to 0
if s.selected != 0 {
t.Errorf("expected selected=0 after filter, got %d", s.selected)
}
filtered := s.filtered()
if len(filtered) != 2 {
t.Errorf("expected 2 filtered items, got %d", len(filtered))
}
}
// TestFilterItems_UnicodeCharacters verifies filtering works with UTF-8.
// Model names might contain unicode characters
func TestFilterItems_UnicodeCharacters(t *testing.T) {
items := []selectItem{
{Name: "llama-日本語"},
{Name: "模型-chinese"},
{Name: "émoji-🦙"},
{Name: "regular-model"},
}
t.Run("filter japanese", func(t *testing.T) {
result := filterItems(items, "日本")
if len(result) != 1 || result[0].Name != "llama-日本語" {
t.Errorf("expected llama-日本語, got %v", result)
}
})
t.Run("filter chinese", func(t *testing.T) {
result := filterItems(items, "模型")
if len(result) != 1 || result[0].Name != "模型-chinese" {
t.Errorf("expected 模型-chinese, got %v", result)
}
})
t.Run("filter emoji", func(t *testing.T) {
result := filterItems(items, "🦙")
if len(result) != 1 || result[0].Name != "émoji-🦙" {
t.Errorf("expected émoji-🦙, got %v", result)
}
})
t.Run("filter accented char", func(t *testing.T) {
result := filterItems(items, "émoji")
if len(result) != 1 || result[0].Name != "émoji-🦙" {
t.Errorf("expected émoji-🦙, got %v", result)
}
})
}
// TestMultiSelectState_FilterReducesBelowHighlight verifies highlight resets when filter reduces list.
func TestMultiSelectState_FilterReducesBelowHighlight(t *testing.T) {
items := []selectItem{
{Name: "apple"},
{Name: "banana"},
{Name: "cherry"},
}
s := newMultiSelectState(items, nil)
s.highlighted = 2 // Highlight "cherry"
// Type a filter that removes cherry
s.handleInput(eventChar, 'a')
if s.highlighted != 0 {
t.Errorf("expected highlighted=0 after filter, got %d", s.highlighted)
}
}
// TestMultiSelectState_EmptyItems verifies handling of empty item list.
// Empty list should be handled gracefully.
func TestMultiSelectState_EmptyItems(t *testing.T) {
s := newMultiSelectState([]selectItem{}, nil)
// Toggle should not panic on empty list
s.toggleItem()
if s.selectedCount() != 0 {
t.Errorf("expected 0 selected for empty list, got %d", s.selectedCount())
}
// Render should handle empty list
var buf bytes.Buffer
lineCount := renderMultiSelect(&buf, "Select:", s)
if lineCount == 0 {
t.Error("renderMultiSelect should produce output even for empty list")
}
if !strings.Contains(buf.String(), "no matches") {
t.Error("expected 'no matches' for empty list")
}
}
// TestSelectState_RenderWithDescriptions verifies rendering items with descriptions.
func TestSelectState_RenderWithDescriptions(t *testing.T) {
items := []selectItem{
{Name: "item1", Description: "First item description"},
{Name: "item2", Description: ""},
{Name: "item3", Description: "Third item"},
}
s := newSelectState(items)
var buf bytes.Buffer
renderSelect(&buf, "Select:", s)
output := buf.String()
if !strings.Contains(output, "First item description") {
t.Error("expected description to be rendered")
}
if !strings.Contains(output, "item2") {
t.Error("expected item without description to be rendered")
}
}

View File

@@ -116,7 +116,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 Prompt: ">>> ",
 AltPrompt: "... ",
 Placeholder: "Send a message (/? for help)",
-AltPlaceholder: "Press Enter to send",
+AltPlaceholder: `Use """ to end multi-line input`,
 })
 if err != nil {
 return err
@@ -159,7 +159,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 sb.WriteString(before)
 if !ok {
 fmt.Fprintln(&sb)
-scanner.Prompt.UseAlt = true
 continue
 }

View File

@@ -311,10 +311,6 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 conv = &deepseekocr{}
 case "DeepseekV3ForCausalLM":
 conv = &deepseek2Model{}
-case "Glm4MoeLiteForCausalLM":
-conv = &glm4MoeLiteModel{}
-case "Lfm2ForCausalLM":
-conv = &lfm2Model{}
 default:
 return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
 }

View File

@@ -1,150 +0,0 @@
package convert
import (
"cmp"
"fmt"
"log/slog"
"regexp"
"strconv"
"github.com/ollama/ollama/fs/ggml"
)
type glm4MoeLiteModel struct {
ModelParameters
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RMSNormEPS float32 `json:"rms_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
QKNopeHeadDim uint32 `json:"qk_nope_head_dim"`
QKRopeHeadDim uint32 `json:"qk_rope_head_dim"`
KVLoraRank uint32 `json:"kv_lora_rank"`
QLoraRank uint32 `json:"q_lora_rank"`
VHeadDim uint32 `json:"v_head_dim"`
ExpertCount uint32 `json:"n_routed_experts"`
ExpertSharedCount uint32 `json:"n_shared_experts"`
ExpertIntermediateSize uint32 `json:"moe_intermediate_size"`
ExpertUsedCount uint32 `json:"num_experts_per_tok"`
ExpertWeightsNorm bool `json:"norm_topk_prob"`
ExpertWeightsScale float32 `json:"routed_scaling_factor"`
LeadingDenseBlockCount uint32 `json:"first_k_dense_replace"`
}
func (p *glm4MoeLiteModel) KV(t *Tokenizer) KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "glm4moelite"
kv["general.type"] = "model"
kv["glm4moelite.block_count"] = p.HiddenLayers
numHeads := p.NumAttentionHeads
numKVHeads := p.NumKeyValueHeads
kv["glm4moelite.attention.head_count"] = numHeads
kv["glm4moelite.attention.head_count_kv"] = numKVHeads
kv["glm4moelite.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
kv["glm4moelite.attention.kv_lora_rank"] = p.KVLoraRank
kv["glm4moelite.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["glm4moelite.attention.q_lora_rank"] = p.QLoraRank
kv["glm4moelite.attention.value_length"] = p.VHeadDim
kv["glm4moelite.context_length"] = p.MaxPositionEmbeddings
kv["glm4moelite.embedding_length"] = p.HiddenSize
kv["glm4moelite.expert_count"] = p.ExpertCount
kv["glm4moelite.expert_feed_forward_length"] = p.ExpertIntermediateSize
kv["glm4moelite.expert_shared_count"] = p.ExpertSharedCount
kv["glm4moelite.expert_gating_func"] = uint32(2)
kv["glm4moelite.expert_used_count"] = p.ExpertUsedCount
kv["glm4moelite.expert_weights_norm"] = p.ExpertWeightsNorm
kv["glm4moelite.expert_weights_scale"] = p.ExpertWeightsScale
kv["glm4moelite.feed_forward_length"] = p.IntermediateSize
kv["glm4moelite.leading_dense_block_count"] = p.LeadingDenseBlockCount
kv["glm4moelite.rope.dimension_count"] = p.QKRopeHeadDim
kv["glm4moelite.rope.freq_base"] = cmp.Or(p.RopeTheta, float32(1000000.0))
kv["tokenizer.ggml.pre"] = "glm4"
return kv
}
func (p *glm4MoeLiteModel) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.kv_a_proj_with_mqa", "attn_kv_a_mqa",
"self_attn.kv_a_layernorm", "attn_kv_a_norm",
"self_attn.kv_b_proj", "attn_kv_b",
"self_attn.q_a_proj", "attn_q_a",
"self_attn.q_a_layernorm", "attn_q_a_norm",
"self_attn.q_b_proj", "attn_q_b",
"self_attn.o_proj", "attn_output",
"post_attention_layernorm", "ffn_norm",
"mlp.shared_experts.down_proj", "ffn_down_shexp",
"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
"mlp.shared_experts.up_proj", "ffn_up_shexp",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"mlp.gate.e_score_correction_bias", "exp_probs_b.bias",
"mlp.gate", "ffn_gate_inp",
}
}
func (p *glm4MoeLiteModel) Tensors(s []Tensor) (out []*ggml.Tensor) {
merges := make([]merge, p.HiddenLayers*3)
for i := range p.HiddenLayers {
merges[i*3+0] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
}
merges[i*3+1] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
}
merges[i*3+2] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
}
}
skipLayer := func(n string, minValue uint32) bool {
re := regexp.MustCompile(`^blk\.(\d+)`)
matches := re.FindStringSubmatch(n)
if matches == nil {
return false
}
blkNum, err := strconv.Atoi(matches[1])
if err != nil {
return false
}
return uint32(blkNum) >= minValue
}
out, s = mergeTensors(s, merges...)
for _, t := range s {
// skip any additional layers (such as the Multi-Token Prediction layer)
if skipLayer(t.Name(), p.HiddenLayers) {
slog.Debug("skipping layer", "name", t.Name())
continue
}
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
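// Note on the merges above: mergeTensors (defined elsewhere in this package)
// collapses the per-expert HF tensors matched by each glob into the single
// stacked GGUF tensor named on the right-hand side. Illustratively, for
// layer 0 the gate projections
//
//	blk.0.mlp.experts.0.gate_proj.weight, blk.0.mlp.experts.1.gate_proj.weight, ...
//
// become one blk.0.ffn_gate_exps.weight with the experts stacked along a new
// expert dimension, the usual GGUF layout for MoE expert weights.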

View File

@@ -1,100 +0,0 @@
package convert
import (
"slices"
"strings"
"github.com/ollama/ollama/fs/ggml"
)
type lfm2Model struct {
ModelParameters
HiddenSize uint32 `json:"hidden_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RopeTheta float32 `json:"rope_theta"`
NormEps float32 `json:"norm_eps"`
ConvLCache uint32 `json:"conv_L_cache"`
LayerTypes []string `json:"layer_types"`
TieEmbedding bool `json:"tie_embedding"`
}
var _ ModelConverter = (*lfm2Model)(nil)
func (p *lfm2Model) KV(t *Tokenizer) KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "lfm2"
kv["lfm2.vocab_size"] = p.VocabSize
kv["lfm2.block_count"] = p.NumHiddenLayers
kv["lfm2.embedding_length"] = p.HiddenSize
kv["lfm2.feed_forward_length"] = p.IntermediateSize
kv["lfm2.context_length"] = p.MaxPositionEmbeddings
// Build per-layer KV head count array based on layer_types
// (0 = shortconv layer, non-zero = attention layer with that many KV heads)
kvHeadCounts := make([]uint32, p.NumHiddenLayers)
for i := range p.NumHiddenLayers {
if int(i) < len(p.LayerTypes) && p.LayerTypes[i] == "full_attention" {
kvHeadCounts[i] = p.NumKeyValueHeads
}
}
kv["lfm2.attention.head_count"] = p.NumAttentionHeads
kv["lfm2.attention.head_count_kv"] = kvHeadCounts
kv["lfm2.attention.key_length"] = p.HiddenSize / p.NumAttentionHeads
kv["lfm2.attention.value_length"] = p.HiddenSize / p.NumAttentionHeads
kv["lfm2.attention.layer_norm_rms_epsilon"] = p.NormEps
kv["lfm2.rope.freq_base"] = p.RopeTheta
kv["lfm2.shortconv.l_cache"] = p.ConvLCache
return kv
}
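// Worked example for the kvHeadCounts loop above (hypothetical config, not
// from this file): with layer_types = ["conv", "conv", "full_attention",
// "conv"] and num_key_value_heads = 8, the resulting array is [0, 0, 8, 0]:
// shortconv layers carry no KV heads, attention layers carry 8.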
func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
var out []*ggml.Tensor
for _, t := range ts {
shape := t.Shape()
// Squeeze conv weights: [D, 1, K] -> [D, K]
if strings.HasSuffix(t.Name(), "shortconv.conv.weight") {
if len(shape) == 3 && shape[1] == 1 {
shape = []uint64{shape[0], shape[2]}
}
}
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: slices.Clone(shape),
WriterTo: t,
})
}
return out
}
func (p *lfm2Model) Replacements() []string {
return []string{
"model.embed_tokens", "token_embd",
"model.embedding_norm", "output_norm",
"model.layers", "blk",
"operator_norm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.out_proj", "attn_output",
"self_attn.q_layernorm", "attn_q_norm",
"self_attn.k_layernorm", "attn_k_norm",
"conv.conv", "shortconv.conv",
"conv.in_proj", "shortconv.in_proj",
"conv.out_proj", "shortconv.out_proj",
"feed_forward.w1", "ffn_gate",
"feed_forward.w2", "ffn_down",
"feed_forward.w3", "ffn_up",
"ffn_norm", "ffn_norm",
}
}

View File

@@ -40,7 +40,6 @@ const (
 func (t tensorBase) Kind() uint32 {
 if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
 strings.HasSuffix(t.name, ".bias") ||
-strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
 t.name == "token_types.weight" ||
 t.name == "v.positional_embedding_vlm" ||
 t.name == "v.tile_position_embd.weight" ||

View File

@@ -16,7 +16,6 @@
 - [Generate Embeddings](#generate-embeddings)
 - [List Running Models](#list-running-models)
 - [Version](#version)
-- [Experimental: Image Generation](#image-generation-experimental)
 ## Conventions
@@ -59,15 +58,6 @@ Advanced parameters (optional):
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 - `context` (deprecated): the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
-Experimental image generation parameters (for image generation models only):
-> [!WARNING]
-> These parameters are experimental and may change in future versions.
-- `width`: width of the generated image in pixels
-- `height`: height of the generated image in pixels
-- `steps`: number of diffusion steps
 #### Structured outputs
 Structured outputs are supported by providing a JSON schema in the `format` parameter. The model will generate a response that matches the schema. See the [structured outputs](#request-structured-outputs) example below.
@@ -1877,55 +1867,3 @@ curl http://localhost:11434/api/version
   "version": "0.5.1"
 }
 ```
-## Experimental Features
-### Image Generation (Experimental)
-> [!WARNING]
-> Image generation is experimental and may change in future versions.
-Image generation is now supported through the standard `/api/generate` endpoint when using image generation models. The API automatically detects when an image generation model is being used.
-See the [Generate a completion](#generate-a-completion) section for the full API documentation. The experimental image generation parameters (`width`, `height`, `steps`) are documented there.
-#### Example
-##### Request
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "x/z-image-turbo",
-  "prompt": "a sunset over mountains",
-  "width": 1024,
-  "height": 768
-}'
-```
-##### Response (streaming)
-Progress updates during generation:
-```json
-{
-  "model": "x/z-image-turbo",
-  "created_at": "2024-01-15T10:30:00.000000Z",
-  "completed": 5,
-  "total": 20,
-  "done": false
-}
-```
-##### Final Response
-```json
-{
-  "model": "x/z-image-turbo",
-  "created_at": "2024-01-15T10:30:15.000000Z",
-  "image": "iVBORw0KGgoAAAANSUhEUg...",
-  "done": true,
-  "done_reason": "stop",
-  "total_duration": 15000000000,
-  "load_duration": 2000000000
-}
-```

View File

@@ -21,7 +21,6 @@ ollama pull glm-4.7:cloud
 To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:
 ```shell
-export ANTHROPIC_AUTH_TOKEN=ollama # required but ignored
 export ANTHROPIC_BASE_URL=http://localhost:11434
 export ANTHROPIC_API_KEY=ollama # required but ignored
 ```
@@ -248,13 +247,12 @@ curl -X POST http://localhost:11434/v1/messages \
 [Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend:
 ```shell
-ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
 ```
 Or set the environment variables in your shell profile:
 ```shell
-export ANTHROPIC_AUTH_TOKEN=ollama
 export ANTHROPIC_BASE_URL=http://localhost:11434
 export ANTHROPIC_API_KEY=ollama
 ```

View File

@@ -275,73 +275,6 @@ curl -X POST http://localhost:11434/v1/chat/completions \
 - [x] `dimensions`
 - [ ] `user`
-### `/v1/images/generations` (experimental)
-> Note: This endpoint is experimental and may change or be removed in future versions.
-Generate images using image generation models.
-<CodeGroup dropdown>
-```python images.py
-from openai import OpenAI
-client = OpenAI(
-    base_url='http://localhost:11434/v1/',
-    api_key='ollama',  # required but ignored
-)
-response = client.images.generate(
-    model='x/z-image-turbo',
-    prompt='A cute robot learning to paint',
-    size='1024x1024',
-    response_format='b64_json',
-)
-print(response.data[0].b64_json[:50] + '...')
-```
-```javascript images.js
-import OpenAI from "openai";
-const openai = new OpenAI({
-  baseURL: "http://localhost:11434/v1/",
-  apiKey: "ollama", // required but ignored
-});
-const response = await openai.images.generate({
-  model: "x/z-image-turbo",
-  prompt: "A cute robot learning to paint",
-  size: "1024x1024",
-  response_format: "b64_json",
-});
-console.log(response.data[0].b64_json.slice(0, 50) + "...");
-```
-```shell images.sh
-curl -X POST http://localhost:11434/v1/images/generations \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "x/z-image-turbo",
-    "prompt": "A cute robot learning to paint",
-    "size": "1024x1024",
-    "response_format": "b64_json"
-  }'
-```
-</CodeGroup>
-#### Supported request fields
-- [x] `model`
-- [x] `prompt`
-- [x] `size` (e.g. "1024x1024")
-- [x] `response_format` (only `b64_json` supported)
-- [ ] `n`
-- [ ] `quality`
-- [ ] `style`
-- [ ] `user`
 ### `/v1/responses`
 > Note: Added in Ollama v0.13.3

View File

@@ -110,7 +110,7 @@ More Ollama [Python example](https://github.com/ollama/ollama-python/blob/main/e
 import { Ollama } from "ollama";
 const client = new Ollama();
-const results = await client.webSearch("what is ollama?");
+const results = await client.webSearch({ query: "what is ollama?" });
 console.log(JSON.stringify(results, null, 2));
 ```
@@ -213,7 +213,7 @@ models](https://ollama.com/models)\n\nAvailable for macOS, Windows, and Linux',
 import { Ollama } from "ollama";
 const client = new Ollama();
-const fetchResult = await client.webFetch("https://ollama.com");
+const fetchResult = await client.webFetch({ url: "https://ollama.com" });
 console.log(JSON.stringify(fetchResult, null, 2));
 ```

View File

@@ -111,9 +111,7 @@
 "/integrations/zed",
 "/integrations/roo-code",
 "/integrations/n8n",
-"/integrations/xcode",
-"/integrations/onyx",
-"/integrations/marimo"
+"/integrations/xcode"
 ]
 },
 {

View File

@@ -22,7 +22,7 @@ Please refer to the [GPU docs](./gpu).
 ## How can I specify the context window size?
-By default, Ollama uses a context window size of 4096 tokens.
+By default, Ollama uses a context window size of 2048 tokens.
 This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
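For reference, the FAQ sentence above continues with a command along these lines (not part of this hunk; assumed from the surrounding docs):

```shell
OLLAMA_CONTEXT_LENGTH=8192 ollama serve
```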

View File

Binary files not shown: 9 images removed (174, 80, 230, 178, 186, 100, 306, 300, 211 KiB), presumably the marimo and Onyx screenshots referenced in the docs removed below.

View File

@@ -2,12 +2,6 @@
 title: Claude Code
 ---
-Claude Code is Anthropic's agentic coding tool that can read, modify, and execute code in your working directory.
-Open models can be used with Claude Code through Ollama's Anthropic-compatible API, enabling you to use models such as `qwen3-coder`, `gpt-oss:20b`, or other models.
-![Claude Code with Ollama](https://files.ollama.com/claude-code.png)
 ## Install
 Install [Claude Code](https://code.claude.com/docs/en/overview):
@@ -31,24 +25,22 @@ Claude Code connects to Ollama using the Anthropic-compatible API.
 1. Set the environment variables:
 ```shell
-export ANTHROPIC_AUTH_TOKEN=ollama
 export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama
 ```
 2. Run Claude Code with an Ollama model:
 ```shell
-claude --model gpt-oss:20b
+claude --model qwen3-coder
 ```
 Or run with environment variables inline:
 ```shell
-ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 claude --model gpt-oss:20b
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
 ```
-**Note:** Claude Code requires a large context window. We recommend at least 32K tokens. See the [context length documentation](/context-length) for how to adjust context length in Ollama.
 ## Connecting to ollama.com
 1. Create an [API key](https://ollama.com/settings/keys) on ollama.com
@@ -75,4 +67,3 @@
 ### Local models
 - `qwen3-coder` - Excellent for coding tasks
 - `gpt-oss:20b` - Strong general-purpose model
-- `gpt-oss:120b` - Larger general-purpose model for more complex tasks

View File

@@ -1,73 +0,0 @@
---
title: marimo
---
## Install
Install [marimo](https://marimo.io). You can use `pip` or `uv` for this. You
can also use `uv` to create a sandboxed environment for marimo by running:
```
uvx marimo edit --sandbox notebook.py
```
## Usage with Ollama
1. In marimo, open the user settings and go to the AI tab. From there
you can find and configure Ollama as an AI provider. For local use you
would typically point the base URL to `http://localhost:11434/v1`.
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/marimo-settings.png"
alt="Ollama settings in marimo"
width="50%"
/>
</div>
2. Once the AI provider is set up, you can turn on/off specific AI models you'd like to access.
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/marimo-models.png"
alt="Selecting an Ollama model"
width="50%"
/>
</div>
3. You can also add a model to the list of available models by scrolling to the bottom and using the UI there.
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/marimo-add-model.png"
alt="Adding a new Ollama model"
width="50%"
/>
</div>
4. Once configured, you can now use Ollama for AI chats in marimo.
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/marimo-chat.png"
alt="Configure code completion"
width="50%"
/>
</div>
5. Alternatively, you can now use Ollama for **inline code completion** in marimo. This can be configured in the "AI Features" tab.
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/marimo-code-completion.png"
alt="Configure code completion"
width="50%"
/>
</div>
## Connecting to ollama.com
1. Sign in to Ollama Cloud via `ollama signin`
2. In the Ollama model settings, add a model that Ollama hosts, like `gpt-oss:120b`.
3. You can now refer to this model in marimo!

View File

@@ -1,63 +0,0 @@
---
title: Onyx
---
## Overview
[Onyx](http://onyx.app/) is a self-hostable Chat UI that integrates with all Ollama models. Features include:
- Creating custom Agents
- Web search
- Deep Research
- RAG over uploaded documents and connected apps
- Connectors to applications like Google Drive, Email, Slack, etc.
- MCP and OpenAPI Actions support
- Image generation
- User/Groups management, RBAC, SSO, etc.
Onyx can be deployed for single users or large organizations.
## Install Onyx
Deploy Onyx with the [quickstart guide](https://docs.onyx.app/deployment/getting_started/quickstart).
<Info>
Resourcing/scaling docs [here](https://docs.onyx.app/deployment/getting_started/resourcing).
</Info>
## Usage with Ollama
1. Log in to your Onyx deployment (create an account first).
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/onyx-login.png"
alt="Onyx Login Page"
width="75%"
/>
</div>
2. In the set-up process select `Ollama` as the LLM provider.
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/onyx-ollama-llm.png"
alt="Onyx Set Up Form"
width="75%"
/>
</div>
3. Provide your **Ollama API URL** and select your models.
<Note>If you're running Onyx in Docker, to access your computer's local network use `http://host.docker.internal` instead of `http://127.0.0.1`.</Note>
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/onyx-ollama-form.png"
alt="Selecting Ollama Models"
width="75%"
/>
</div>
You can also easily connect Onyx to Ollama Cloud using the `Ollama Cloud` tab of the setup.
## Send your first query
<div style={{ display: 'flex', justifyContent: 'center' }}>
<img
src="/images/onyx-query.png"
alt="Onyx Query Example"
width="75%"
/>
</div>

View File

@@ -1,5 +1,5 @@
 ---
-title: Linux
+title: "Linux"
 ---
 ## Install
@@ -13,15 +13,14 @@ curl -fsSL https://ollama.com/install.sh | sh
 ## Manual install
 <Note>
-If you are upgrading from a prior version, you should remove the old libraries
-with `sudo rm -rf /usr/lib/ollama` first.
+If you are upgrading from a prior version, you should remove the old libraries with `sudo rm -rf /usr/lib/ollama` first.
 </Note>
 Download and extract the package:
 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
-| sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
+| sudo tar zx -C /usr
 ```
 Start Ollama:
@@ -41,8 +40,8 @@ ollama -v
 If you have an AMD GPU, also download and extract the additional ROCm package:
 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
-| sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz \
+| sudo tar zx -C /usr
 ```
 ### ARM64 install
@@ -50,8 +49,8 @@ curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
 Download and extract the ARM64-specific package:
 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-arm64.tar.zst \
-| sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-arm64.tgz \
+| sudo tar zx -C /usr
 ```
 ### Adding Ollama as a startup service (recommended)
@@ -113,11 +112,7 @@ sudo systemctl status ollama
 ```
 <Note>
-While AMD has contributed the `amdgpu` driver upstream to the official linux
-kernel source, the version is older and may not support all ROCm features. We
-recommend you install the latest driver from
-https://www.amd.com/en/support/linux-drivers for best support of your Radeon
-GPU.
+While AMD has contributed the `amdgpu` driver upstream to the official linux kernel source, the version is older and may not support all ROCm features. We recommend you install the latest driver from https://www.amd.com/en/support/linux-drivers for best support of your Radeon GPU.
 </Note>
 ## Customizing
@@ -146,8 +141,8 @@ curl -fsSL https://ollama.com/install.sh | sh
 Or by re-downloading Ollama:
 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
-| sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
+| sudo tar zx -C /usr
 ```
 ## Installing specific versions
@@ -196,4 +191,4 @@ Remove the downloaded models and Ollama service user and group:
 sudo userdel ollama
 sudo groupdel ollama
 sudo rm -r /usr/share/ollama
 ```

View File

@@ -269,8 +269,6 @@ func (kv KV) OllamaEngineRequired() bool {
 "qwen25vl",
 "qwen3", "qwen3moe",
 "qwen3vl", "qwen3vlmoe",
-"glm4moelite",
-"lfm2",
 }, kv.Architecture())
 }
@@ -858,9 +856,7 @@ func (f GGML) FlashAttention() bool {
 return slices.Contains([]string{
 "bert",
 "gemma3",
-"glm4moelite",
 "gptoss", "gpt-oss",
-"lfm2",
 "mistral3",
 "olmo3",
 "qwen3", "qwen3moe",

View File

@@ -1,148 +0,0 @@
//go:build integration
package integration
import (
"context"
"encoding/base64"
"fmt"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
func TestImageGeneration(t *testing.T) {
skipUnderMinVRAM(t, 8)
type testCase struct {
imageGenModel string
visionModel string
prompt string
expectedWords []string
}
testCases := []testCase{
{
imageGenModel: "jmorgan/z-image-turbo",
visionModel: "llama3.2-vision",
prompt: "A cartoon style llama flying like a superhero through the air with clouds in the background",
expectedWords: []string{"llama", "flying", "cartoon", "cloud", "sky", "superhero", "air", "animal", "camelid"},
},
}
for _, tc := range testCases {
t.Run(fmt.Sprintf("%s->%s", tc.imageGenModel, tc.visionModel), func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
// Pull both models
if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
t.Fatalf("failed to pull image gen model: %v", err)
}
if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
t.Fatalf("failed to pull vision model: %v", err)
}
// Generate the image
t.Logf("Generating image with prompt: %s", tc.prompt)
imageBase64, err := generateImage(ctx, client, tc.imageGenModel, tc.prompt)
if err != nil {
if strings.Contains(err.Error(), "image generation not available") {
t.Skip("Target system does not support image generation")
} else if strings.Contains(err.Error(), "executable file not found in") { // Windows pattern, not yet supported
t.Skip("Windows does not support image generation yet")
} else if strings.Contains(err.Error(), "CUDA driver version is insufficient") {
t.Skip("Driver is too old")
} else if strings.Contains(err.Error(), "insufficient memory for image generation") {
t.Skip("insufficient memory for image generation")
} else if strings.Contains(err.Error(), "error while loading shared libraries: libcuda.so.1") { // AMD GPU or CPU
t.Skip("CUDA GPU is not available")
} else if strings.Contains(err.Error(), "ollama-mlx: no such file or directory") {
// most likely linux arm - not supported yet
t.Skip("unsupported architecture")
}
t.Fatalf("failed to generate image: %v", err)
}
imageData, err := base64.StdEncoding.DecodeString(imageBase64)
if err != nil {
t.Fatalf("failed to decode image: %v", err)
}
t.Logf("Generated image: %d bytes", len(imageData))
// Preload vision model and check GPU loading
err = client.Generate(ctx, &api.GenerateRequest{Model: tc.visionModel}, func(response api.GenerateResponse) error { return nil })
if err != nil {
t.Fatalf("failed to load vision model: %v", err)
}
// Use vision model to describe the image
chatReq := api.ChatRequest{
Model: tc.visionModel,
Messages: []api.Message{
{
Role: "user",
Content: "Describe this image in detail. What is shown? What style is it? What is the main subject doing?",
Images: []api.ImageData{imageData},
},
},
Stream: &stream,
Options: map[string]any{
"seed": 42,
"temperature": 0.0,
},
}
// Verify the vision model's response contains expected keywords
response := DoChat(ctx, t, client, chatReq, tc.expectedWords, 240*time.Second, 30*time.Second)
if response != nil {
t.Logf("Vision model response: %s", response.Content)
// Additional detailed check for keywords
content := strings.ToLower(response.Content)
foundWords := []string{}
missingWords := []string{}
for _, word := range tc.expectedWords {
if strings.Contains(content, word) {
foundWords = append(foundWords, word)
} else {
missingWords = append(missingWords, word)
}
}
t.Logf("Found keywords: %v", foundWords)
if len(missingWords) > 0 {
t.Logf("Missing keywords (at least one was found so test passed): %v", missingWords)
}
}
})
}
}
// generateImage calls the Ollama API to generate an image and returns the base64 image data
func generateImage(ctx context.Context, client *api.Client, model, prompt string) (string, error) {
var imageBase64 string
err := client.Generate(ctx, &api.GenerateRequest{
Model: model,
Prompt: prompt,
}, func(resp api.GenerateResponse) error {
if resp.Image != "" {
imageBase64 = resp.Image
}
return nil
})
if err != nil {
return "", fmt.Errorf("failed to generate image: %w", err)
}
if imageBase64 == "" {
return "", fmt.Errorf("no image data in response")
}
return imageBase64, nil
}
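As a point of reference, a standalone caller drives the same endpoint the test helper above wraps. A minimal sketch, assuming the Image field on api.GenerateResponse from this changeset and a server reachable via the usual environment variables; the model name is a placeholder, not a real model:

package main

import (
	"context"
	"encoding/base64"
	"fmt"
	"log"
	"os"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	var b64 string
	// Responses stream in; only the final one carries the image payload.
	err = client.Generate(context.Background(), &api.GenerateRequest{
		Model:  "my-image-model", // placeholder
		Prompt: "a lighthouse at dusk",
	}, func(resp api.GenerateResponse) error {
		if resp.Image != "" {
			b64 = resp.Image
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	data, err := base64.StdEncoding.DecodeString(b64)
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile("out.png", data, 0o644); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("wrote %d bytes\n", len(data))
}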

View File

@@ -131,7 +131,7 @@ func TestAPIToolCalling(t *testing.T) {
t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather") t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather")
} }
if _, ok := lastToolCall.Function.Arguments.Get("location"); !ok { if _, ok := lastToolCall.Function.Arguments["location"]; !ok {
t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String()) t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String())
} }
case <-ctx.Done(): case <-ctx.Done():

View File

@@ -38,7 +38,6 @@ var (
// Note: add newer models at the top of the list to test them first
ollamaEngineChatModels = []string{
- "lfm2.5-thinking",
"ministral-3",
"qwen3-coder:30b",
"gpt-oss:20b",
@@ -144,7 +143,6 @@ var (
"granite3.3", "granite3.3",
"hermes3", "hermes3",
"internlm2", "internlm2",
"lfm2.5-thinking",
"llama-guard3", "llama-guard3",
"llama-pro", "llama-pro",
"llama2-chinese", "llama2-chinese",
@@ -265,7 +263,6 @@ var (
"snowflake-arctic-embed2", "snowflake-arctic-embed2",
} }
libraryToolsModels = []string{ libraryToolsModels = []string{
"lfm2.5-thinking",
"qwen3-vl", "qwen3-vl",
"gpt-oss:20b", "gpt-oss:20b",
"gpt-oss:120b", "gpt-oss:120b",

View File

@@ -1464,12 +1464,6 @@ type CompletionRequest struct {
// TopLogprobs specifies the number of most likely alternative tokens to return (0-20)
TopLogprobs int
- // Image generation fields
- Width int32 `json:"width,omitempty"`
- Height int32 `json:"height,omitempty"`
- Steps int32 `json:"steps,omitempty"`
- Seed int64 `json:"seed,omitempty"`
}
// DoneReason represents the reason why a completion response is done
@@ -1518,15 +1512,6 @@ type CompletionResponse struct {
// Logprobs contains log probability information if requested
Logprobs []Logprob `json:"logprobs,omitempty"`
- // Image contains base64-encoded image data for image generation
- Image string `json:"image,omitempty"`
- // Step is the current step in image generation
- Step int `json:"step,omitempty"`
- // TotalSteps is the total number of steps for image generation
- TotalSteps int `json:"total_steps,omitempty"`
}
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {

View File

@@ -1,95 +0,0 @@
package manifest
import (
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/types/model"
)
var ErrInvalidDigestFormat = errors.New("invalid digest format")
func Path() (string, error) {
path := filepath.Join(envconfig.Models(), "manifests")
if err := os.MkdirAll(path, 0o755); err != nil {
return "", fmt.Errorf("%w: ensure path elements are traversable", err)
}
return path, nil
}
// PathForName returns the path to the manifest file for a specific model name.
func PathForName(n model.Name) (string, error) {
if !n.IsValid() {
return "", os.ErrNotExist
}
manifests, err := Path()
if err != nil {
return "", err
}
return filepath.Join(manifests, n.Filepath()), nil
}
func BlobsPath(digest string) (string, error) {
// only accept actual sha256 digests
pattern := "^sha256[:-][0-9a-fA-F]{64}$"
re := regexp.MustCompile(pattern)
if digest != "" && !re.MatchString(digest) {
return "", ErrInvalidDigestFormat
}
digest = strings.ReplaceAll(digest, ":", "-")
path := filepath.Join(envconfig.Models(), "blobs", digest)
dirPath := filepath.Dir(path)
if digest == "" {
dirPath = path
}
if err := os.MkdirAll(dirPath, 0o755); err != nil {
return "", fmt.Errorf("%w: ensure path elements are traversable", err)
}
return path, nil
}
// PruneDirectory removes empty directories recursively.
func PruneDirectory(path string) error {
info, err := os.Lstat(path)
if err != nil {
return err
}
if info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
entries, err := os.ReadDir(path)
if err != nil {
return err
}
for _, entry := range entries {
if err := PruneDirectory(filepath.Join(path, entry.Name())); err != nil {
return err
}
}
entries, err = os.ReadDir(path)
if err != nil {
return err
}
if len(entries) > 0 {
return nil
}
return os.Remove(path)
}
return nil
}
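The digest check above accepts both the colon form ("sha256:<hex>") used in manifests and the dash form ("sha256-<hex>") used for on-disk blob names. A small self-contained sketch of just that validation (the regexp is copied from BlobsPath; the program around it is illustrative):

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// digestRe mirrors the pattern BlobsPath compiles above: "sha256", a ':' or
// '-' separator, then exactly 64 hex characters.
var digestRe = regexp.MustCompile(`^sha256[:-][0-9a-fA-F]{64}$`)

func main() {
	cases := []string{
		"sha256:" + strings.Repeat("ab", 32), // valid manifest form
		"sha256-" + strings.Repeat("AB", 32), // valid on-disk form
		"sha256:deadbeef",                    // too short: rejected
		"md5:" + strings.Repeat("ab", 32),    // wrong algorithm: rejected
	}
	for _, d := range cases {
		fmt.Printf("%q valid=%v\n", d, digestRe.MatchString(d))
	}
}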

View File

@@ -8,7 +8,6 @@ import (
"math/rand" "math/rand"
"net/http" "net/http"
"strings" "strings"
"time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@@ -442,7 +441,6 @@ type ResponsesWriter struct {
stream bool
responseID string
itemID string
- request openai.ResponsesRequest
}
func (w *ResponsesWriter) writeEvent(eventType string, data any) error {
@@ -480,9 +478,7 @@ func (w *ResponsesWriter) writeResponse(data []byte) (int, error) {
// Non-streaming response
w.ResponseWriter.Header().Set("Content-Type", "application/json")
- response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse, w.request)
- completedAt := time.Now().Unix()
- response.CompletedAt = &completedAt
+ response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse)
return len(data), json.NewEncoder(w.ResponseWriter).Encode(response)
}
@@ -527,12 +523,11 @@ func ResponsesMiddleware() gin.HandlerFunc {
w := &ResponsesWriter{ w := &ResponsesWriter{
BaseWriter: BaseWriter{ResponseWriter: c.Writer}, BaseWriter: BaseWriter{ResponseWriter: c.Writer},
converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model, req), converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model),
model: req.Model, model: req.Model,
stream: streamRequested, stream: streamRequested,
responseID: responseID, responseID: responseID,
itemID: itemID, itemID: itemID,
request: req,
} }
// Set headers based on streaming mode // Set headers based on streaming mode
@@ -546,112 +541,3 @@ func ResponsesMiddleware() gin.HandlerFunc {
c.Next()
}
}
type ImageWriter struct {
BaseWriter
}
func (w *ImageWriter) writeResponse(data []byte) (int, error) {
var generateResponse api.GenerateResponse
if err := json.Unmarshal(data, &generateResponse); err != nil {
return 0, err
}
// Only write response when done with image
if generateResponse.Done && generateResponse.Image != "" {
w.ResponseWriter.Header().Set("Content-Type", "application/json")
return len(data), json.NewEncoder(w.ResponseWriter).Encode(openai.ToImageGenerationResponse(generateResponse))
}
return len(data), nil
}
func (w *ImageWriter) Write(data []byte) (int, error) {
code := w.ResponseWriter.Status()
if code != http.StatusOK {
return w.writeError(data)
}
return w.writeResponse(data)
}
func ImageGenerationsMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
var req openai.ImageGenerationRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
return
}
if req.Prompt == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "prompt is required"))
return
}
if req.Model == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "model is required"))
return
}
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(openai.FromImageGenerationRequest(req)); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error()))
return
}
c.Request.Body = io.NopCloser(&b)
w := &ImageWriter{
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
}
c.Writer = w
c.Next()
}
}
func ImageEditsMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
var req openai.ImageEditRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
return
}
if req.Prompt == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "prompt is required"))
return
}
if req.Model == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "model is required"))
return
}
if req.Image == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "image is required"))
return
}
genReq, err := openai.FromImageEditRequest(req)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
return
}
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(genReq); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error()))
return
}
c.Request.Body = io.NopCloser(&b)
w := &ImageWriter{
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
}
c.Writer = w
c.Next()
}
}

View File

@@ -961,280 +961,3 @@ func TestRetrieveMiddleware(t *testing.T) {
}
}
}
func TestImageGenerationsMiddleware(t *testing.T) {
type testCase struct {
name string
body string
req api.GenerateRequest
err openai.ErrorResponse
}
var capturedRequest *api.GenerateRequest
testCases := []testCase{
{
name: "image generation basic",
body: `{
"model": "test-model",
"prompt": "a beautiful sunset"
}`,
req: api.GenerateRequest{
Model: "test-model",
Prompt: "a beautiful sunset",
},
},
{
name: "image generation with size",
body: `{
"model": "test-model",
"prompt": "a beautiful sunset",
"size": "512x768"
}`,
req: api.GenerateRequest{
Model: "test-model",
Prompt: "a beautiful sunset",
Width: 512,
Height: 768,
},
},
{
name: "image generation missing prompt",
body: `{
"model": "test-model"
}`,
err: openai.ErrorResponse{
Error: openai.Error{
Message: "prompt is required",
Type: "invalid_request_error",
},
},
},
{
name: "image generation missing model",
body: `{
"prompt": "a beautiful sunset"
}`,
err: openai.ErrorResponse{
Error: openai.Error{
Message: "model is required",
Type: "invalid_request_error",
},
},
},
}
endpoint := func(c *gin.Context) {
c.Status(http.StatusOK)
}
gin.SetMode(gin.TestMode)
router := gin.New()
router.Use(ImageGenerationsMiddleware(), captureRequestMiddleware(&capturedRequest))
router.Handle(http.MethodPost, "/api/generate", endpoint)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
defer func() { capturedRequest = nil }()
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if tc.err.Error.Message != "" {
var errResp openai.ErrorResponse
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(tc.err, errResp); diff != "" {
t.Fatalf("errors did not match:\n%s", diff)
}
return
}
if resp.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
}
if diff := cmp.Diff(&tc.req, capturedRequest); diff != "" {
t.Fatalf("requests did not match:\n%s", diff)
}
})
}
}
func TestImageWriterResponse(t *testing.T) {
gin.SetMode(gin.TestMode)
// Test that ImageWriter transforms GenerateResponse to OpenAI format
endpoint := func(c *gin.Context) {
resp := api.GenerateResponse{
Model: "test-model",
CreatedAt: time.Unix(1234567890, 0).UTC(),
Done: true,
Image: "dGVzdC1pbWFnZS1kYXRh", // base64 of "test-image-data"
}
data, _ := json.Marshal(resp)
c.Writer.Write(append(data, '\n'))
}
router := gin.New()
router.Use(ImageGenerationsMiddleware())
router.Handle(http.MethodPost, "/api/generate", endpoint)
body := `{"model": "test-model", "prompt": "test"}`
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if resp.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
}
var imageResp openai.ImageGenerationResponse
if err := json.Unmarshal(resp.Body.Bytes(), &imageResp); err != nil {
t.Fatalf("failed to unmarshal response: %v", err)
}
if imageResp.Created != 1234567890 {
t.Errorf("expected created 1234567890, got %d", imageResp.Created)
}
if len(imageResp.Data) != 1 {
t.Fatalf("expected 1 image, got %d", len(imageResp.Data))
}
if imageResp.Data[0].B64JSON != "dGVzdC1pbWFnZS1kYXRh" {
t.Errorf("expected image data 'dGVzdC1pbWFnZS1kYXRh', got %s", imageResp.Data[0].B64JSON)
}
}
func TestImageEditsMiddleware(t *testing.T) {
type testCase struct {
name string
body string
req api.GenerateRequest
err openai.ErrorResponse
}
var capturedRequest *api.GenerateRequest
// Base64-encoded test image (1x1 pixel PNG)
testImage := "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII="
decodedImage, _ := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=")
testCases := []testCase{
{
name: "image edit basic",
body: `{
"model": "test-model",
"prompt": "make it blue",
"image": "` + testImage + `"
}`,
req: api.GenerateRequest{
Model: "test-model",
Prompt: "make it blue",
Images: []api.ImageData{decodedImage},
},
},
{
name: "image edit with size",
body: `{
"model": "test-model",
"prompt": "make it blue",
"image": "` + testImage + `",
"size": "512x768"
}`,
req: api.GenerateRequest{
Model: "test-model",
Prompt: "make it blue",
Images: []api.ImageData{decodedImage},
Width: 512,
Height: 768,
},
},
{
name: "image edit missing prompt",
body: `{
"model": "test-model",
"image": "` + testImage + `"
}`,
err: openai.ErrorResponse{
Error: openai.Error{
Message: "prompt is required",
Type: "invalid_request_error",
},
},
},
{
name: "image edit missing model",
body: `{
"prompt": "make it blue",
"image": "` + testImage + `"
}`,
err: openai.ErrorResponse{
Error: openai.Error{
Message: "model is required",
Type: "invalid_request_error",
},
},
},
{
name: "image edit missing image",
body: `{
"model": "test-model",
"prompt": "make it blue"
}`,
err: openai.ErrorResponse{
Error: openai.Error{
Message: "image is required",
Type: "invalid_request_error",
},
},
},
}
endpoint := func(c *gin.Context) {
c.Status(http.StatusOK)
}
gin.SetMode(gin.TestMode)
router := gin.New()
router.Use(ImageEditsMiddleware(), captureRequestMiddleware(&capturedRequest))
router.Handle(http.MethodPost, "/api/generate", endpoint)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
defer func() { capturedRequest = nil }()
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if tc.err.Error.Message != "" {
var errResp openai.ErrorResponse
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(tc.err, errResp); diff != "" {
t.Fatalf("errors did not match:\n%s", diff)
}
return
}
if resp.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d: %s", resp.Code, resp.Body.String())
}
if diff := cmp.Diff(&tc.req, capturedRequest); diff != "" {
t.Fatalf("requests did not match:\n%s", diff)
}
})
}
}

View File

@@ -162,7 +162,6 @@ type Tensor interface {
AvgPool2D(ctx Context, k, s int, p float32) Tensor
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
Conv3D(ctx Context, weight Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) Tensor
- SSMConv(ctx Context, kernel Tensor) Tensor
IM2Col(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
View File

@@ -1641,13 +1641,6 @@ func (t *Tensor) Conv3D(ctx ml.Context, t2 ml.Tensor, c, s0, s1, s2, p0, p1, p2,
return tt
}
- func (t *Tensor) SSMConv(ctx ml.Context, kernel ml.Tensor) ml.Tensor {
- return &Tensor{
- b: t.b,
- t: C.ggml_ssm_conv(ctx.(*Context).ctx, t.t, kernel.(*Tensor).t),
- }
- }
func (t *Tensor) AvgPool2D(ctx ml.Context, k, s int, p float32) ml.Tensor {
return &Tensor{
b: t.b,

View File

@@ -1,304 +0,0 @@
package glm4moelite
import (
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Options struct {
numExpertsUsed int
numExperts int
normTopKProb bool
routedScalingFactor float32
kvLoraRank,
qkNopeHeadDim,
qkRopeHeadDim,
kqNopeHeadDim,
qkHeadDim int
qLoraRank int
vHeadDim int
hiddenSize,
numHeads,
numKVHeads int
eps,
ropeBase float32
kqScale float64
}
func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, t, p ml.Tensor) ml.Tensor {
return nn.RoPE(ctx, t, p, o.qkRopeHeadDim, o.ropeBase, 1.0)
}
type Attention struct {
Q *nn.Linear `gguf:"attn_q"`
QA *nn.Linear `gguf:"attn_q_a"`
QANorm *nn.RMSNorm `gguf:"attn_q_a_norm"`
QB *nn.Linear `gguf:"attn_q_b"`
KVA *nn.Linear `gguf:"attn_kv_a_mqa"`
KVANorm *nn.RMSNorm `gguf:"attn_kv_a_norm"`
KVB *nn.Linear `gguf:"attn_kv_b"`
Output *nn.Linear `gguf:"attn_out,alt:attn_output"`
}
func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
seqLength := hiddenStates.Dim(1)
var query ml.Tensor
if opts.qLoraRank == 0 {
query = attn.Q.Forward(ctx, hiddenStates)
} else {
query = attn.QA.Forward(ctx, hiddenStates)
query = attn.QANorm.Forward(ctx, query, opts.eps)
query = attn.QB.Forward(ctx, query)
}
query = query.Reshape(ctx, query.Dim(0)/opts.numHeads, opts.numHeads, seqLength)
queryChunks := query.ChunkSections(ctx, 0, opts.qkNopeHeadDim, opts.qkRopeHeadDim)
compressedKV := attn.KVA.Forward(ctx, hiddenStates)
kPass := compressedKV.Slice(ctx, 0, 0, opts.kvLoraRank, 1)
kRot := compressedKV.View(ctx,
opts.kvLoraRank*compressedKV.Stride(0), opts.qkRopeHeadDim,
compressedKV.Stride(1), 1,
compressedKV.Stride(1), compressedKV.Dim(1),
)
qRot := opts.applyRotaryPositionEmbeddings(ctx, queryChunks[1], positions)
kRot = opts.applyRotaryPositionEmbeddings(ctx, kRot, positions)
kPass = attn.KVANorm.Forward(ctx, kPass, opts.eps)
kPass = attn.KVB.Forward(ctx, kPass)
kv := kPass.Reshape(ctx, kPass.Dim(0)/opts.numKVHeads, opts.numKVHeads, seqLength)
kvChunks := kv.ChunkSections(ctx, 0, opts.kqNopeHeadDim, opts.vHeadDim)
kRot = kRot.Repeat(ctx, 1, queryChunks[0].Dim(1))
query = qRot.Concat(ctx, queryChunks[0], 0)
key := kRot.Concat(ctx, kvChunks[0], 0)
attention := nn.Attention(ctx, query, key, kvChunks[1], opts.kqScale, cache)
attention = attention.Reshape(ctx, attention.Dim(0)*attention.Dim(1), seqLength)
return attn.Output.Forward(ctx, attention)
}
type MLP interface {
Forward(ml.Context, ml.Tensor, *Options) ml.Tensor
}
type sparse struct {
Router *nn.Linear `gguf:"ffn_gate_inp"`
Gate *nn.Linear `gguf:"ffn_gate_exps"`
Up *nn.Linear `gguf:"ffn_up_exps"`
Down *nn.Linear `gguf:"ffn_down_exps"`
SharedExpert *dense `gguf:",suf:_shexp"`
ExpProbsBias ml.Tensor `gguf:"exp_probs_b.bias,alt:exp_probs_b"`
}
func (moe *sparse) Moe(ctx ml.Context, hiddenStates, topKIndices, topKWeights ml.Tensor, opts *Options) ml.Tensor {
hiddenStates = hiddenStates.Reshape(ctx, hiddenStates.Dim(0), 1, hiddenStates.Dim(1))
upStates := moe.Up.Weight.MulmatID(ctx, hiddenStates, topKIndices)
hiddenStates = moe.Gate.Weight.MulmatID(ctx, hiddenStates, topKIndices)
hiddenStates = hiddenStates.SILU(ctx, upStates)
experts := moe.Down.Weight.MulmatID(ctx, hiddenStates, topKIndices)
experts = experts.Mul(ctx, topKWeights)
nextStates := experts.View(ctx, 0, experts.Dim(0), experts.Stride(2), experts.Dim(2))
for i := 1; i < opts.numExpertsUsed; i++ {
nextStates = nextStates.Add(ctx, experts.View(ctx, i*experts.Stride(1), experts.Dim(0), experts.Stride(2), experts.Dim(2)))
}
return nextStates
}
func (moe *sparse) topKIndices(ctx ml.Context, scores ml.Tensor, opts *Options) ml.Tensor {
if moe.ExpProbsBias != nil {
scores = scores.Add(ctx, moe.ExpProbsBias)
}
topKIndices := scores.TopK(ctx, opts.numExpertsUsed)
return topKIndices
}
func (moe *sparse) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
residuals := hiddenStates
routerLogits := moe.Router.Forward(ctx, hiddenStates)
scores := routerLogits.Sigmoid(ctx)
topKIndices := moe.topKIndices(ctx, scores, opts)
topKWeights := scores.Reshape(ctx, 1, opts.numExperts, hiddenStates.Dim(1)).Rows(ctx, topKIndices)
if opts.normTopKProb {
topKWeights = topKWeights.Reshape(ctx, opts.numExpertsUsed, hiddenStates.Dim(1))
topKWeights = topKWeights.Div(ctx, topKWeights.SumRows(ctx))
topKWeights = topKWeights.Reshape(ctx, 1, opts.numExpertsUsed, hiddenStates.Dim(1))
}
topKWeights = topKWeights.Scale(ctx, float64(opts.routedScalingFactor))
hiddenStates = moe.Moe(ctx, hiddenStates, topKIndices, topKWeights, opts)
sharedExpertResult := moe.SharedExpert.Forward(ctx, residuals, opts)
hiddenStates = hiddenStates.Add(ctx, sharedExpertResult)
return hiddenStates
}
type dense struct {
Gate *nn.Linear `gguf:"ffn_gate"`
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
}
func (mlp *dense) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
hiddenStates = mlp.Gate.Forward(ctx, hiddenStates).SILU(ctx, mlp.Up.Forward(ctx, hiddenStates))
return mlp.Down.Forward(ctx, hiddenStates)
}
type Layer struct {
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
Attention *Attention
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
MLP MLP
}
func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
residual := hiddenStates
hiddenStates = t.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = t.Attention.Forward(ctx, hiddenStates, positions, cache, opts)
if outputs != nil {
hiddenStates = hiddenStates.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
}
hiddenStates = hiddenStates.Add(ctx, residual)
residual = hiddenStates
hiddenStates = t.MLPNorm.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = t.MLP.Forward(ctx, hiddenStates, opts)
hiddenStates = hiddenStates.Add(ctx, residual)
return hiddenStates
}
type Model struct {
model.Base
model.BytePairEncoding
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Layers []Layer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
*Options
}
func New(c fs.Config) (model.Model, error) {
layers := make([]Layer, c.Uint("block_count"))
firstDenseLayerIndex := int(c.Uint("leading_dense_block_count"))
for i := range layers {
if i < firstDenseLayerIndex {
layers[i].MLP = &dense{}
} else {
layers[i].MLP = &sparse{}
}
}
keyLength := int(c.Uint("attention.key_length"))
valueLength := int(c.Uint("attention.value_length"))
kqScale := 1.0 / math.Sqrt(float64(keyLength))
var pre []string
switch c.String("tokenizer.ggml.pre") {
case "glm4":
pre = []string{
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
}
default:
return nil, model.ErrUnsupportedTokenizer
}
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
&model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
},
pre...,
),
Layers: layers,
Options: &Options{
hiddenSize: int(c.Uint("embedding_length")),
numHeads: int(c.Uint("attention.head_count")),
numKVHeads: int(c.Uint("attention.head_count_kv")),
eps: c.Float("attention.layer_norm_rms_epsilon"),
ropeBase: c.Float("rope.freq_base"),
numExperts: int(c.Uint("expert_count")),
numExpertsUsed: int(c.Uint("expert_used_count")),
normTopKProb: c.Bool("expert_weights_norm", true),
qLoraRank: int(c.Uint("attention.q_lora_rank")),
kvLoraRank: int(c.Uint("attention.kv_lora_rank")),
qkHeadDim: keyLength,
vHeadDim: valueLength,
qkRopeHeadDim: int(c.Uint("rope.dimension_count")),
qkNopeHeadDim: keyLength - int(c.Uint("rope.dimension_count")),
kqNopeHeadDim: keyLength - int(c.Uint("rope.dimension_count")),
routedScalingFactor: c.Float("expert_weights_scale"),
kqScale: kqScale,
},
}
m.Cache = kvcache.NewCausalCache(m.Shift)
return &m, nil
}
func (m Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
return m.applyRotaryPositionEmbeddings(ctx, key, shift), nil
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)
for i, layer := range m.Layers {
m.Cache.SetLayer(i)
var outputs ml.Tensor
if i == len(m.Layers)-1 {
outputs = batch.Outputs
}
hiddenStates = layer.Forward(ctx, hiddenStates, positions, outputs, m.Cache, m.Options)
}
hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)
return m.Output.Forward(ctx, hiddenStates), nil
}
func init() {
model.Register("glm4moelite", New)
}
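The routing in sparse.Forward is a sigmoid-scored top-k: the optional exp_probs_b bias influences only which experts are selected, the weights come from the unbiased scores (optionally normalized to sum to one), and everything is scaled by routedScalingFactor. A scalar sketch with made-up numbers (routeTopK is illustrative, not the package API):

package main

import (
	"fmt"
	"sort"
)

// routeTopK mirrors the routing math above: bias the scores for selection,
// take the top-k experts, then weight them by the unbiased scores,
// optionally normalized, times the routed scaling factor.
func routeTopK(scores, bias []float32, k int, normalize bool, scale float32) (idx []int, w []float32) {
	type pair struct {
		i int
		s float32
	}
	ps := make([]pair, len(scores))
	for i, s := range scores {
		ps[i] = pair{i, s + bias[i]} // bias affects selection only
	}
	sort.Slice(ps, func(a, b int) bool { return ps[a].s > ps[b].s })

	var sum float32
	for _, p := range ps[:k] {
		idx = append(idx, p.i)
		w = append(w, scores[p.i]) // weights use the unbiased scores
		sum += scores[p.i]
	}
	for i := range w {
		if normalize {
			w[i] /= sum
		}
		w[i] *= scale
	}
	return idx, w
}

func main() {
	scores := []float32{0.9, 0.2, 0.7, 0.4} // hypothetical sigmoid outputs
	bias := []float32{0, 0.6, 0, 0}         // nudges expert 1 into the top-k
	idx, w := routeTopK(scores, bias, 2, true, 1.5)
	fmt.Println(idx, w) // [0 1] [~1.23 ~0.27]
}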

View File

@@ -1,410 +0,0 @@
package lfm2
import (
"slices"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model/input"
)
var _ kvcache.Cache = (*HybridCache)(nil)
// HybridCache stores:
// - a standard causal KV cache for attention layers
// - a per-sequence recurrent conv state for shortconv layers
//
// Conv state shape (per layer, per sequence): [dConv, hiddenSize] where dConv = L_cache - 1.
// Stored internally as a tensor of shape [dConv * hiddenSize, maxSlots].
type HybridCache struct {
kv *kvcache.Causal
backend ml.Backend
dtype ml.DType
maxSequences int
hiddenSize int
dConv int
// slot mapping for recurrent state
slotForSeq map[int]int
refCount []int
freeSlots []int
// per-layer conv state buffers (allocated lazily)
convCtxs map[int]ml.Context
convStates map[int]ml.Tensor // [dConv*hiddenSize, maxSlots]
// current forward batch (derived in StartForward)
curSeqs []int
curSlots []int
curSlotsInput ml.Tensor
curSeqTokens int
// track if EnsureWritable has been called for this forward pass
writableEnsured bool
// track any error from EnsureWritable to propagate later
writableError error
}
func NewHybridCache(shift func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error), hiddenSize, dConv int) *HybridCache {
return &HybridCache{
kv: kvcache.NewCausalCache(shift),
hiddenSize: hiddenSize,
dConv: dConv,
slotForSeq: make(map[int]int),
convCtxs: make(map[int]ml.Context),
convStates: make(map[int]ml.Tensor),
}
}
func (c *HybridCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {
c.backend = backend
c.dtype = dtype
c.maxSequences = maxSequences
// initialize slot allocator
c.refCount = make([]int, maxSequences)
c.freeSlots = c.freeSlots[:0]
for i := maxSequences - 1; i >= 0; i-- {
c.freeSlots = append(c.freeSlots, i)
}
c.kv.Init(backend, dtype, maxSequences, capacity, maxBatch)
}
func (c *HybridCache) Close() {
for _, ctx := range c.convCtxs {
ctx.Close()
}
c.kv.Close()
}
func (c *HybridCache) SetConfig(config ml.CacheConfig) {
c.kv.SetConfig(config)
}
func (c *HybridCache) SetLayer(layer int) {
c.kv.SetLayer(layer)
}
func (c *HybridCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
return c.kv.Get(ctx)
}
func (c *HybridCache) Put(ctx ml.Context, key, value ml.Tensor) {
c.kv.Put(ctx, key, value)
}
func (c *HybridCache) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error {
if err := c.kv.StartForward(ctx, batch, reserve); err != nil {
return err
}
// Derive equal-length sequence layout for shortconv.
// LFM2 shortconv assumes tokens form a [seq_tokens, seqs] grid.
seqCounts := make(map[int]int)
c.curSeqs = c.curSeqs[:0]
for _, s := range batch.Sequences {
if _, ok := seqCounts[s]; !ok {
c.curSeqs = append(c.curSeqs, s)
}
seqCounts[s]++
}
if len(c.curSeqs) == 0 {
return nil
}
nTokens := len(batch.Sequences)
nSeqs := len(c.curSeqs)
want := nTokens / nSeqs
for _, s := range c.curSeqs {
if seqCounts[s] != want {
return kvcache.ErrNotSupported
}
}
c.curSeqTokens = want
// When reserving memory for estimation, use fake slot assignments
// without modifying permanent state (slotForSeq, refCount)
if reserve {
c.curSlots = c.curSlots[:0]
slots := make([]int32, nSeqs)
for i := range nSeqs {
c.curSlots = append(c.curSlots, i)
slots[i] = int32(i)
}
c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
return nil
}
// Ensure slots exist for sequences in this batch
c.curSlots = c.curSlots[:0]
var newSlots []int // track newly allocated slots that need zeroing
for _, s := range c.curSeqs {
slot, ok := c.slotForSeq[s]
if !ok {
var err error
slot, err = c.allocSlot()
if err != nil {
return err
}
c.slotForSeq[s] = slot
c.refCount[slot] = 1
newSlots = append(newSlots, slot)
}
c.curSlots = append(c.curSlots, slot)
}
// Zero conv state for newly allocated slots to clear stale data from previous sequences
if len(newSlots) > 0 {
c.zeroConvSlots(ctx, newSlots)
}
// Create a tensor for the current slots
slots := make([]int32, len(c.curSlots))
for i, v := range c.curSlots {
slots[i] = int32(v)
}
c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
// Reset writable state for new forward pass
c.writableEnsured = false
c.writableError = nil
return nil
}
func (c *HybridCache) allocSlot() (int, error) {
if len(c.freeSlots) == 0 {
return 0, kvcache.ErrKvCacheFull
}
slot := c.freeSlots[len(c.freeSlots)-1]
c.freeSlots = c.freeSlots[:len(c.freeSlots)-1]
return slot, nil
}
func (c *HybridCache) freeSlot(slot int) {
// Bounds check before freeing
if slot >= 0 && slot < c.maxSequences {
c.freeSlots = append(c.freeSlots, slot)
}
}
// zeroConvSlots zeros the conv state for the given slots across all layers.
// This must be called when recycling slots to prevent stale state from affecting new sequences.
func (c *HybridCache) zeroConvSlots(ctx ml.Context, slots []int) {
if len(slots) == 0 || len(c.convStates) == 0 {
return
}
// Use input context for creating tensors
inputCtx := ctx.Input()
// Create slot indices tensor
slotIndices := make([]int32, len(slots))
for i, s := range slots {
slotIndices[i] = int32(s)
}
slotsTensor := inputCtx.FromInts(slotIndices, len(slotIndices))
// Create zero tensor for the slots (SetRows requires F32 source)
zeros := inputCtx.Zeros(ml.DTypeF32, c.dConv*c.hiddenSize, len(slots))
// Zero each layer's conv state for these slots
for _, buf := range c.convStates {
ctx.Forward(buf.SetRows(ctx, zeros, slotsTensor))
}
}
// EnsureWritable ensures that sequences in the current batch have private (non-shared) conv slots.
// Returns an error if slot allocation fails.
func (c *HybridCache) EnsureWritable(ctx ml.Context) error {
for i, seq := range c.curSeqs {
slot, ok := c.slotForSeq[seq]
if !ok {
continue
}
// Bounds check
if slot < 0 || slot >= len(c.refCount) {
continue
}
if c.refCount[slot] <= 1 {
continue
}
newSlot, err := c.allocSlot()
if err != nil {
return err
}
c.refCount[slot]--
c.refCount[newSlot] = 1
c.slotForSeq[seq] = newSlot
c.curSlots[i] = newSlot
// Copy existing conv state for all initialized layers
for _, buf := range c.convStates {
// buf: [dConv*hiddenSize, maxSlots]
src := buf.Rows(ctx, ctx.Input().FromInts([]int32{int32(slot)}, 1))
// SetRows requires F32 source
srcF32 := src.Cast(ctx, ml.DTypeF32)
ctx.Forward(buf.SetRows(ctx, srcF32, ctx.Input().FromInts([]int32{int32(newSlot)}, 1)))
}
}
// Rebuild current slots tensor
slots := make([]int32, len(c.curSlots))
for i, v := range c.curSlots {
slots[i] = int32(v)
}
c.curSlotsInput = ctx.Input().FromInts(slots, len(slots))
return nil
}
func (c *HybridCache) CopyPrefix(srcSeq, dstSeq int, prefixLen int32) {
// KV cache shares prefix metadata (no copy) which is correct for prefix reuse.
c.kv.CopyPrefix(srcSeq, dstSeq, prefixLen)
// For shortconv state we implement copy-on-write: dst shares the same slot as src.
// On the first write to dst, EnsureWritable will create a private slot.
if dstSlot, ok := c.slotForSeq[dstSeq]; ok {
// Bounds check before decrementing
if dstSlot >= 0 && dstSlot < len(c.refCount) {
c.refCount[dstSlot]--
if c.refCount[dstSlot] <= 0 {
c.refCount[dstSlot] = 0
c.freeSlot(dstSlot)
}
}
delete(c.slotForSeq, dstSeq)
}
srcSlot, ok := c.slotForSeq[srcSeq]
if !ok {
// src may not have a slot yet; dst will allocate on demand
return
}
// Bounds check before incrementing
if srcSlot >= 0 && srcSlot < len(c.refCount) {
c.slotForSeq[dstSeq] = srcSlot
c.refCount[srcSlot]++
}
}
func (c *HybridCache) CanResume(seq int, pos int32) bool {
return c.kv.CanResume(seq, pos)
}
func (c *HybridCache) Remove(seq int, beginIndex, endIndex int32) error {
if err := c.kv.Remove(seq, beginIndex, endIndex); err != nil {
return err
}
// For recurrent state, any removal invalidates the state because
// the state at position N depends on all previous positions.
// Drop the slot mapping so it resets on next use.
slot, ok := c.slotForSeq[seq]
if !ok {
return nil
}
// Bounds check
if slot < 0 || slot >= len(c.refCount) {
delete(c.slotForSeq, seq)
return nil
}
c.refCount[slot]--
if c.refCount[slot] <= 0 {
c.refCount[slot] = 0
c.freeSlot(slot)
}
delete(c.slotForSeq, seq)
return nil
}
func (c *HybridCache) slotsTensor() ml.Tensor {
return c.curSlotsInput
}
func (c *HybridCache) seqTokens() int {
return c.curSeqTokens
}
func (c *HybridCache) numSeqs() int {
return len(c.curSeqs)
}
func (c *HybridCache) convBuffer(ctx ml.Context, layer int) ml.Tensor {
if buf, ok := c.convStates[layer]; ok {
return buf
}
if _, ok := c.convCtxs[layer]; !ok {
c.convCtxs[layer] = c.backend.NewContextSize(1).Layer(layer)
}
buf := c.convCtxs[layer].Zeros(c.dtype, c.dConv*c.hiddenSize, c.maxSequences)
c.convStates[layer] = buf
return buf
}
// ConvState returns the conv state for current batch sequences as shape [dConv, hiddenSize, nSeqs].
// Returns an error if copy-on-write allocation fails.
func (c *HybridCache) ConvState(ctx ml.Context, layer int) (ml.Tensor, error) {
if !c.writableEnsured {
needsWritable := false
for _, seq := range c.curSeqs {
slot, ok := c.slotForSeq[seq]
if !ok {
continue
}
if slot >= 0 && slot < len(c.refCount) && c.refCount[slot] > 1 {
needsWritable = true
break
}
}
if needsWritable {
if err := c.EnsureWritable(ctx); err != nil {
c.writableError = err
}
}
c.writableEnsured = true
}
if c.writableError != nil {
return nil, c.writableError
}
buf := c.convBuffer(ctx, layer)
cur := buf.Rows(ctx, c.slotsTensor())
return cur.Reshape(ctx, c.dConv, c.hiddenSize, c.numSeqs()), nil
}
// UpdateConvState writes a new conv state for current batch sequences.
// newState must have shape [dConv, hiddenSize, nSeqs].
func (c *HybridCache) UpdateConvState(ctx ml.Context, layer int, newState ml.Tensor) {
buf := c.convBuffer(ctx, layer)
src := newState.Reshape(ctx, c.dConv*c.hiddenSize, c.numSeqs())
// SetRows requires F32 source
srcF32 := src.Cast(ctx, ml.DTypeF32)
ctx.Forward(buf.SetRows(ctx, srcF32, c.slotsTensor()))
}
// IsSupportedForBatch returns true if the current batch layout supports shortconv.
func (c *HybridCache) IsSupportedForBatch() bool {
return c.curSeqTokens > 0 && len(c.curSeqs) > 0
}
// Seqs returns the ordered unique sequences for the current forward pass.
func (c *HybridCache) Seqs() []int {
return slices.Clone(c.curSeqs)
}
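The comments above describe a copy-on-write discipline: CopyPrefix lets a forked sequence share its parent's slot, EnsureWritable splits the slot before the first write, and recycled slots are zeroed. The reference counting is easier to see stripped of tensors; a toy sketch with invented names (cowSlots is not part of the package):

package main

import "fmt"

// cowSlots is a toy version of HybridCache's slot bookkeeping: sequences map
// to slots, slots are shared via reference counts, and a shared slot is
// split before its owner writes to it.
type cowSlots struct {
	slotForSeq map[int]int
	refCount   []int
	free       []int
}

func newCowSlots(n int) *cowSlots {
	s := &cowSlots{slotForSeq: map[int]int{}, refCount: make([]int, n)}
	for i := n - 1; i >= 0; i-- {
		s.free = append(s.free, i)
	}
	return s
}

func (s *cowSlots) alloc() int {
	slot := s.free[len(s.free)-1]
	s.free = s.free[:len(s.free)-1]
	return slot
}

// fork makes dst share src's slot, as CopyPrefix does above.
func (s *cowSlots) fork(src, dst int) {
	slot := s.slotForSeq[src]
	s.slotForSeq[dst] = slot
	s.refCount[slot]++
}

// ensureWritable gives seq a private slot before a write, as EnsureWritable
// does above (the real code also copies the conv state to the new slot).
func (s *cowSlots) ensureWritable(seq int) int {
	slot := s.slotForSeq[seq]
	if s.refCount[slot] <= 1 {
		return slot
	}
	s.refCount[slot]--
	fresh := s.alloc()
	s.refCount[fresh] = 1
	s.slotForSeq[seq] = fresh
	return fresh
}

func main() {
	s := newCowSlots(4)
	s.slotForSeq[1] = s.alloc()
	s.refCount[s.slotForSeq[1]] = 1
	s.fork(1, 2)                     // seq 2 shares seq 1's state
	fmt.Println(s.ensureWritable(2)) // first write: seq 2 gets its own slot
	fmt.Println(s.slotForSeq[1] != s.slotForSeq[2])
}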

View File

@@ -1,444 +0,0 @@
package lfm2
import (
"testing"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
)
// TestHybridCache tests verify the slot management logic of HybridCache.
// These tests focus on the recurrent state slot allocation, reference counting,
// and copy-on-write semantics without requiring a full ML backend.
// createSlotOnlyCache creates a HybridCache with only the slot management
// fields initialized. Used to test slot logic in isolation.
func createSlotOnlyCache(maxSequences int) *HybridCache {
return &HybridCache{
hiddenSize: 256,
dConv: 3,
maxSequences: maxSequences,
refCount: make([]int, maxSequences),
freeSlots: initFreeSlots(maxSequences),
slotForSeq: make(map[int]int),
convCtxs: make(map[int]ml.Context),
convStates: make(map[int]ml.Tensor),
}
}
func initFreeSlots(n int) []int {
slots := make([]int, 0, n)
for i := n - 1; i >= 0; i-- {
slots = append(slots, i)
}
return slots
}
func TestHybridCache_SlotAllocation(t *testing.T) {
cache := createSlotOnlyCache(4)
// Verify initial state
if len(cache.freeSlots) != 4 {
t.Errorf("expected 4 free slots, got %d", len(cache.freeSlots))
}
// Allocate all slots
for range 4 {
slot, err := cache.allocSlot()
if err != nil {
t.Fatalf("allocSlot failed: %v", err)
}
cache.refCount[slot] = 1
}
// Should be full now
if len(cache.freeSlots) != 0 {
t.Errorf("expected 0 free slots, got %d", len(cache.freeSlots))
}
// Trying to allocate another should fail
_, err := cache.allocSlot()
if err != kvcache.ErrKvCacheFull {
t.Errorf("expected ErrKvCacheFull, got %v", err)
}
}
func TestHybridCache_SlotReuse(t *testing.T) {
cache := createSlotOnlyCache(4)
// Allocate a slot
slot1, _ := cache.allocSlot()
cache.refCount[slot1] = 1
// Free it
cache.refCount[slot1] = 0
cache.freeSlot(slot1)
// Allocate again - should get the same slot back (LIFO)
slot2, _ := cache.allocSlot()
if slot2 != slot1 {
t.Errorf("expected slot %d to be reused, got %d", slot1, slot2)
}
}
func TestHybridCache_SlotRefCounting_ShareSlot(t *testing.T) {
cache := createSlotOnlyCache(4)
// Allocate slot for seq 1
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
// Simulate sharing slot with seq 2 (copy-on-write style)
cache.slotForSeq[2] = slot1
cache.refCount[slot1]++
// Should share the same slot
if cache.slotForSeq[2] != slot1 {
t.Errorf("expected seq 2 to share slot %d, got %d", slot1, cache.slotForSeq[2])
}
// Ref count should be 2
if cache.refCount[slot1] != 2 {
t.Errorf("expected refCount 2, got %d", cache.refCount[slot1])
}
}
func TestHybridCache_SlotRefCounting_DecRef(t *testing.T) {
cache := createSlotOnlyCache(4)
// Allocate slot for seq 1
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
// Share with seq 2
cache.slotForSeq[2] = slot1
cache.refCount[slot1]++
// Unshare seq 2
cache.refCount[slot1]--
delete(cache.slotForSeq, 2)
// Ref count should be back to 1
if cache.refCount[slot1] != 1 {
t.Errorf("expected refCount 1 after unshare, got %d", cache.refCount[slot1])
}
// Seq 2 should no longer have a slot
if _, ok := cache.slotForSeq[2]; ok {
t.Error("seq 2 should not have a slot after unshare")
}
}
func TestHybridCache_SlotFreeWhenUnused(t *testing.T) {
cache := createSlotOnlyCache(4)
initialFreeSlots := len(cache.freeSlots)
// Allocate slot for seq 1
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
// Free the slot when refCount drops to 0
cache.refCount[slot1]--
if cache.refCount[slot1] <= 0 {
cache.refCount[slot1] = 0
cache.freeSlot(slot1)
}
delete(cache.slotForSeq, 1)
// Slot should be freed
if len(cache.freeSlots) != initialFreeSlots {
t.Errorf("expected %d free slots, got %d", initialFreeSlots, len(cache.freeSlots))
}
// Ref count should be 0
if cache.refCount[slot1] != 0 {
t.Errorf("expected refCount 0, got %d", cache.refCount[slot1])
}
}
func TestHybridCache_SlotOverwrite(t *testing.T) {
cache := createSlotOnlyCache(4)
// Allocate slots for seq 1 and seq 2
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
slot2, _ := cache.allocSlot()
cache.slotForSeq[2] = slot2
cache.refCount[slot2] = 1
initialFreeSlots := len(cache.freeSlots)
// Simulate overwriting seq 2's slot with slot1 (sharing)
// First free the old slot
cache.refCount[slot2]--
if cache.refCount[slot2] <= 0 {
cache.refCount[slot2] = 0
cache.freeSlot(slot2)
}
// Then share slot1
cache.slotForSeq[2] = slot1
cache.refCount[slot1]++
// Seq 2 should now share slot1
if cache.slotForSeq[2] != slot1 {
t.Errorf("expected seq 2 to share slot %d, got %d", slot1, cache.slotForSeq[2])
}
// Old slot2 should be freed
if len(cache.freeSlots) != initialFreeSlots+1 {
t.Errorf("expected %d free slots, got %d", initialFreeSlots+1, len(cache.freeSlots))
}
}
func TestHybridCache_BoundsChecking(t *testing.T) {
cache := createSlotOnlyCache(4)
// Test freeing invalid slot (should not panic)
cache.freeSlot(-1)
cache.freeSlot(100) // out of bounds
// freeSlot does bounds checking, so invalid slots should be ignored
if len(cache.freeSlots) != 4 {
t.Errorf("invalid slots should not affect free list, got %d slots", len(cache.freeSlots))
}
}
func TestHybridCache_MultipleSequences_RefCounting(t *testing.T) {
cache := createSlotOnlyCache(8)
// Allocate slot for seq 1
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
// Fork to seq 2, 3, 4 (all share slot1)
for _, seq := range []int{2, 3, 4} {
cache.slotForSeq[seq] = slot1
cache.refCount[slot1]++
}
// Ref count should be 4
if cache.refCount[slot1] != 4 {
t.Errorf("expected refCount 4, got %d", cache.refCount[slot1])
}
// Remove seq 2, 3
for _, seq := range []int{2, 3} {
delete(cache.slotForSeq, seq)
cache.refCount[slot1]--
}
if cache.refCount[slot1] != 2 {
t.Errorf("expected refCount 2, got %d", cache.refCount[slot1])
}
// Slot should still be allocated (not in free list)
found := false
for _, s := range cache.freeSlots {
if s == slot1 {
found = true
break
}
}
if found {
t.Error("slot1 should not be in free list yet")
}
// Remove remaining sequences
for _, seq := range []int{1, 4} {
delete(cache.slotForSeq, seq)
cache.refCount[slot1]--
}
if cache.refCount[slot1] != 0 {
t.Errorf("expected refCount 0, got %d", cache.refCount[slot1])
}
}
func TestHybridCache_ChainedSharing(t *testing.T) {
cache := createSlotOnlyCache(8)
// Create seq 1
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
// Share 1 -> 2
cache.slotForSeq[2] = slot1
cache.refCount[slot1]++
// Share 2 -> 3 (should still share slot1)
cache.slotForSeq[3] = cache.slotForSeq[2] // which is slot1
cache.refCount[slot1]++
// All should share slot1
if cache.slotForSeq[1] != slot1 || cache.slotForSeq[2] != slot1 || cache.slotForSeq[3] != slot1 {
t.Error("all sequences should share slot1")
}
if cache.refCount[slot1] != 3 {
t.Errorf("expected refCount 3, got %d", cache.refCount[slot1])
}
}
func TestHybridCache_CacheParameters(t *testing.T) {
cache := NewHybridCache(nil, 512, 5) // hiddenSize=512, dConv=5
if cache.hiddenSize != 512 {
t.Errorf("expected hiddenSize 512, got %d", cache.hiddenSize)
}
if cache.dConv != 5 {
t.Errorf("expected dConv 5, got %d", cache.dConv)
}
}
func TestHybridCache_NumSeqs(t *testing.T) {
cache := createSlotOnlyCache(4)
// Initially no sequences
if cache.numSeqs() != 0 {
t.Errorf("expected 0 seqs, got %d", cache.numSeqs())
}
// Manually set up current batch state
cache.curSeqs = []int{1, 2, 3}
if cache.numSeqs() != 3 {
t.Errorf("expected 3 seqs, got %d", cache.numSeqs())
}
}
func TestHybridCache_SeqTokens(t *testing.T) {
cache := createSlotOnlyCache(4)
// Initially 0
if cache.seqTokens() != 0 {
t.Errorf("expected 0 seqTokens, got %d", cache.seqTokens())
}
// Manually set up current batch state
cache.curSeqTokens = 16
if cache.seqTokens() != 16 {
t.Errorf("expected 16 seqTokens, got %d", cache.seqTokens())
}
}
// Test that Seqs returns a clone of curSeqs
func TestHybridCache_Seqs_ReturnsClone(t *testing.T) {
cache := createSlotOnlyCache(4)
cache.curSeqs = []int{1, 2, 3}
seqs := cache.Seqs()
// Modify returned slice
seqs[0] = 999
// Original should be unchanged
if cache.curSeqs[0] != 1 {
t.Error("Seqs should return a clone, not the original slice")
}
}
func TestHybridCache_IsSupportedForBatch(t *testing.T) {
cache := createSlotOnlyCache(4)
// Initially not supported (no batch set up)
if cache.IsSupportedForBatch() {
t.Error("expected IsSupportedForBatch to be false initially")
}
// Set up a valid batch
cache.curSeqTokens = 1
cache.curSeqs = []int{1}
if !cache.IsSupportedForBatch() {
t.Error("expected IsSupportedForBatch to be true with valid batch")
}
}
func TestHybridCache_ZeroConvSlots_EmptyInputs(t *testing.T) {
cache := createSlotOnlyCache(4)
// zeroConvSlots should handle empty slots without panicking
cache.zeroConvSlots(nil, nil)
cache.zeroConvSlots(nil, []int{})
// zeroConvSlots should handle empty convStates without panicking
cache.zeroConvSlots(nil, []int{0, 1, 2})
}
func TestHybridCache_SlotRecycling_TracksNewSlots(t *testing.T) {
cache := createSlotOnlyCache(4)
// Allocate slot for seq 1
slot1, _ := cache.allocSlot()
cache.slotForSeq[1] = slot1
cache.refCount[slot1] = 1
// Free the slot (simulating sequence removal)
cache.refCount[slot1]--
cache.freeSlot(slot1)
delete(cache.slotForSeq, 1)
// Verify slot is in free list
if len(cache.freeSlots) != 4 {
t.Errorf("expected 4 free slots after freeing, got %d", len(cache.freeSlots))
}
// Allocate for new seq 2 - should get recycled slot
slot2, _ := cache.allocSlot()
if slot2 != slot1 {
t.Errorf("expected recycled slot %d, got %d", slot1, slot2)
}
// This recycled slot would need zeroing in the real implementation
// The actual zeroing is tested via integration tests since it requires ML context
}
func TestHybridCache_NewSequence_GetsTrackedForZeroing(t *testing.T) {
cache := createSlotOnlyCache(4)
// Simulate the slot allocation flow from StartForward
// When a sequence doesn't have a slot, it gets allocated and tracked as "new"
newSlots := []int{}
// Seq 1 doesn't have a slot - allocate and track
seq := 1
if _, ok := cache.slotForSeq[seq]; !ok {
slot, err := cache.allocSlot()
if err != nil {
t.Fatalf("allocSlot failed: %v", err)
}
cache.slotForSeq[seq] = slot
cache.refCount[slot] = 1
newSlots = append(newSlots, slot)
}
// Verify newSlots contains the allocated slot
if len(newSlots) != 1 {
t.Errorf("expected 1 new slot, got %d", len(newSlots))
}
// Seq 1 already has a slot - should NOT be tracked as new
newSlots2 := []int{}
if _, ok := cache.slotForSeq[seq]; !ok {
slot, _ := cache.allocSlot()
cache.slotForSeq[seq] = slot
cache.refCount[slot] = 1
newSlots2 = append(newSlots2, slot)
}
// Verify no new slots for existing sequence
if len(newSlots2) != 0 {
t.Errorf("expected 0 new slots for existing sequence, got %d", len(newSlots2))
}
}

View File

@@ -1,253 +0,0 @@
package lfm2
import (
"cmp"
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Options struct {
hiddenSize int
headDim, ropeDim int
eps, ropeBase, ropeScale float32
ropeType string
originalContextLength int
// per-layer head counts (LFM2 alternates attention and recurrent layers)
numHeadsByLayer []int
numKVHeadsByLayer []int
}
func (o Options) headDimValue() int {
// Head dim is shared across layers; fall back to first attention layer head count.
for _, h := range o.numHeadsByLayer {
if h > 0 {
return cmp.Or(o.headDim, o.hiddenSize/h)
}
}
return cmp.Or(o.headDim, o.hiddenSize)
}
func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions ml.Tensor) ml.Tensor {
opts := []func(*rope.Options){rope.WithTypeNeoX()}
if o.ropeType == "yarn" {
attnFactor := float32(1.0 / (1.0 + 0.1*math.Log(float64(o.ropeScale))))
opts = append(opts,
rope.WithOriginalContextLength(o.originalContextLength),
rope.WithExtrapolationFactor(1.),
rope.WithAttentionFactor(attnFactor),
)
}
headCount := 1
for _, h := range o.numHeadsByLayer {
if h > 0 {
headCount = h
break
}
}
return nn.RoPE(ctx, states, positions, cmp.Or(o.ropeDim, o.headDim, o.hiddenSize/headCount), o.ropeBase, 1./o.ropeScale, opts...)
}
type Model struct {
model.Base
model.TextProcessor
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Layers []Layer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm,alt:token_embd_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
Options
}
func New(c fs.Config) (model.Model, error) {
if c.Uint("expert_count") > 0 {
return nil, model.ErrUnsupportedModel
}
if c.String("tokenizer.ggml.model") != "gpt2" {
return nil, model.ErrUnsupportedTokenizer
}
vocabulary := model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
}
var pretokenizers []string
switch c.String("tokenizer.ggml.pre") {
case "default":
// use default BPE pretokenizer
default:
// llama-bpe style (default for LFM2)
pretokenizers = []string{
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
}
}
m := Model{
TextProcessor: model.NewBytePairEncoding(&vocabulary, pretokenizers...),
Layers: make([]Layer, c.Uint("block_count")),
Options: Options{
hiddenSize: int(c.Uint("embedding_length")),
headDim: int(c.Uint("attention.key_length")),
ropeDim: int(c.Uint("rope.dimension_count")),
eps: c.Float("attention.layer_norm_rms_epsilon"),
ropeType: c.String("rope.scaling.type"),
ropeBase: c.Float("rope.freq_base"),
ropeScale: c.Float("rope.scaling.factor", 1),
originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
},
}
type headCounts interface {
HeadCount() []uint64
HeadCountKV() []uint64
}
hc, ok := c.(headCounts)
if !ok {
return nil, model.ErrUnsupportedModel
}
headCount := hc.HeadCount()
headCountKV := hc.HeadCountKV()
m.numHeadsByLayer = make([]int, len(m.Layers))
m.numKVHeadsByLayer = make([]int, len(m.Layers))
for i := range m.Layers {
m.numHeadsByLayer[i] = int(headCount[i])
m.numKVHeadsByLayer[i] = int(headCountKV[i])
if m.numKVHeadsByLayer[i] == 0 {
m.Layers[i].Operator = &ShortConv{}
} else {
m.Layers[i].Operator = &Attention{}
}
}
lCache := int(c.Uint("shortconv.l_cache"))
dConv := max(0, lCache-1)
m.Cache = NewHybridCache(m.Shift, m.hiddenSize, dConv)
return &m, nil
}
type Operator interface {
Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor
}
type Attention struct {
Query *nn.Linear `gguf:"attn_q"`
QueryNorm *nn.RMSNorm `gguf:"attn_q_norm"`
Key *nn.Linear `gguf:"attn_k"`
KeyNorm *nn.RMSNorm `gguf:"attn_k_norm"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output,alt:attn_out"`
}
func (sa *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
batchSize := hiddenStates.Dim(1)
headDim := opts.headDimValue()
numHeads := opts.numHeadsByLayer[layer]
numKVHeads := opts.numKVHeadsByLayer[layer]
query := sa.Query.Forward(ctx, hiddenStates)
key := sa.Key.Forward(ctx, hiddenStates)
value := sa.Value.Forward(ctx, hiddenStates)
query = query.Reshape(ctx, headDim, numHeads, batchSize)
key = key.Reshape(ctx, headDim, numKVHeads, batchSize)
value = value.Reshape(ctx, headDim, numKVHeads, batchSize)
query = sa.QueryNorm.Forward(ctx, query, opts.eps)
key = sa.KeyNorm.Forward(ctx, key, opts.eps)
query = opts.applyRotaryPositionEmbeddings(ctx, query, positions)
key = opts.applyRotaryPositionEmbeddings(ctx, key, positions)
attention := nn.Attention(ctx, query, key, value, 1./math.Sqrt(float64(headDim)), cache)
attention = attention.Reshape(ctx, attention.Dim(0)*attention.Dim(1), batchSize)
return sa.Output.Forward(ctx, attention)
}
type MLP struct {
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
Gate *nn.Linear `gguf:"ffn_gate"`
}
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx, mlp.Up.Forward(ctx, hiddenState))
return mlp.Down.Forward(ctx, hiddenState)
}
type Layer struct {
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
Operator Operator
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
MLP *MLP
}
func (l *Layer) Forward(ctx ml.Context, layer int, hiddenState, positions, outputs ml.Tensor, cache *HybridCache, opts *Options) ml.Tensor {
residual := hiddenState
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = l.Operator.Forward(ctx, hiddenState, positions, cache, layer, opts)
if outputs != nil {
hiddenState = hiddenState.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
}
hiddenState = hiddenState.Add(ctx, residual)
residual = hiddenState
hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = l.MLP.Forward(ctx, hiddenState, opts)
return hiddenState.Add(ctx, residual)
}
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
return m.applyRotaryPositionEmbeddings(ctx, key, shift), nil
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
for i, layer := range m.Layers {
m.Cache.SetLayer(i)
var outputs ml.Tensor
if i == len(m.Layers)-1 {
outputs = batch.Outputs
}
hiddenState = layer.Forward(ctx, i, hiddenState, positions, outputs, m.Cache.(*HybridCache), &m.Options)
}
hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
return m.Output.Forward(ctx, hiddenState), nil
}
func init() {
model.Register("lfm2", New)
}
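Note how New derives the layer mix purely from head-count metadata: a zero entry in head_count_kv marks a recurrent shortconv layer, anything else an attention layer. A toy illustration of that dispatch with made-up head counts (real values come from the GGUF metadata):

package main

import "fmt"

func main() {
	// hypothetical head_count_kv for a 6-layer model; zeros mark shortconv
	// layers, as in lfm2.New above
	headCountKV := []uint64{0, 8, 0, 8, 0, 8}
	for i, kv := range headCountKV {
		if kv == 0 {
			fmt.Printf("layer %d: ShortConv\n", i)
		} else {
			fmt.Printf("layer %d: Attention (%d KV heads)\n", i, kv)
		}
	}
}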

View File

@@ -1,50 +0,0 @@
package lfm2
import (
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
)
type shortConvKernel struct {
Weight ml.Tensor `gguf:"weight"`
}
// ShortConv implements the LFM2 short-convolution block (GGML_OP_SSM_CONV) with a recurrent
// state stored in the HybridCache.
type ShortConv struct {
Conv *shortConvKernel `gguf:"shortconv.conv"`
InProj *nn.Linear `gguf:"shortconv.in_proj"`
OutProj *nn.Linear `gguf:"shortconv.out_proj"`
}
func (sc *ShortConv) Forward(ctx ml.Context, hiddenStates ml.Tensor, _ ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
nSeqs := cache.numSeqs()
seqTokens := cache.seqTokens()
hiddenSize := hiddenStates.Dim(0)
if nSeqs <= 0 || seqTokens <= 0 || hiddenStates.Dim(1) != nSeqs*seqTokens {
panic("lfm2: unsupported batch layout for shortconv")
}
bcx := sc.InProj.Forward(ctx, hiddenStates).Reshape(ctx, 3*hiddenSize, seqTokens, nSeqs)
elementSize := bcx.Stride(0)
b := bcx.View(ctx, 0*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
c := bcx.View(ctx, 1*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
x := bcx.View(ctx, 2*hiddenSize*elementSize, hiddenSize, bcx.Stride(1), seqTokens, bcx.Stride(2), nSeqs)
bx := b.Mul(ctx, x).Permute(ctx, 1, 0, 2, 3)
state, err := cache.ConvState(ctx, layer)
if err != nil {
panic("lfm2: failed to get conv state: " + err.Error())
}
sx := state.Concat(ctx, bx, 0)
convOut := sx.SSMConv(ctx, sc.Conv.Weight)
y := c.Mul(ctx, convOut)
dConv := sx.Dim(0) - seqTokens
cache.UpdateConvState(ctx, layer, sx.Slice(ctx, 0, sx.Dim(0)-dConv, sx.Dim(0), 1))
return sc.OutProj.Forward(ctx, y.Reshape(ctx, hiddenSize, seqTokens*nSeqs))
}
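In scalar terms the block computes y = C * causal_conv(B * x) per channel, and the cache carries the last dConv inputs of B*x across batches. A toy single-channel version of that causal convolution, with invented weights, just to pin down the recurrence the conv state feeds (causalConv is illustrative, not the GGML kernel):

package main

import "fmt"

// causalConv applies a depthwise causal convolution of width len(w) to one
// channel. state holds the previous len(w)-1 inputs, which is exactly what
// HybridCache stores per layer and sequence.
func causalConv(state, xs, w []float32) (ys, newState []float32) {
	window := append(append([]float32{}, state...), xs...)
	for i := range xs {
		var y float32
		for j, wj := range w {
			y += wj * window[i+j]
		}
		ys = append(ys, y)
	}
	// keep the trailing len(w)-1 inputs as the next state
	newState = window[len(window)-(len(w)-1):]
	return ys, newState
}

func main() {
	state := []float32{0, 0}       // dConv = 2 zeros before the first token
	w := []float32{0.25, 0.5, 1.0} // hypothetical kernel, L_cache = 3
	ys, state := causalConv(state, []float32{1, 2, 3}, w)
	fmt.Println(ys, state) // [1 2.5 4.25] [2 3]
}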

View File

@@ -7,9 +7,7 @@ import (
_ "github.com/ollama/ollama/model/models/gemma2" _ "github.com/ollama/ollama/model/models/gemma2"
_ "github.com/ollama/ollama/model/models/gemma3" _ "github.com/ollama/ollama/model/models/gemma3"
_ "github.com/ollama/ollama/model/models/gemma3n" _ "github.com/ollama/ollama/model/models/gemma3n"
_ "github.com/ollama/ollama/model/models/glm4moelite"
_ "github.com/ollama/ollama/model/models/gptoss" _ "github.com/ollama/ollama/model/models/gptoss"
_ "github.com/ollama/ollama/model/models/lfm2"
_ "github.com/ollama/ollama/model/models/llama" _ "github.com/ollama/ollama/model/models/llama"
_ "github.com/ollama/ollama/model/models/llama4" _ "github.com/ollama/ollama/model/models/llama4"
_ "github.com/ollama/ollama/model/models/mistral3" _ "github.com/ollama/ollama/model/models/mistral3"

View File

@@ -1,410 +0,0 @@
package parsers
import (
"context"
"encoding/xml"
"fmt"
"log/slog"
"strings"
"unicode"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
)
type glm46ParserState int
const (
glm46ParserState_LookingForThinkingOpen glm46ParserState = iota
glm46ParserState_ThinkingStartedEatingWhitespace
glm46ParserState_CollectingThinking
glm46ParserState_ThinkingDoneEatingWhitespace
glm46ParserState_CollectingContent
glm46ParserState_ToolStartedEatingWhitespace
glm46ParserState_CollectingToolContent
)
const (
glm46ThinkingOpenTag = "<think>"
glm46ThinkingCloseTag = "</think>"
glm46ToolOpenTag = "<tool_call>"
glm46ToolCloseTag = "</tool_call>"
)
type GLM46Parser struct {
state glm46ParserState
buffer strings.Builder
tools []api.Tool
}
func (p *GLM46Parser) HasToolSupport() bool {
return true
}
func (p *GLM46Parser) HasThinkingSupport() bool {
return true
}
func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.tools = tools
return tools
}
type glm46Event interface {
isGLM46Event()
}
type glm46EventContent struct {
content string
}
func (glm46EventContent) isGLM46Event() {}
type glm46EventRawToolCall struct {
raw string
}
func (glm46EventRawToolCall) isGLM46Event() {}
type glm46EventThinkingContent struct {
content string
}
func (glm46EventThinkingContent) isGLM46Event() {}
func (p *GLM46Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
p.buffer.WriteString(s)
events := p.parseEvents()
var toolCalls []api.ToolCall
var contentSb strings.Builder
var thinkingSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case glm46EventRawToolCall:
toolCall, err := parseGLM46ToolCall(event, p.tools)
if err != nil {
slog.Warn("glm-4.6 tool call parsing failed", "error", err)
return "", "", nil, err
}
toolCalls = append(toolCalls, toolCall)
case glm46EventThinkingContent:
thinkingSb.WriteString(event.content)
case glm46EventContent:
// TODO(drifkin): if the same turn contains multiple interleaved content
// events, we naively append them together here.
contentSb.WriteString(event.content)
}
}
return contentSb.String(), thinkingSb.String(), toolCalls, nil
}
func (p *GLM46Parser) parseEvents() []glm46Event {
var all []glm46Event
keepLooping := true
for keepLooping {
var events []glm46Event
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
if len(all) > 0 {
slog.Log(context.TODO(), logutil.LevelTrace, "glm-4.6 events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
}
return all
}
// eatLeadingWhitespaceAndTransitionTo consumes leading whitespace from the buffer
// and transitions to the next state. Returns (nil, false) if only whitespace remains
// in the buffer (needs more input), or (nil, true) if we successfully transitioned.
func (p *GLM46Parser) eatLeadingWhitespaceAndTransitionTo(nextState glm46ParserState) ([]glm46Event, bool) {
trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
p.buffer.Reset()
if trimmed == "" {
return nil, false // Still only whitespace, keep waiting for more input
}
p.state = nextState
p.buffer.WriteString(trimmed)
return nil, true // Successfully transitioned
}
// glm46SplitAtTag splits the buffer at the given tag, returns the content before (trimmed of trailing whitespace),
// the content after (optionally trimmed of leading whitespace), and updates the buffer
func glm46SplitAtTag(p *GLM46Parser, tag string, trimAfter bool) (string, string) {
split := strings.SplitN(p.buffer.String(), tag, 2)
before := split[0]
before = strings.TrimRightFunc(before, unicode.IsSpace)
after := split[1]
if trimAfter {
after = strings.TrimLeftFunc(after, unicode.IsSpace)
}
p.buffer.Reset()
p.buffer.WriteString(after)
return before, after
}
func (p *GLM46Parser) eat() ([]glm46Event, bool) {
var events []glm46Event
switch p.state {
case glm46ParserState_LookingForThinkingOpen:
trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
if strings.HasPrefix(trimmed, glm46ThinkingOpenTag) {
// Found <think> opening tag
after := strings.TrimPrefix(trimmed, glm46ThinkingOpenTag)
after = strings.TrimLeftFunc(after, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(after)
if after == "" {
p.state = glm46ParserState_ThinkingStartedEatingWhitespace
} else {
p.state = glm46ParserState_CollectingThinking
}
return events, true
} else if strings.HasPrefix(glm46ThinkingOpenTag, trimmed) {
// Partial opening tag seen, keep accumulating
return events, false
} else if trimmed == "" {
// Only whitespace, keep accumulating
return events, false
} else {
// No thinking tag found, skip to content collection
p.state = glm46ParserState_CollectingContent
// Don't trim - we want to keep the original content
return events, true
}
case glm46ParserState_ThinkingStartedEatingWhitespace:
return p.eatLeadingWhitespaceAndTransitionTo(glm46ParserState_CollectingThinking)
case glm46ParserState_CollectingThinking:
acc := p.buffer.String()
if strings.Contains(acc, glm46ThinkingCloseTag) {
thinking, remaining := glm46SplitAtTag(p, glm46ThinkingCloseTag, true)
if len(thinking) > 0 {
events = append(events, glm46EventThinkingContent{content: thinking})
}
if remaining == "" {
p.state = glm46ParserState_ThinkingDoneEatingWhitespace
} else {
p.state = glm46ParserState_CollectingContent
}
return events, true
} else if overlapLen := overlap(acc, glm46ThinkingCloseTag); overlapLen > 0 {
// Partial closing tag - withhold it along with any trailing whitespace before it
beforePartialTag := acc[:len(acc)-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := acc[:ambiguousStart]
ambiguous := acc[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, glm46EventThinkingContent{content: unambiguous})
}
return events, false
} else {
// Pure thinking content - withhold trailing whitespace (might precede closing tag)
whitespaceLen := trailingWhitespaceLen(acc)
ambiguousStart := len(acc) - whitespaceLen
unambiguous := acc[:ambiguousStart]
ambiguous := acc[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, glm46EventThinkingContent{content: unambiguous})
}
return events, false
}
case glm46ParserState_ThinkingDoneEatingWhitespace:
return p.eatLeadingWhitespaceAndTransitionTo(glm46ParserState_CollectingContent)
case glm46ParserState_CollectingContent:
if strings.Contains(p.buffer.String(), glm46ToolOpenTag) {
before, after := glm46SplitAtTag(p, glm46ToolOpenTag, true)
if len(before) > 0 {
events = append(events, glm46EventContent{content: before})
}
if after == "" {
p.state = glm46ParserState_ToolStartedEatingWhitespace
} else {
p.state = glm46ParserState_CollectingToolContent
}
return events, true
} else if overlapLen := overlap(p.buffer.String(), glm46ToolOpenTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, glm46EventContent{content: unambiguous})
}
return events, false
} else {
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
ambiguousStart := len(p.buffer.String()) - whitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, glm46EventContent{content: unambiguous})
}
return events, false
}
case glm46ParserState_ToolStartedEatingWhitespace:
return p.eatLeadingWhitespaceAndTransitionTo(glm46ParserState_CollectingToolContent)
case glm46ParserState_CollectingToolContent:
acc := p.buffer.String()
if strings.Contains(acc, glm46ToolCloseTag) {
toolContent, _ := glm46SplitAtTag(p, glm46ToolCloseTag, true)
if len(toolContent) == 0 {
slog.Warn("glm46 tool call closing tag found but no content before it")
}
events = append(events, glm46EventRawToolCall{raw: toolContent})
p.state = glm46ParserState_CollectingContent
return events, true
} else {
// Keep accumulating - tool calls are not streamed
// We just wait for the closing tag
return events, false
}
default:
panic("unreachable")
}
}
// GLMToolCallXML represents the structure of a GLM-4.6 tool call for XML parsing
type GLMToolCallXML struct {
XMLName xml.Name `xml:"tool_call"`
Content string `xml:",chardata"` // Function name (text nodes between tags)
Keys []string `xml:"arg_key"` // All arg_key elements in document order
Values []string `xml:"arg_value"` // All arg_value elements in document order
}
// escapeGLM46Content escapes XML entities in text content while preserving arg_key/arg_value tags
func escapeGLM46Content(s string) string {
var result strings.Builder
inTag := false
for i := range len(s) {
ch := s[i]
if ch == '<' {
// Check if this is a known tag
if strings.HasPrefix(s[i:], "<arg_key>") ||
strings.HasPrefix(s[i:], "</arg_key>") ||
strings.HasPrefix(s[i:], "<arg_value>") ||
strings.HasPrefix(s[i:], "</arg_value>") {
inTag = true
}
}
if inTag {
result.WriteByte(ch)
if ch == '>' {
inTag = false
}
} else {
// Escape special characters in text content
switch ch {
case '&':
result.WriteString("&amp;")
case '<':
result.WriteString("&lt;")
case '>':
result.WriteString("&gt;")
default:
result.WriteByte(ch)
}
}
}
return result.String()
}
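// For example (illustrative input): escapeGLM46Content escapes the free-text
// '<' in `a < b<arg_key>cmd</arg_key>` but passes the recognized tags through,
// producing `a &lt; b<arg_key>cmd</arg_key>`.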
func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
// Escape any unescaped entities in text content
// We need to escape text between tags, but not the tags themselves
escaped := escapeGLM46Content(raw.raw)
// Wrap the content in a root element to make it valid XML
xmlString := "<tool_call>" + escaped + "</tool_call>"
// Parse XML into struct
var parsed GLMToolCallXML
if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
}
// Extract and trim function name
functionName := strings.TrimSpace(parsed.Content)
if functionName == "" {
return api.ToolCall{}, fmt.Errorf("empty function name")
}
// Verify keys and values are paired correctly
if len(parsed.Keys) != len(parsed.Values) {
return api.ToolCall{}, fmt.Errorf("mismatched arg_key and arg_value counts: %d keys, %d values", len(parsed.Keys), len(parsed.Values))
}
// Find the matching tool to get parameter types
var matchedTool *api.Tool
for i := range tools {
if tools[i].Function.Name == functionName {
matchedTool = &tools[i]
break
}
}
// Build arguments map by pairing keys and values
toolCall := api.ToolCall{
Function: api.ToolCallFunction{
Name: functionName,
Arguments: api.NewToolCallFunctionArguments(),
},
}
for i := range parsed.Keys {
key := strings.TrimSpace(parsed.Keys[i])
value := parsed.Values[i] // Don't trim here - parseValue handles it
// Look up parameter type
var paramType api.PropertyType
if matchedTool != nil && matchedTool.Function.Parameters.Properties != nil {
if prop, ok := matchedTool.Function.Parameters.Properties.Get(key); ok {
// Handle anyOf by collecting all types from the union
if len(prop.AnyOf) > 0 {
for _, anyOfProp := range prop.AnyOf {
paramType = append(paramType, anyOfProp.Type...)
}
} else {
paramType = prop.Type
}
}
}
// Parse value with type coercion
toolCall.Function.Arguments.Set(key, parseValue(value, paramType))
}
return toolCall, nil
}
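
Both collecting states above withhold any buffer suffix that might be the start of a tag, plus the whitespace just before it. They lean on two helpers defined elsewhere in this package; a minimal sketch of their assumed behavior (not the actual implementations):

// overlap reports the length of the longest suffix of s that is a prefix of
// tag, i.e. how many trailing bytes of s could be the start of the tag.
func overlap(s, tag string) int {
	for n := min(len(s), len(tag)-1); n > 0; n-- {
		if strings.HasSuffix(s, tag[:n]) {
			return n
		}
	}
	return 0
}

// trailingWhitespaceLen counts trailing whitespace bytes so callers can hold
// back whitespace that may turn out to precede a closing tag.
func trailingWhitespaceLen(s string) int {
	return len(s) - len(strings.TrimRightFunc(s, unicode.IsSpace))
}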

View File

@@ -1,862 +0,0 @@
package parsers
import (
"encoding/xml"
"reflect"
"testing"
"github.com/ollama/ollama/api"
)
func TestGLM46ParserStreaming(t *testing.T) {
type step struct {
input string
wantEvents []glm46Event
}
cases := []struct {
desc string
steps []step
only bool
}{
{
desc: "leading whitespace before think tag",
steps: []step{
{
input: " \n\t ",
wantEvents: []glm46Event{},
},
{
input: "<think>thinking</think>",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "thinking"}},
},
},
},
{
desc: "think tag with whitespace inside",
steps: []step{
{
input: "<think> \n thinking content \n </think>regular content",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking content"},
glm46EventContent{content: "regular content"},
},
},
},
},
{
desc: "tool call with leading whitespace after opening tag",
steps: []step{
{
input: "<think></think><tool_call> \n test \n </tool_call>",
wantEvents: []glm46Event{
glm46EventRawToolCall{raw: "test"},
},
},
},
},
{
desc: "simple thinking then content",
steps: []step{
{
input: "<think>I am thinking</think>Now I respond",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "I am thinking"},
glm46EventContent{content: "Now I respond"},
},
},
},
},
{
desc: "streamed thinking content",
steps: []step{
{
input: "<think>hello",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "hello"}},
},
{
input: " world",
wantEvents: []glm46Event{glm46EventThinkingContent{content: " world"}},
},
{
input: "</think>content",
wantEvents: []glm46Event{
glm46EventContent{content: "content"},
},
},
},
},
{
desc: "content before tool call",
steps: []step{
{
input: "<think>Let me call a tool</think>here is text<tool_call>",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "Let me call a tool"},
glm46EventContent{content: "here is text"},
},
},
{
input: "function_name\n<arg_key>param</arg_key>\n<arg_value>value</arg_value>\n</tool_call>",
wantEvents: []glm46Event{
glm46EventRawToolCall{raw: "function_name\n<arg_key>param</arg_key>\n<arg_value>value</arg_value>"},
},
},
},
},
{
desc: "tool call with content after",
steps: []step{
{
input: "<think>thinking</think><tool_call>test</tool_call>after tool",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking"},
glm46EventRawToolCall{raw: "test"},
glm46EventContent{content: "after tool"},
},
},
},
},
{
desc: "trailing whitespace between content and tool call is trimmed",
steps: []step{
{
input: "<think>thinking</think>content\n \t <tool_call>test</tool_call>",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking"},
glm46EventContent{content: "content"},
glm46EventRawToolCall{raw: "test"},
},
},
},
},
{
desc: "trailing whitespace between tool call and content is trimmed",
steps: []step{
{
input: "<think>think</think><tool_call>test</tool_call>\n\t after",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "think"},
glm46EventRawToolCall{raw: "test"},
glm46EventContent{content: "after"},
},
},
},
},
{
desc: "split thinking close tag",
steps: []step{
{
input: "<think>thinking content</th",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "thinking content"}},
},
{
input: "ink>after",
wantEvents: []glm46Event{
glm46EventContent{content: "after"},
},
},
},
},
{
desc: "split thinking open tag",
steps: []step{
{
input: " <thi",
wantEvents: []glm46Event{},
},
{
input: "nk>content</think>",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "content"}},
},
},
},
{
desc: "split tool open tag",
steps: []step{
{
input: "<think>think</think>content<tool",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "think"}, glm46EventContent{content: "content"}},
},
{
input: "_call>inside",
wantEvents: []glm46Event{},
},
{
input: "</tool_call>",
wantEvents: []glm46Event{
glm46EventRawToolCall{raw: "inside"},
},
},
},
},
{
desc: "partial thinking close tag fakeout",
steps: []step{
{
input: "<think>content</th",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "content"}},
},
{
input: "ought more",
wantEvents: []glm46Event{glm46EventThinkingContent{content: "</thought more"}},
},
},
},
{
desc: "partial thinking open tag fakeout",
steps: []step{
{
input: " <thi",
wantEvents: []glm46Event{},
},
{
input: "nking is fun",
wantEvents: []glm46Event{
glm46EventContent{content: " <thinking is fun"},
},
},
},
},
{
desc: "partial tool open tag fakeout",
steps: []step{
{
input: "<think></think>content\n<tool",
wantEvents: []glm46Event{
glm46EventContent{content: "content"},
},
},
{
input: " fakeout",
wantEvents: []glm46Event{
glm46EventContent{content: "\n<tool fakeout"},
},
},
},
},
{
desc: "partial tool close tag fakeout",
steps: []step{
{
input: "<think></think><tool_call>content</tool",
wantEvents: []glm46Event{},
},
{
input: " fakeout",
wantEvents: []glm46Event{},
},
{
input: "</tool_call>",
wantEvents: []glm46Event{
glm46EventRawToolCall{raw: "content</tool fakeout"},
},
},
},
},
{
desc: "empty thinking tag",
steps: []step{
{
input: "<think></think>content here",
wantEvents: []glm46Event{
glm46EventContent{content: "content here"},
},
},
},
},
{
desc: "multiple tool calls in sequence",
steps: []step{
{
input: "<think>think</think><tool_call>first</tool_call>between<tool_call>second</tool_call>end",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "think"},
glm46EventRawToolCall{raw: "first"},
glm46EventContent{content: "between"},
glm46EventRawToolCall{raw: "second"},
glm46EventContent{content: "end"},
},
},
},
},
{
desc: "no thinking tag - direct to content",
steps: []step{
{
input: "just content here",
wantEvents: []glm46Event{
glm46EventContent{content: "just content here"},
},
},
},
},
{
desc: "no thinking tag - skip to content then tool call",
steps: []step{
{
input: "Here's the answer:<tool_call>test</tool_call>done",
wantEvents: []glm46Event{
glm46EventContent{content: "Here's the answer:"},
glm46EventRawToolCall{raw: "test"},
glm46EventContent{content: "done"},
},
},
},
},
{
desc: "no thinking tag - whitespace preserved when no tags",
steps: []step{
{
input: " \n content with leading whitespace",
wantEvents: []glm46Event{
glm46EventContent{content: " \n content with leading whitespace"},
},
},
},
},
{
desc: "whitespace after think close tag gets eaten",
steps: []step{
{
input: "<think>thinking</think> \n\t content",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking"},
glm46EventContent{content: "content"},
},
},
},
},
{
desc: "whitespace after tool_call close tag gets eaten",
steps: []step{
{
input: "<think></think><tool_call>test</tool_call> \n\t content",
wantEvents: []glm46Event{
glm46EventRawToolCall{raw: "test"},
glm46EventContent{content: "content"},
},
},
},
},
{
desc: "thinking content withholds trailing whitespace (single chunk)",
steps: []step{
{
input: "<think>thinking content ",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking content"},
},
},
{
input: "</think>after",
wantEvents: []glm46Event{
glm46EventContent{content: "after"},
},
},
},
},
{
desc: "thinking content withholds trailing whitespace with newlines",
steps: []step{
{
input: "<think>thinking\n\n ",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking"},
},
},
{
input: "</think>content",
wantEvents: []glm46Event{
glm46EventContent{content: "content"},
},
},
},
},
{
desc: "thinking content trailing whitespace emitted when more content arrives",
steps: []step{
{
input: "<think>thinking ",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking"},
},
},
{
input: "more thinking",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: " more thinking"},
},
},
{
input: "</think>",
wantEvents: []glm46Event{},
},
},
},
{
desc: "thinking content withholds trailing whitespace before partial close tag",
steps: []step{
{
input: "<think>thinking </th",
wantEvents: []glm46Event{
glm46EventThinkingContent{content: "thinking"},
},
},
{
input: "ink>content",
wantEvents: []glm46Event{
glm46EventContent{content: "content"},
},
},
},
},
}
anyOnlies := false
for _, tc := range cases {
if tc.only {
anyOnlies = true
}
}
for _, tc := range cases {
if anyOnlies && !tc.only {
continue
}
t.Run(tc.desc, func(t *testing.T) {
parser := GLM46Parser{}
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)
gotEvents := parser.parseEvents()
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
// avoid deep equal on empty vs. nil slices
continue
}
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
}
}
})
}
}
// TestGLMToolCallXMLOrderPreservation verifies that xml.Unmarshal preserves
// document order when collecting multiple elements with the same tag name into slices.
// This is a critical assumption for the GLM-4.6 parser's struct-based approach.
func TestGLMToolCallXMLOrderPreservation(t *testing.T) {
testCases := []struct {
name string
xml string
wantKeys []string
wantValues []string
}{
{
name: "alternating keys and values",
xml: `<tool_call>
function_name
<arg_key>first</arg_key>
<arg_value>A</arg_value>
<arg_key>second</arg_key>
<arg_value>B</arg_value>
<arg_key>third</arg_key>
<arg_value>C</arg_value>
</tool_call>`,
wantKeys: []string{"first", "second", "third"},
wantValues: []string{"A", "B", "C"},
},
{
name: "all keys then all values",
xml: `<tool_call>
function_name
<arg_key>key1</arg_key>
<arg_key>key2</arg_key>
<arg_key>key3</arg_key>
<arg_value>val1</arg_value>
<arg_value>val2</arg_value>
<arg_value>val3</arg_value>
</tool_call>`,
wantKeys: []string{"key1", "key2", "key3"},
wantValues: []string{"val1", "val2", "val3"},
},
{
name: "mixed grouping",
xml: `<tool_call>
function_name
<arg_key>a</arg_key>
<arg_value>1</arg_value>
<arg_key>b</arg_key>
<arg_key>c</arg_key>
<arg_value>2</arg_value>
<arg_value>3</arg_value>
</tool_call>`,
wantKeys: []string{"a", "b", "c"},
wantValues: []string{"1", "2", "3"},
},
{
name: "reverse order - all values then all keys",
xml: `<tool_call>
function_name
<arg_value>X</arg_value>
<arg_value>Y</arg_value>
<arg_value>Z</arg_value>
<arg_key>x</arg_key>
<arg_key>y</arg_key>
<arg_key>z</arg_key>
</tool_call>`,
wantKeys: []string{"x", "y", "z"},
wantValues: []string{"X", "Y", "Z"},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var parsed GLMToolCallXML
err := xml.Unmarshal([]byte(tc.xml), &parsed)
if err != nil {
t.Fatalf("failed to unmarshal XML: %v", err)
}
if !reflect.DeepEqual(parsed.Keys, tc.wantKeys) {
t.Errorf("Keys order mismatch:\ngot: %v\nwant: %v", parsed.Keys, tc.wantKeys)
}
if !reflect.DeepEqual(parsed.Values, tc.wantValues) {
t.Errorf("Values order mismatch:\ngot: %v\nwant: %v", parsed.Values, tc.wantValues)
}
})
}
}
func TestGLM46ToolCallParsing(t *testing.T) {
type testCase struct {
name string
rawToolCall string
tools []api.Tool
wantToolCall api.ToolCall
}
cases := []testCase{
{
name: "simple tool call",
tools: []api.Tool{},
rawToolCall: `get-current-weather
<arg_key>location</arg_key>
<arg_value>New York, NY</arg_value>
<arg_key>unit</arg_key>
<arg_value>celsius</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get-current-weather",
Arguments: args(`{"location": "New York, NY", "unit": "celsius"}`),
},
},
},
{
name: "tool call with typed parameters",
tools: []api.Tool{
tool("calculate", map[string]api.ToolProperty{
"x": {Type: api.PropertyType{"number"}},
"y": {Type: api.PropertyType{"integer"}},
"enabled": {Type: api.PropertyType{"boolean"}},
"items": {Type: api.PropertyType{"array"}},
}),
},
rawToolCall: `calculate
<arg_key>x</arg_key>
<arg_value>3.14</arg_value>
<arg_key>y</arg_key>
<arg_value>42</arg_value>
<arg_key>enabled</arg_key>
<arg_value>true</arg_value>
<arg_key>items</arg_key>
<arg_value>["a", "b", "c"]</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "calculate",
Arguments: args(`{"enabled": true, "items": ["a", "b", "c"], "x": 3.14, "y": 42}`),
},
},
},
{
name: "function name with whitespace",
tools: []api.Tool{},
rawToolCall: ` get-weather
<arg_key>city</arg_key>
<arg_value>Paris</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get-weather",
Arguments: args(`{"city": "Paris"}`),
},
},
},
{
name: "values with special characters",
tools: []api.Tool{},
rawToolCall: `execute-command
<arg_key>command</arg_key>
<arg_value>ls && echo "done"</arg_value>
<arg_key>message</arg_key>
<arg_value>a < b and c > d</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "execute-command",
Arguments: args(`{"command": "ls && echo \"done\"", "message": "a < b and c > d"}`),
},
},
},
{
name: "unicode in function names and values",
tools: []api.Tool{},
rawToolCall: `获取天气
<arg_key>城市</arg_key>
<arg_value>北京</arg_value>
<arg_key>message</arg_key>
<arg_value>Hello! 你好! 🌟</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "获取天气",
Arguments: args(`{"message": "Hello! 你好! 🌟", "城市": "北京"}`),
},
},
},
{
name: "empty value",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>param1</arg_key>
<arg_value></arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"param1": ""}`),
},
},
},
{
name: "special chars in arg_key names",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>param<1></arg_key>
<arg_value>value1</arg_value>
<arg_key>a&b</arg_key>
<arg_value>value2</arg_value>
<arg_key>x>y</arg_key>
<arg_value>value3</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"a&b": "value2", "param<1>": "value1", "x>y": "value3"}`),
},
},
},
{
name: "multiple consecutive ampersands",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>param</arg_key>
<arg_value>test &&&& more</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"param": "test &&&& more"}`),
},
},
},
{
name: "mixed special chars together",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>param</arg_key>
<arg_value><>&<>&</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"param": "<>&<>&"}`),
},
},
},
{
name: "newlines and tabs in parameter values",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>multiline</arg_key>
<arg_value>line1
indented line2
line3</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"multiline": "line1\n\tindented line2\nline3"}`),
},
},
},
{
name: "single and double quotes in values",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>quotes</arg_key>
<arg_value>She said "Hello's there!"</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"quotes": "She said \"Hello's there!\""}`),
},
},
},
{
name: "CDATA-like content that should be treated as text",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>cdata</arg_key>
<arg_value><![CDATA[not actual cdata]]></arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"cdata": "<![CDATA[not actual cdata]]>"}`),
},
},
},
{
name: "all special XML entities",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>entities</arg_key>
<arg_value>&lt;&gt;&amp;&apos;&quot;</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"entities": "&lt;&gt;&amp;&apos;&quot;"}`),
},
},
},
{
name: "order preservation with multiple parameters",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>first</arg_key>
<arg_value>value1</arg_value>
<arg_key>second</arg_key>
<arg_value>value2</arg_value>
<arg_key>third</arg_key>
<arg_value>value3</arg_value>
<arg_key>fourth</arg_key>
<arg_value>value4</arg_value>
<arg_key>fifth</arg_key>
<arg_value>value5</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
Arguments: args(`{"fifth": "value5", "first": "value1", "fourth": "value4", "second": "value2", "third": "value3"}`),
},
},
},
{
name: "order preservation with identical key names but different positions",
tools: []api.Tool{},
rawToolCall: `test-function
<arg_key>param</arg_key>
<arg_value>first occurrence</arg_value>
<arg_key>other</arg_key>
<arg_value>middle</arg_value>
<arg_key>param</arg_key>
<arg_value>second occurrence</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test-function",
// Later occurrence should overwrite earlier one
Arguments: args(`{"other": "middle", "param": "second occurrence"}`),
},
},
},
{
name: "array with mixed types",
tools: []api.Tool{
tool("process", map[string]api.ToolProperty{
"items": {Type: api.PropertyType{"array"}},
}),
},
rawToolCall: `process
<arg_key>items</arg_key>
<arg_value>[1, "hello", true, null]</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "process",
Arguments: args(`{"items": [1, "hello", true, null]}`),
},
},
},
{
name: "empty array",
tools: []api.Tool{
tool("test", map[string]api.ToolProperty{
"tags": {Type: api.PropertyType{"array"}},
}),
},
rawToolCall: `test
<arg_key>tags</arg_key>
<arg_value>[]</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "test",
Arguments: args(`{"tags": []}`),
},
},
},
{
name: "anyOf array or string - with array of objects",
tools: []api.Tool{
tool("TodoWrite", map[string]api.ToolProperty{
"todos": {AnyOf: []api.ToolProperty{{Type: api.PropertyType{"array"}}, {Type: api.PropertyType{"string"}}}},
}),
},
// <tool_call>TodoWrite
// <arg_key>todos</arg_key>
// <arg_value>[{"content": "Set up HTML file and basic structure", "id": "1", "priority": "high", "status": "pending"}, {"content": "Create 3D scene with Three.js", "id": "2", "priority": "high", "status": "pending"}, {"content": "Implement terrain generation with blocks", "id": "3", "priority": "high", "status": "pending"}, {"content": "Add player controls (movement, camera)", "id": "4", "priority": "high", "status": "pending"}, {"content": "Implement block placement/destruction", "id": "5", "priority": "medium", "status": "pending"}, {"content": "Add lighting and textures", "id": "6", "priority": "medium", "status": "pending"}, {"content": "Test and optimize performance", "id": "7", "priority": "low", "status": "pending"}]</arg_value>
// </tool_call>
rawToolCall: `TodoWrite
<arg_key>todos</arg_key>
<arg_value>[{"content": "task 1", "status": "pending", "priority": "high", "id": "1"}, {"content": "task 2", "status": "completed", "priority": "low", "id": "2"}]</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "TodoWrite",
Arguments: args(`{"todos": [{"content": "task 1", "id": "1", "priority": "high", "status": "pending"}, {"content": "task 2", "id": "2", "priority": "low", "status": "completed"}]}`),
},
},
},
{
name: "anyOf array or string - with plain string",
tools: []api.Tool{
tool("TodoWrite", map[string]api.ToolProperty{
"todos": {Type: api.PropertyType{"array", "string"}},
}),
},
rawToolCall: `TodoWrite
<arg_key>todos</arg_key>
<arg_value>Error: could not load todos</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "TodoWrite",
Arguments: args(`{"todos": "Error: could not load todos"}`),
},
},
},
}
for i, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
gotToolCall, err := parseGLM46ToolCall(glm46EventRawToolCall{raw: tc.rawToolCall}, tc.tools)
if err != nil {
t.Errorf("case %d (%s): %v", i, tc.name, err)
}
if !toolCallEqual(gotToolCall, tc.wantToolCall) {
t.Errorf("case %d (%s): got tool call %#v, want %#v", i, tc.name, gotToolCall, tc.wantToolCall)
}
})
}
}

View File

@@ -1,20 +0,0 @@
package parsers
import "github.com/ollama/ollama/api"
// GLM47Parser extends GLM46Parser with thinking-aware initialization.
// GLM-4.7's prompt ends with <think> when thinking is enabled, so the parser
// must start in CollectingThinking state (the model outputs thinking content directly).
type GLM47Parser struct {
GLM46Parser
}
func (p *GLM47Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.tools = tools
// When thinking is enabled (nil or true), the prompt ends with <think>,
// so model output starts directly with thinking content (no opening tag).
if thinkValue == nil || thinkValue.Bool() {
p.state = glm46ParserState_CollectingThinking
}
return tools
}
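
A quick illustration of the difference (mirroring TestGLM47ParserAdd below): with thinking enabled, the model's first tokens are already thinking content, so no opening <think> tag is expected.

p := &GLM47Parser{}
p.Init(nil, nil, nil) // nil ThinkValue counts as thinking enabled
content, thinking, _, _ := p.Add("plan</think>Answer", true)
// thinking == "plan", content == "Answer", with no leading <think> required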

View File

@@ -1,99 +0,0 @@
package parsers
import (
"reflect"
"testing"
"github.com/ollama/ollama/api"
)
func TestGLM47ParserAdd(t *testing.T) {
parser := GLM47Parser{}
parser.Init([]api.Tool{
tool("calculate", map[string]api.ToolProperty{
"count": {Type: api.PropertyType{"integer"}},
"enabled": {Type: api.PropertyType{"boolean"}},
}),
}, nil, nil)
// When thinking is enabled (thinkValue nil), the prompt ends with <think>,
// so the model output does NOT include the opening <think> tag.
content, thinking, calls, err := parser.Add("plan</think>Answer<tool_call>calculate<arg_key>count</arg_key><arg_value>3</arg_value><arg_key>enabled</arg_key><arg_value>true</arg_value></tool_call>", true)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if thinking != "plan" {
t.Fatalf("expected thinking 'plan', got %q", thinking)
}
if content != "Answer" {
t.Fatalf("expected content 'Answer', got %q", content)
}
if len(calls) != 1 {
t.Fatalf("expected 1 tool call, got %d", len(calls))
}
expectedArgs := args(`{"count": 3, "enabled": true}`)
if !toolCallEqual(api.ToolCall{Function: api.ToolCallFunction{Arguments: calls[0].Function.Arguments}}, api.ToolCall{Function: api.ToolCallFunction{Arguments: expectedArgs}}) {
t.Fatalf("expected args %#v, got %#v", expectedArgs.ToMap(), calls[0].Function.Arguments.ToMap())
}
}
func TestGLM47ParserNoThinkingContent(t *testing.T) {
parser := GLM47Parser{}
parser.Init(nil, nil, nil)
// When thinking is enabled but model has no thinking to output,
// it should output </think> immediately followed by content.
content, thinking, calls, err := parser.Add("</think>Plain answer", true)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if thinking != "" {
t.Fatalf("expected empty thinking, got %q", thinking)
}
if content != "Plain answer" {
t.Fatalf("expected content 'Plain answer', got %q", content)
}
if len(calls) != 0 {
t.Fatalf("expected no tool calls, got %d", len(calls))
}
}
func TestGLM47ParserThinkingDisabled(t *testing.T) {
parser := GLM47Parser{}
// When thinking is disabled, parser stays in LookingForThinkingOpen state
parser.Init(nil, nil, &api.ThinkValue{Value: false})
// Model outputs plain content (prompt ended with </think>)
content, thinking, calls, err := parser.Add("Plain answer", true)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if thinking != "" {
t.Fatalf("expected empty thinking, got %q", thinking)
}
if content != "Plain answer" {
t.Fatalf("expected content 'Plain answer', got %q", content)
}
if len(calls) != 0 {
t.Fatalf("expected no tool calls, got %d", len(calls))
}
}
func TestGLM47ParserToolCallEscaping(t *testing.T) {
toolCall, err := parseGLM46ToolCall(glm46EventRawToolCall{raw: `exec
<arg_key>expr</arg_key>
<arg_value>a < b && c > d</arg_value>`}, nil)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
expected := api.ToolCall{
Function: api.ToolCallFunction{
Name: "exec",
Arguments: args(`{"expr": "a < b && c > d"}`),
},
}
if !reflect.DeepEqual(toolCall, expected) {
t.Fatalf("expected %#v, got %#v", expected, toolCall)
}
}

View File

@@ -1,498 +0,0 @@
package parsers
import (
"encoding/json"
"errors"
"log/slog"
"strconv"
"strings"
"unicode"
"github.com/ollama/ollama/api"
)
type LFM2ParserState int
const (
LFM2CollectingThinking LFM2ParserState = iota
LFM2CollectingContent
LFM2CollectingToolCalls
)
const (
lfm2ThinkingOpenTag = "<think>"
lfm2ThinkingCloseTag = "</think>"
lfm2ToolCallStartTag = "<|tool_call_start|>"
lfm2ToolCallEndTag = "<|tool_call_end|>"
)
type LFM2Parser struct {
state LFM2ParserState
buffer strings.Builder
hasThinkingSupport bool
needsThinkingLeadingTrim bool // trim leading whitespace after <think> tag
needsContentLeadingTrim bool // trim leading whitespace after </think> tag
}
func (p *LFM2Parser) HasToolSupport() bool {
return true
}
func (p *LFM2Parser) HasThinkingSupport() bool {
return p.hasThinkingSupport
}
func (p *LFM2Parser) setInitialState(lastMessage *api.Message, thinkValue *api.ThinkValue) {
prefill := lastMessage != nil && lastMessage.Role == "assistant"
// Check both model capability AND request preference
thinkingEnabled := p.HasThinkingSupport() && (thinkValue != nil && thinkValue.Bool())
if !thinkingEnabled {
p.state = LFM2CollectingContent
return
}
if prefill && lastMessage.Content != "" {
p.state = LFM2CollectingContent
return
}
p.state = LFM2CollectingThinking
p.needsThinkingLeadingTrim = true
}
func (p *LFM2Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.setInitialState(lastMessage, thinkValue)
return tools
}
type lfm2Event interface {
isLFM2Event()
}
type lfm2EventThinkingContent struct {
content string
}
type lfm2EventContent struct {
content string
}
type lfm2EventToolCall struct {
toolCall api.ToolCall
}
func (lfm2EventThinkingContent) isLFM2Event() {}
func (lfm2EventContent) isLFM2Event() {}
func (lfm2EventToolCall) isLFM2Event() {}
func (p *LFM2Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
p.buffer.WriteString(s)
events := p.parseEvents()
var toolCalls []api.ToolCall
var contentSb strings.Builder
var thinkingSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case lfm2EventToolCall:
toolCalls = append(toolCalls, event.toolCall)
case lfm2EventThinkingContent:
thinkingSb.WriteString(event.content)
case lfm2EventContent:
contentSb.WriteString(event.content)
}
}
return contentSb.String(), thinkingSb.String(), toolCalls, nil
}
func (p *LFM2Parser) parseEvents() []lfm2Event {
var all []lfm2Event
keepLooping := true
for keepLooping {
var events []lfm2Event
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
return all
}
func (p *LFM2Parser) eat() ([]lfm2Event, bool) {
var events []lfm2Event
bufStr := p.buffer.String()
if bufStr == "" {
return events, false
}
switch p.state {
case LFM2CollectingThinking:
// Strip opening <think> tag if present
if strings.HasPrefix(bufStr, lfm2ThinkingOpenTag) {
bufStr = bufStr[len(lfm2ThinkingOpenTag):]
p.needsThinkingLeadingTrim = true
p.buffer.Reset()
p.buffer.WriteString(bufStr)
}
// Trim leading whitespace after <think> tag (may span multiple chunks)
if p.needsThinkingLeadingTrim {
if trimmed := strings.TrimLeftFunc(bufStr, unicode.IsSpace); trimmed != bufStr {
bufStr = trimmed
p.buffer.Reset()
p.buffer.WriteString(bufStr)
}
// Clear flag once we have non-whitespace content or buffer is empty
if len(bufStr) > 0 {
p.needsThinkingLeadingTrim = false
}
}
if strings.Contains(bufStr, lfm2ThinkingCloseTag) { // thinking[</think>] -> content
split := strings.SplitN(bufStr, lfm2ThinkingCloseTag, 2)
thinking := split[0]
thinking = strings.TrimRightFunc(thinking, unicode.IsSpace)
remaining := split[1]
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = LFM2CollectingContent
p.needsThinkingLeadingTrim = false
// Set flag to trim any additional whitespace that may arrive in later chunks
p.needsContentLeadingTrim = len(remaining) == 0
if len(thinking) > 0 {
events = append(events, lfm2EventThinkingContent{content: thinking})
}
return events, true
} else if overlapLen := overlap(bufStr, lfm2ThinkingCloseTag); overlapLen > 0 { // partial </think>
beforePartialTag := bufStr[:len(bufStr)-overlapLen]
trailingLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingLen
unambiguous := bufStr[:ambiguousStart]
ambiguous := bufStr[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, lfm2EventThinkingContent{content: unambiguous})
}
return events, false
} else { // otherwise it's thinking content
whitespaceLen := trailingWhitespaceLen(bufStr)
ambiguousStart := len(bufStr) - whitespaceLen
unambiguous := bufStr[:ambiguousStart]
ambiguous := bufStr[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, lfm2EventThinkingContent{content: unambiguous})
}
return events, false
}
case LFM2CollectingContent:
// Trim leading whitespace after </think> tag (may span multiple chunks)
if p.needsContentLeadingTrim {
if trimmed := strings.TrimLeftFunc(bufStr, unicode.IsSpace); trimmed != bufStr {
bufStr = trimmed
p.buffer.Reset()
p.buffer.WriteString(bufStr)
}
// Clear flag once we have non-whitespace content
if len(bufStr) > 0 {
p.needsContentLeadingTrim = false
}
}
if strings.Contains(bufStr, lfm2ToolCallStartTag) { // content[<|tool_call_start|>] -> tool calls
split := strings.SplitN(bufStr, lfm2ToolCallStartTag, 2)
contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = LFM2CollectingToolCalls
if len(contentBefore) > 0 {
events = append(events, lfm2EventContent{content: contentBefore})
}
return events, true
} else { // otherwise it's content
p.buffer.Reset()
if len(bufStr) > 0 {
events = append(events, lfm2EventContent{content: bufStr})
}
return events, false
}
case LFM2CollectingToolCalls:
// Look for complete tool call JSON between tags
if idx := strings.Index(bufStr, lfm2ToolCallEndTag); idx != -1 {
toolCallContent := bufStr[:idx]
if toolCalls, err := p.parseToolCallsContent(toolCallContent); err == nil && len(toolCalls) > 0 {
remaining := bufStr[idx+len(lfm2ToolCallEndTag):]
// Check if there's another tool call
if strings.HasPrefix(remaining, lfm2ToolCallStartTag) {
remaining = remaining[len(lfm2ToolCallStartTag):]
} else {
// No more tool calls, go back to content
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.state = LFM2CollectingContent
}
p.buffer.Reset()
p.buffer.WriteString(remaining)
for _, tc := range toolCalls {
events = append(events, lfm2EventToolCall{toolCall: tc})
}
return events, true
} else if err != nil {
slog.Warn("lfm2 tool call parsing failed", "error", err, "content", toolCallContent)
}
}
return events, false
}
return events, false
}
// parseToolCallsContent parses one or more tool calls from content
// Supports JSON format and Python-style format including multiple calls: [func1(...),func2(...)]
func (p *LFM2Parser) parseToolCallsContent(content string) ([]api.ToolCall, error) {
content = strings.TrimSpace(content)
// Try JSON format first: {"name": "func", "arguments": {...}}
var parsed struct {
Name string `json:"name"`
Arguments json.RawMessage `json:"arguments"`
}
if err := json.Unmarshal([]byte(content), &parsed); err == nil && parsed.Name != "" {
var args api.ToolCallFunctionArguments
if len(parsed.Arguments) > 0 {
if err := json.Unmarshal(parsed.Arguments, &args); err != nil {
return nil, err
}
} else {
args = api.NewToolCallFunctionArguments()
}
return []api.ToolCall{{
Function: api.ToolCallFunction{
Name: parsed.Name,
Arguments: args,
},
}}, nil
}
// Try Python-style format: [func(arg1='val1'),func2(arg2='val2')] or func(arg1='val1')
return p.parsePythonStyleToolCalls(content)
}
// parsePythonStyleToolCalls parses one or more Python-style tool calls
// Examples: [bash(command='ls'),bash(command='pwd')] or bash(command='ls')
func (p *LFM2Parser) parsePythonStyleToolCalls(content string) ([]api.ToolCall, error) {
content = strings.TrimSpace(content)
// Strip outer brackets if present: [func(...)] -> func(...)
if strings.HasPrefix(content, "[") && strings.HasSuffix(content, "]") {
content = content[1 : len(content)-1]
}
var toolCalls []api.ToolCall
// Parse multiple function calls separated by commas at the top level
for len(content) > 0 {
content = strings.TrimSpace(content)
if content == "" {
break
}
// Skip leading comma from previous iteration
if strings.HasPrefix(content, ",") {
content = strings.TrimSpace(content[1:])
if content == "" {
break
}
}
// Find function name
parenIdx := strings.Index(content, "(")
if parenIdx == -1 {
return nil, errors.New("invalid tool call: no opening parenthesis")
}
funcName := strings.TrimSpace(content[:parenIdx])
if funcName == "" {
return nil, errors.New("invalid tool call: empty function name")
}
// Find matching closing parenthesis
closeIdx := findMatchingParen(content, parenIdx)
if closeIdx == -1 {
return nil, errors.New("invalid tool call: no matching closing parenthesis")
}
argsStr := content[parenIdx+1 : closeIdx]
args := api.NewToolCallFunctionArguments()
if argsStr != "" {
if err := parsePythonArgs(argsStr, &args); err != nil {
return nil, err
}
}
toolCalls = append(toolCalls, api.ToolCall{
Function: api.ToolCallFunction{
Name: funcName,
Arguments: args,
},
})
// Move past this function call
content = content[closeIdx+1:]
}
if len(toolCalls) == 0 {
return nil, errors.New("no tool calls found")
}
return toolCalls, nil
}
// findMatchingParen finds the index of the closing parenthesis matching the one at openIdx
// Returns -1 if not found. Handles nested parentheses and quoted strings.
func findMatchingParen(s string, openIdx int) int {
depth := 1
i := openIdx + 1
for i < len(s) && depth > 0 {
switch s[i] {
case '(':
depth++
case ')':
depth--
if depth == 0 {
return i
}
case '\'', '"':
// Skip quoted string
quote := s[i]
i++
for i < len(s) && s[i] != quote {
if s[i] == '\\' && i+1 < len(s) {
i++ // skip escaped char
}
i++
}
}
i++
}
return -1
}
// parseToolCallContent parses a single tool call (for backward compatibility with tests)
func (p *LFM2Parser) parseToolCallContent(content string) (api.ToolCall, error) {
calls, err := p.parseToolCallsContent(content)
if err != nil {
return api.ToolCall{}, err
}
if len(calls) == 0 {
return api.ToolCall{}, errors.New("no tool call found")
}
return calls[0], nil
}
// parsePythonArgs parses Python-style keyword arguments: key='value', key2="value2"
func parsePythonArgs(argsStr string, args *api.ToolCallFunctionArguments) error {
// Simple state machine to parse key='value' pairs
// Handles: command='ls', flag="-la", count=42, enabled=true
var key string
i := 0
for i < len(argsStr) {
// Skip whitespace
for i < len(argsStr) && (argsStr[i] == ' ' || argsStr[i] == '\t' || argsStr[i] == '\n') {
i++
}
if i >= len(argsStr) {
break
}
// Parse key
keyStart := i
for i < len(argsStr) && argsStr[i] != '=' && argsStr[i] != ',' {
i++
}
if i >= len(argsStr) || argsStr[i] != '=' {
return errors.New("invalid argument: expected '='")
}
key = strings.TrimSpace(argsStr[keyStart:i])
i++ // skip '='
// Skip whitespace after =
for i < len(argsStr) && (argsStr[i] == ' ' || argsStr[i] == '\t') {
i++
}
// Parse value
var value string
if i < len(argsStr) && (argsStr[i] == '\'' || argsStr[i] == '"') {
// Quoted string
quote := argsStr[i]
i++
valueStart := i
for i < len(argsStr) && argsStr[i] != quote {
if argsStr[i] == '\\' && i+1 < len(argsStr) {
i += 2 // skip escaped char
} else {
i++
}
}
value = argsStr[valueStart:i]
if i < len(argsStr) {
i++ // skip closing quote
}
args.Set(key, value)
} else {
// Unquoted value (number, bool, etc)
valueStart := i
for i < len(argsStr) && argsStr[i] != ',' {
i++
}
value = strings.TrimSpace(argsStr[valueStart:i])
// Try to parse as number or bool
if v, err := strconv.ParseInt(value, 10, 64); err == nil {
args.Set(key, v)
} else if v, err := strconv.ParseFloat(value, 64); err == nil {
args.Set(key, v)
} else if value == "true" {
args.Set(key, true)
} else if value == "false" {
args.Set(key, false)
} else {
args.Set(key, value)
}
}
// Skip comma and whitespace
for i < len(argsStr) && (argsStr[i] == ',' || argsStr[i] == ' ' || argsStr[i] == '\t' || argsStr[i] == '\n') {
i++
}
}
return nil
}
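
A short usage sketch of the two accepted encodings (tool names here are hypothetical):

p := &LFM2Parser{}

// JSON form
calls, _ := p.parseToolCallsContent(`{"name": "get_weather", "arguments": {"city": "Paris"}}`)
// calls[0].Function.Name == "get_weather"

// Python-style form with multiple calls; findMatchingParen keeps the split
// honest even when quoted values contain commas or parentheses
calls, _ = p.parseToolCallsContent(`[bash(command='ls -la'), sleep(seconds=5)]`)
// calls[0]: bash {"command": "ls -la"}; calls[1]: sleep {"seconds": 5}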

View File

File diff suppressed because it is too large

View File

@@ -1,6 +1,7 @@
 package parsers
 import (
+	"regexp"
 	"strings"
 	"unicode"
@@ -13,114 +14,243 @@ const (
Nemotron3NanoCollectingThinking Nemotron3NanoParserState = iota Nemotron3NanoCollectingThinking Nemotron3NanoParserState = iota
Nemotron3NanoSkipWhitespaceAfterThinking Nemotron3NanoSkipWhitespaceAfterThinking
Nemotron3NanoCollectingContent Nemotron3NanoCollectingContent
Nemotron3NanoCollectingToolCalls
) )
const ( const (
nemotronThinkClose = "</think>" nemotronThinkClose = "</think>"
nemotronToolCallOpen = "<tool_call>" nemotronToolCallOpen = "<tool_call>"
nemotronToolCallClose = "</tool_call>"
) )
type Nemotron3NanoParser struct { type Nemotron3NanoParser struct {
state Nemotron3NanoParserState state Nemotron3NanoParserState
buffer strings.Builder buffer strings.Builder
toolParser *Qwen3CoderParser tools []api.Tool
} }
func (p *Nemotron3NanoParser) HasToolSupport() bool { return true } func (p *Nemotron3NanoParser) HasToolSupport() bool { return true }
func (p *Nemotron3NanoParser) HasThinkingSupport() bool { return true } func (p *Nemotron3NanoParser) HasThinkingSupport() bool { return true }
func (p *Nemotron3NanoParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool { func (p *Nemotron3NanoParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.toolParser = &Qwen3CoderParser{} p.tools = tools
p.toolParser.Init(tools, nil, nil)
// thinking is enabled if user requests it
thinkingEnabled := thinkValue != nil && thinkValue.Bool() thinkingEnabled := thinkValue != nil && thinkValue.Bool()
prefill := lastMessage != nil && lastMessage.Role == "assistant" prefill := lastMessage != nil && lastMessage.Role == "assistant"
if !thinkingEnabled || (prefill && lastMessage.Content != "") { if !thinkingEnabled {
p.state = Nemotron3NanoCollectingContent p.state = Nemotron3NanoCollectingContent
} else { return tools
p.state = Nemotron3NanoCollectingThinking
} }
if prefill && lastMessage.Content != "" {
p.state = Nemotron3NanoCollectingContent
return tools
}
p.state = Nemotron3NanoCollectingThinking
return tools return tools
} }
type nemotronEvent interface {
isNemotronEvent()
}
type nemotronEventThinkingContent struct {
content string
}
type nemotronEventContent struct {
content string
}
type nemotronEventToolCall struct {
toolCall api.ToolCall
}
func (nemotronEventThinkingContent) isNemotronEvent() {}
func (nemotronEventContent) isNemotronEvent() {}
func (nemotronEventToolCall) isNemotronEvent() {}
func (p *Nemotron3NanoParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) { func (p *Nemotron3NanoParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
if p.state == Nemotron3NanoCollectingContent { p.buffer.WriteString(s)
return p.toolParser.Add(s, done) events := p.parseEvents()
var toolCalls []api.ToolCall
var contentSb strings.Builder
var thinkingSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case nemotronEventToolCall:
toolCalls = append(toolCalls, event.toolCall)
case nemotronEventThinkingContent:
thinkingSb.WriteString(event.content)
case nemotronEventContent:
contentSb.WriteString(event.content)
}
} }
if p.state == Nemotron3NanoSkipWhitespaceAfterThinking { return contentSb.String(), thinkingSb.String(), toolCalls, nil
s = strings.TrimLeftFunc(s, unicode.IsSpace) }
if s == "" {
return "", "", nil, nil func (p *Nemotron3NanoParser) parseEvents() []nemotronEvent {
var all []nemotronEvent
keepLooping := true
for keepLooping {
var events []nemotronEvent
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
return all
}
// emitWithPartialCheck extracts unambiguous content before a potential partial tag
func (p *Nemotron3NanoParser) emitWithPartialCheck(bufStr, tag string) (unambiguous, ambiguous string) {
if overlapLen := overlap(bufStr, tag); overlapLen > 0 {
beforePartialTag := bufStr[:len(bufStr)-overlapLen]
trailingLen := trailingWhitespaceLen(beforePartialTag)
return bufStr[:len(beforePartialTag)-trailingLen], bufStr[len(beforePartialTag)-trailingLen:]
}
wsLen := trailingWhitespaceLen(bufStr)
return bufStr[:len(bufStr)-wsLen], bufStr[len(bufStr)-wsLen:]
}
func (p *Nemotron3NanoParser) eat() ([]nemotronEvent, bool) {
bufStr := p.buffer.String()
if bufStr == "" {
return nil, false
}
switch p.state {
case Nemotron3NanoCollectingThinking:
if strings.Contains(bufStr, nemotronThinkClose) {
split := strings.SplitN(bufStr, nemotronThinkClose, 2)
thinking := strings.TrimRightFunc(split[0], unicode.IsSpace)
p.buffer.Reset()
remainder := strings.TrimLeftFunc(split[1], unicode.IsSpace)
p.buffer.WriteString(remainder)
// Transition to whitespace-skipping state if buffer is empty,
// otherwise go directly to content collection
if remainder == "" {
p.state = Nemotron3NanoSkipWhitespaceAfterThinking
} else {
p.state = Nemotron3NanoCollectingContent
}
if thinking != "" {
return []nemotronEvent{nemotronEventThinkingContent{content: thinking}}, true
}
return nil, true
}
unambig, ambig := p.emitWithPartialCheck(bufStr, nemotronThinkClose)
p.buffer.Reset()
p.buffer.WriteString(ambig)
if unambig != "" {
return []nemotronEvent{nemotronEventThinkingContent{content: unambig}}, false
}
return nil, false
// We only want to skip whitespace between thinking and content
case Nemotron3NanoSkipWhitespaceAfterThinking:
bufStr = strings.TrimLeftFunc(bufStr, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(bufStr)
if bufStr == "" {
return nil, false
} }
p.state = Nemotron3NanoCollectingContent p.state = Nemotron3NanoCollectingContent
return p.toolParser.Add(s, done) return nil, true
}
// Nemotron3NanoCollectingThinking - buffer and look for end markers case Nemotron3NanoCollectingContent:
p.buffer.WriteString(s) if strings.Contains(bufStr, nemotronToolCallOpen) {
-	bufStr := p.buffer.String()
-	// Look for end of thinking: </think> or <tool_call> (model may skip </think>)
-	thinkIdx := strings.Index(bufStr, nemotronThinkClose)
-	toolIdx := strings.Index(bufStr, nemotronToolCallOpen)
-
-	var endIdx int = -1
-	var remainder string
-
-	if thinkIdx != -1 && (toolIdx == -1 || thinkIdx < toolIdx) {
-		endIdx = thinkIdx
-		remainder = strings.TrimLeftFunc(bufStr[thinkIdx+len(nemotronThinkClose):], unicode.IsSpace)
-	} else if toolIdx != -1 {
-		endIdx = toolIdx
-		remainder = bufStr[toolIdx:] // Include <tool_call> tag
-	}
-
-	if endIdx != -1 {
-		thinking = strings.TrimRightFunc(bufStr[:endIdx], unicode.IsSpace)
-		p.buffer.Reset()
-		if remainder == "" {
-			p.state = Nemotron3NanoSkipWhitespaceAfterThinking
-		} else {
-			p.state = Nemotron3NanoCollectingContent
-			content, _, calls, err = p.toolParser.Add(remainder, done)
-		}
-		return content, thinking, calls, err
-	}
-
-	// No end marker - emit unambiguous thinking
-	thinking = p.emitThinking(bufStr)
-	return "", thinking, nil, nil
-}
-
-// emitThinking returns unambiguous thinking content, keeping potential partial tags in buffer
-func (p *Nemotron3NanoParser) emitThinking(bufStr string) string {
-	// Check for partial </think> or <tool_call> at end
-	thinkOverlap := overlap(bufStr, nemotronThinkClose)
-	toolOverlap := overlap(bufStr, nemotronToolCallOpen)
-	maxOverlap := max(thinkOverlap, toolOverlap)
-	if maxOverlap > 0 {
-		unambiguous := bufStr[:len(bufStr)-maxOverlap]
-		unambiguous = strings.TrimRightFunc(unambiguous, unicode.IsSpace)
-		p.buffer.Reset()
-		p.buffer.WriteString(bufStr[len(bufStr)-maxOverlap:])
-		return unambiguous
-	}
-
-	// No partial tags - emit all but trailing whitespace
-	wsLen := trailingWhitespaceLen(bufStr)
-	if wsLen > 0 {
-		unambiguous := bufStr[:len(bufStr)-wsLen]
-		p.buffer.Reset()
-		p.buffer.WriteString(bufStr[len(bufStr)-wsLen:])
-		return unambiguous
-	}
-
-	// Nothing to hold back
-	p.buffer.Reset()
-	return bufStr
-}
+			split := strings.SplitN(bufStr, nemotronToolCallOpen, 2)
+			content := strings.TrimRightFunc(split[0], unicode.IsSpace)
+			p.buffer.Reset()
+			p.buffer.WriteString(split[1])
+			p.state = Nemotron3NanoCollectingToolCalls
+			if content != "" {
+				return []nemotronEvent{nemotronEventContent{content: content}}, true
+			}
+			return nil, true
+		}
+		unambig, ambig := p.emitWithPartialCheck(bufStr, nemotronToolCallOpen)
+		p.buffer.Reset()
+		p.buffer.WriteString(ambig)
+		if unambig != "" {
+			return []nemotronEvent{nemotronEventContent{content: unambig}}, false
+		}
+		return nil, false
+	case Nemotron3NanoCollectingToolCalls:
+		if strings.Contains(bufStr, nemotronToolCallClose) {
+			split := strings.SplitN(bufStr, nemotronToolCallClose, 2)
+			remaining := strings.TrimLeftFunc(split[1], unicode.IsSpace)
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			var events []nemotronEvent
+			if tc, err := p.parseToolCall(split[0]); err == nil {
+				events = append(events, nemotronEventToolCall{toolCall: tc})
+			}
+			if !strings.Contains(remaining, nemotronToolCallOpen) {
+				p.state = Nemotron3NanoCollectingContent
+			}
+			return events, true
+		}
+		return nil, false
+	}
+	return nil, false
+}
+
+var (
+	nemotronFunctionRegex  = regexp.MustCompile(`<function=([^>]+)>`)
+	nemotronParameterRegex = regexp.MustCompile(`<parameter=([^>]+)>\n?([\s\S]*?)\n?</parameter>`)
+)
+
+func (p *Nemotron3NanoParser) parseToolCall(content string) (api.ToolCall, error) {
+	toolCall := api.ToolCall{}
+
+	// Extract function name
+	fnMatch := nemotronFunctionRegex.FindStringSubmatch(content)
+	if len(fnMatch) < 2 {
+		return toolCall, nil
+	}
+	toolCall.Function.Name = fnMatch[1]
+
+	// Extract parameters
+	toolCall.Function.Arguments = api.NewToolCallFunctionArguments()
+	paramMatches := nemotronParameterRegex.FindAllStringSubmatch(content, -1)
+	for _, match := range paramMatches {
+		if len(match) >= 3 {
+			paramName := match[1]
+			paramValue := strings.TrimSpace(match[2])
+			// Try to parse as typed value based on tool definition
+			toolCall.Function.Arguments.Set(paramName, p.parseParamValue(paramName, paramValue))
+		}
+	}
+	return toolCall, nil
+}
+
+func (p *Nemotron3NanoParser) parseParamValue(paramName string, raw string) any {
+	// Find the matching tool to get parameter type
+	var paramType api.PropertyType
+	for _, tool := range p.tools {
+		if tool.Function.Parameters.Properties != nil {
+			if prop, ok := tool.Function.Parameters.Properties.Get(paramName); ok {
+				paramType = prop.Type
+				break
+			}
+		}
+	}
+	return parseValue(raw, paramType)
+}
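Both the removed emitThinking and its replacement emitWithPartialCheck turn on the same buffering trick: never emit trailing bytes that could be the start of a marker tag. Neither helper's body appears in this hunk, so here is a minimal sketch of the idea in Go (overlapLen is a hypothetical stand-in, not the package's actual helper):

package main

import (
	"fmt"
	"strings"
)

// overlapLen reports how many trailing bytes of s could be the beginning of
// marker, i.e. the length of the longest proper prefix of marker that is a
// suffix of s. A streaming parser holds exactly that many bytes back so it
// never emits half of a "</think>" or "<tool_call>" tag as visible content.
func overlapLen(s, marker string) int {
	for n := min(len(s), len(marker)-1); n > 0; n-- {
		if strings.HasSuffix(s, marker[:n]) {
			return n
		}
	}
	return 0
}

func main() {
	fmt.Println(overlapLen("thinking...</thi", "</think>")) // 5: "</thi" stays buffered
	fmt.Println(overlapLen("plain text", "<tool_call>"))    // 0: everything is safe to emit
}

Holding back at most len(marker)-1 bytes bounds the extra latency per chunk while guaranteeing that a tag split across chunks is reassembled before the state machine sees it.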

View File

@@ -8,8 +8,6 @@ import (
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )
// TestNemotron3NanoParser tests Nemotron-specific behavior (thinking support).
// Tool call parsing is tested in qwen3coder_test.go since Nemotron delegates to Qwen3CoderParser.
func TestNemotron3NanoParser(t *testing.T) {
tests := []struct {
name string
@@ -19,6 +17,18 @@ func TestNemotron3NanoParser(t *testing.T) {
expectedThinking string
expectedCalls []api.ToolCall
}{
{
name: "simple content - no thinking",
input: "Hello, how can I help you?",
thinkValue: nil,
expectedContent: "Hello, how can I help you?",
},
{
name: "simple content - thinking disabled",
input: "Hello, how can I help you?",
thinkValue: &api.ThinkValue{Value: false},
expectedContent: "Hello, how can I help you?",
},
{
name: "thinking then content",
input: "Let me think about this...</think>\nHere is my answer.",
@@ -33,6 +43,69 @@ func TestNemotron3NanoParser(t *testing.T) {
expectedThinking: "Step 1: Analyze\nStep 2: Process\nStep 3: Conclude", expectedThinking: "Step 1: Analyze\nStep 2: Process\nStep 3: Conclude",
expectedContent: "The answer is 42.", expectedContent: "The answer is 42.",
}, },
{
name: "simple tool call",
input: "<tool_call>\n<function=get_weather>\n<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>",
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "Paris"}),
},
},
},
},
{
name: "content then tool call",
input: "Let me check the weather.\n<tool_call>\n<function=get_weather>\n<parameter=city>\nNYC\n</parameter>\n</function>\n</tool_call>",
thinkValue: nil,
expectedContent: "Let me check the weather.",
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "NYC"}),
},
},
},
},
{
name: "tool call with multiple parameters",
input: "<tool_call>\n<function=book_flight>\n<parameter=from>\nSFO\n</parameter>\n<parameter=to>\nNYC\n</parameter>\n</function>\n</tool_call>",
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "book_flight",
Arguments: testArgs(map[string]any{
"from": "SFO",
"to": "NYC",
}),
},
},
},
},
{
name: "multiple tool calls",
input: "<tool_call>\n<function=get_weather>\n<parameter=city>\nSan Francisco\n</parameter>\n</function>\n</tool_call>\n" +
"<tool_call>\n<function=get_weather>\n<parameter=city>\nNew York\n</parameter>\n</function>\n</tool_call>",
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "San Francisco"}),
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "New York"}),
},
},
},
},
{
name: "thinking then tool call",
input: "I should check the weather...</think>\n<tool_call>\n<function=get_weather>\n<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>",
@@ -62,6 +135,19 @@ func TestNemotron3NanoParser(t *testing.T) {
},
},
},
{
name: "tool call with multiline parameter value",
input: "<tool_call>\n<function=create_note>\n<parameter=content>\nLine 1\nLine 2\nLine 3\n</parameter>\n</function>\n</tool_call>",
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create_note",
Arguments: testArgs(map[string]any{"content": "Line 1\nLine 2\nLine 3"}),
},
},
},
},
{
name: "empty thinking block - immediate close",
input: "</think>\nHere is my answer.",
@@ -75,6 +161,18 @@ func TestNemotron3NanoParser(t *testing.T) {
thinkValue: &api.ThinkValue{Value: false}, thinkValue: &api.ThinkValue{Value: false},
expectedContent: "</think>\nSome content after spurious tag.", expectedContent: "</think>\nSome content after spurious tag.",
}, },
{
name: "tool call with no function name - returns empty tool call",
input: "<tool_call>\n<function=>\n</function>\n</tool_call>",
thinkValue: nil,
expectedCalls: []api.ToolCall{{Function: api.ToolCallFunction{Name: "", Arguments: api.NewToolCallFunctionArguments()}}},
},
{
name: "content with newlines preserved",
input: "Line 1\n\nLine 2\n\n\nLine 3",
thinkValue: nil,
expectedContent: "Line 1\n\nLine 2\n\n\nLine 3",
},
{
name: "thinking with only whitespace after close tag",
input: "My thoughts...</think> \n\t\n Content here.",
@@ -82,6 +180,25 @@ func TestNemotron3NanoParser(t *testing.T) {
expectedThinking: "My thoughts...", expectedThinking: "My thoughts...",
expectedContent: "Content here.", expectedContent: "Content here.",
}, },
{
name: "unicode content",
input: "Hello 世界! 🌍 Ñoño",
thinkValue: nil,
expectedContent: "Hello 世界! 🌍 Ñoño",
},
{
name: "tool call with numeric parameter",
input: "<tool_call>\n<function=set_temp>\n<parameter=value>\n42\n</parameter>\n</function>\n</tool_call>",
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_temp",
Arguments: testArgs(map[string]any{"value": "42"}),
},
},
},
},
}
for _, tt := range tests {
@@ -116,8 +233,6 @@ func TestNemotron3NanoParser(t *testing.T) {
}
}
// TestNemotron3NanoParser_Streaming tests streaming behavior for thinking support.
// Tool call streaming is tested in qwen3coder_test.go.
func TestNemotron3NanoParser_Streaming(t *testing.T) {
tests := []struct {
name string
@@ -127,6 +242,18 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
expectedThinking string
expectedCalls []api.ToolCall
}{
{
name: "streaming content character by character",
chunks: []string{"H", "e", "l", "l", "o", ",", " ", "w", "o", "r", "l", "d", "!"},
thinkValue: nil,
expectedContent: "Hello, world!",
},
{
name: "streaming content small tokens",
chunks: []string{"Hel", "lo", ", ", "how ", "can", " I", " help", " you", " today", "?"},
thinkValue: nil,
expectedContent: "Hello, how can I help you today?",
},
{
name: "streaming thinking then content - granular",
chunks: []string{"Let", " me", " th", "ink", " about", " this", "...", "<", "/", "think", ">", "\n", "Here", " is", " my", " answer", "."},
@@ -141,6 +268,45 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
expectedThinking: "Step 1: Analyze\nStep 2: Process", expectedThinking: "Step 1: Analyze\nStep 2: Process",
expectedContent: "The answer.", expectedContent: "The answer.",
}, },
{
name: "streaming tool call - highly granular",
chunks: []string{"<", "tool", "_", "call", ">", "\n", "<", "func", "tion", "=", "get", "_", "weather", ">", "\n", "<", "param", "eter", "=", "city", ">", "\n", "Par", "is", "\n", "</", "param", "eter", ">", "\n", "</", "func", "tion", ">", "\n", "</", "tool", "_", "call", ">"},
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "Paris"}),
},
},
},
},
{
name: "streaming content then tool call - granular",
chunks: []string{"Let", " me", " check", " the", " weather", ".", "\n<", "tool_call", ">", "\n", "<function=", "get_weather", ">", "\n", "<parameter=", "city", ">", "\n", "NYC", "\n", "</parameter>", "\n", "</function>", "\n", "</tool_call>"},
thinkValue: nil,
expectedContent: "Let me check the weather.",
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "NYC"}),
},
},
},
},
{
name: "tool call tag split character by character",
chunks: []string{"<", "t", "o", "o", "l", "_", "c", "a", "l", "l", ">", "\n", "<", "f", "u", "n", "c", "t", "i", "o", "n", "=", "t", "e", "s", "t", ">", "\n", "<", "/", "f", "u", "n", "c", "t", "i", "o", "n", ">", "\n", "<", "/", "t", "o", "o", "l", "_", "c", "a", "l", "l", ">"},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test",
Arguments: api.NewToolCallFunctionArguments(),
},
},
},
},
{
name: "thinking close tag split character by character",
chunks: []string{"I", "'", "m", " ", "t", "h", "i", "n", "k", "i", "n", "g", ".", ".", ".", "<", "/", "t", "h", "i", "n", "k", ">", "\n", "D", "o", "n", "e", "!"},
@@ -155,6 +321,22 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
expectedThinking: "Thinking...", expectedThinking: "Thinking...",
expectedContent: "Content here.", expectedContent: "Content here.",
}, },
{
name: "tool call with multiple parameters - streaming",
chunks: []string{"<tool_", "call>\n", "<function", "=book_", "flight>", "\n<para", "meter=", "from>\n", "SFO\n", "</param", "eter>", "\n<param", "eter=to", ">\nNYC", "\n</para", "meter>", "\n</func", "tion>\n", "</tool_", "call>"},
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "book_flight",
Arguments: testArgs(map[string]any{
"from": "SFO",
"to": "NYC",
}),
},
},
},
},
{
name: "thinking then content then tool call - streaming",
chunks: []string{"Ana", "lyzing", " your", " request", "...", "</", "think", ">\n", "I'll", " check", " that", " for", " you", ".", "\n", "<tool", "_call", ">\n", "<function", "=search", ">\n", "<parameter", "=query", ">\n", "test", " query", "\n</", "parameter", ">\n", "</function", ">\n", "</tool", "_call", ">"},
@@ -170,6 +352,45 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
},
},
},
{
name: "multiple tool calls - streaming",
chunks: []string{
"<tool_call>", "\n", "<function=", "get_weather>", "\n",
"<parameter=", "city>\n", "San Fran", "cisco\n", "</parameter>", "\n",
"</function>", "\n", "</tool_call>", "\n",
"<tool_", "call>\n", "<function", "=get_weather", ">\n",
"<param", "eter=city", ">\nNew", " York\n", "</parameter>\n",
"</function>\n", "</tool_call>",
},
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "San Francisco"}),
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "New York"}),
},
},
},
},
{
name: "tool call with multiline parameter - streaming",
chunks: []string{"<tool_call>\n", "<function=", "create_note>\n", "<parameter=", "content>\n", "Line 1", "\nLine", " 2\n", "Line 3", "\n</parameter>\n", "</function>\n", "</tool_call>"},
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create_note",
Arguments: testArgs(map[string]any{"content": "Line 1\nLine 2\nLine 3"}),
},
},
},
},
{
name: "empty thinking block",
chunks: []string{"</think>", "\n", "Just content."},
@@ -177,6 +398,12 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
expectedThinking: "", expectedThinking: "",
expectedContent: "Just content.", expectedContent: "Just content.",
}, },
{
name: "empty input chunks interspersed",
chunks: []string{"Hello", "", " ", "", "world", "", "!"},
thinkValue: nil,
expectedContent: "Hello world!",
},
{
name: "tool call immediately after think close - no content",
chunks: []string{"Analyzing...", "</think>", "\n", "<tool_call>", "\n<function=test>\n</function>\n", "</tool_call>"},
@@ -191,6 +418,25 @@ func TestNemotron3NanoParser_Streaming(t *testing.T) {
},
},
},
{
name: "tool call with empty parameter value",
chunks: []string{"<tool_call>\n<function=test>\n<parameter=name>\n", "\n</parameter>\n</function>\n</tool_call>"},
thinkValue: nil,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test",
Arguments: testArgs(map[string]any{"name": ""}),
},
},
},
},
{
name: "partial tool call tag at end - buffered",
chunks: []string{"Here's some content", "<tool"},
thinkValue: nil,
expectedContent: "Here's some content",
},
}
for _, tt := range tests {
@@ -326,65 +572,3 @@ func TestNemotron3NanoParser_WithTools(t *testing.T) {
t.Errorf("calls mismatch (-got +want):\n%s", diff) t.Errorf("calls mismatch (-got +want):\n%s", diff)
} }
} }
// TestNemotron3NanoParser_ToolCallWithoutThinkClose tests the case where thinking is enabled
// but the model outputs content + tool call WITHOUT the </think> tag.
// The parser should still parse the tool call (content before is treated as thinking).
func TestNemotron3NanoParser_ToolCallWithoutThinkClose(t *testing.T) {
chunks := []string{
"Let", " me", " analyze", " this", ".", "\n",
"<tool_call>", "\n",
"<function=get_weather>", "\n",
"<parameter=city>", "Paris", "</parameter>", "\n",
"</function>", "\n",
"</tool_call>",
}
p := &Nemotron3NanoParser{}
p.Init(nil, nil, &api.ThinkValue{Value: true}) // thinking ENABLED but model doesn't output </think>
var allContent string
var allThinking string
var allCalls []api.ToolCall
for _, chunk := range chunks {
content, thinking, calls, err := p.Add(chunk, false)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
allContent += content
allThinking += thinking
allCalls = append(allCalls, calls...)
}
// Drain
content, thinking, calls, err := p.Add("", true)
if err != nil {
t.Fatalf("unexpected error on done: %v", err)
}
allContent += content
allThinking += thinking
allCalls = append(allCalls, calls...)
// The parser was in thinking mode, so text before <tool_call> is emitted as thinking.
expectedThinking := "Let me analyze this."
expectedCalls := []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{"city": "Paris"}),
},
},
}
if allContent != "" {
t.Errorf("expected no content (text was streamed as thinking), got: %q", allContent)
}
if diff := cmp.Diff(allThinking, expectedThinking); diff != "" {
t.Errorf("thinking mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(allCalls, expectedCalls, argsComparer); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
}

View File

@@ -68,12 +68,6 @@ func ParserForName(name string) Parser {
return &Nemotron3NanoParser{} return &Nemotron3NanoParser{}
case "functiongemma": case "functiongemma":
return &FunctionGemmaParser{} return &FunctionGemmaParser{}
case "glm-4.7":
return &GLM47Parser{}
case "lfm2":
return &LFM2Parser{hasThinkingSupport: false}
case "lfm2-thinking":
return &LFM2Parser{hasThinkingSupport: true}
default:
return nil
}

View File

@@ -91,37 +91,6 @@ func TestQwenParserStreaming(t *testing.T) {
},
},
},
{
desc: "tool call tags split character by character",
steps: []step{
{input: "<", wantEvents: []qwenEvent{}},
{input: "t", wantEvents: []qwenEvent{}},
{input: "o", wantEvents: []qwenEvent{}},
{input: "o", wantEvents: []qwenEvent{}},
{input: "l", wantEvents: []qwenEvent{}},
{input: "_", wantEvents: []qwenEvent{}},
{input: "c", wantEvents: []qwenEvent{}},
{input: "a", wantEvents: []qwenEvent{}},
{input: "l", wantEvents: []qwenEvent{}},
{input: "l", wantEvents: []qwenEvent{}},
{input: ">", wantEvents: []qwenEvent{}},
{input: "a", wantEvents: []qwenEvent{}},
{input: "b", wantEvents: []qwenEvent{}},
{input: "c", wantEvents: []qwenEvent{}},
{input: "<", wantEvents: []qwenEvent{}},
{input: "/", wantEvents: []qwenEvent{}},
{input: "t", wantEvents: []qwenEvent{}},
{input: "o", wantEvents: []qwenEvent{}},
{input: "o", wantEvents: []qwenEvent{}},
{input: "l", wantEvents: []qwenEvent{}},
{input: "_", wantEvents: []qwenEvent{}},
{input: "c", wantEvents: []qwenEvent{}},
{input: "a", wantEvents: []qwenEvent{}},
{input: "l", wantEvents: []qwenEvent{}},
{input: "l", wantEvents: []qwenEvent{}},
{input: ">", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "abc"}}},
},
},
{
desc: "trailing whitespace between content and tool call",
steps: []step{

View File

@@ -96,11 +96,3 @@ func testArgs(m map[string]any) api.ToolCallFunctionArguments {
}
return args
}
func args(s string) api.ToolCallFunctionArguments {
var result api.ToolCallFunctionArguments
if err := json.Unmarshal([]byte(s), &result); err != nil {
panic("invalid JSON in args(): " + err.Error())
}
return result
}

View File

@@ -1,110 +0,0 @@
package renderers
import (
"encoding/json"
"fmt"
"strings"
"github.com/ollama/ollama/api"
)
type GLM46Renderer struct{}
func (r *GLM46Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
var sb strings.Builder
sb.WriteString("[gMASK]<sop>")
var lastUserIndex int
for i, message := range messages {
if message.Role == "user" {
lastUserIndex = i
}
}
if len(tools) > 0 {
sb.WriteString("<|system|>\n")
sb.WriteString("# Tools\n\n")
sb.WriteString("You may call one or more functions to assist with the user query.\n\n")
sb.WriteString("You are provided with function signatures within <tools></tools> XML tags:\n")
sb.WriteString("<tools>\n")
for _, tool := range tools {
d, _ := json.Marshal(tool)
sb.WriteString(string(d) + "\n")
}
sb.WriteString("</tools>\n\n")
sb.WriteString("For each function call, output the function name and arguments within the following XML format:\n")
sb.WriteString("<tool_call>{function-name}\n")
sb.WriteString("<arg_key>{arg-key-1}</arg_key>\n")
sb.WriteString("<arg_value>{arg-value-1}</arg_value>\n")
sb.WriteString("<arg_key>{arg-key-2}</arg_key>\n")
sb.WriteString("<arg_value>{arg-value-2}</arg_value>\n")
sb.WriteString("...\n")
sb.WriteString("</tool_call>")
}
for i, message := range messages {
switch message.Role {
case "user":
sb.WriteString("<|user|>\n")
sb.WriteString(message.Content)
if thinkValue != nil && !thinkValue.Bool() && !strings.HasSuffix(message.Content, "/nothink") {
sb.WriteString("/nothink")
}
case "assistant":
sb.WriteString("<|assistant|>")
if i > lastUserIndex {
if message.Thinking != "" {
sb.WriteString("\n<think>" + message.Thinking + "</think>")
} else {
sb.WriteString("\n<think></think>")
}
}
if message.Content != "" {
sb.WriteString("\n" + message.Content)
}
if len(message.ToolCalls) > 0 {
for _, toolCall := range message.ToolCalls {
sb.WriteString("\n<tool_call>" + toolCall.Function.Name + "\n")
for key, value := range toolCall.Function.Arguments.All() {
sb.WriteString("<arg_key>" + key + "</arg_key>\n")
var valueStr string
if str, ok := value.(string); ok {
valueStr = str
} else {
jsonBytes, err := json.Marshal(value)
if err != nil {
valueStr = fmt.Sprintf("%v", value)
} else {
valueStr = string(jsonBytes)
}
}
sb.WriteString("<arg_value>" + valueStr + "</arg_value>\n")
}
sb.WriteString("</tool_call>")
}
}
case "tool":
if i == 0 || messages[i-1].Role != "tool" {
sb.WriteString("<|observation|>")
}
sb.WriteString("\n<tool_response>\n")
sb.WriteString(message.Content)
sb.WriteString("\n</tool_response>")
case "system":
sb.WriteString("<|system|>\n")
sb.WriteString(message.Content)
}
}
// Add generation prompt
sb.WriteString("<|assistant|>")
if thinkValue != nil && !thinkValue.Bool() {
sb.WriteString("\n<think></think>\n")
}
return sb.String(), nil
}
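A minimal invocation of the removed GLM-4.6 renderer, as a sketch; the expected string is the "basic" case from the deleted test file that follows:

r := &GLM46Renderer{}
prompt, _ := r.Render([]api.Message{{Role: "user", Content: "Hello, how are you?"}}, nil, nil)
// prompt == "[gMASK]<sop><|user|>\nHello, how are you?<|assistant|>"
// With thinkValue nil, no /nothink suffix and no <think></think> block is appended.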

View File

@@ -1,223 +0,0 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestGLM46Renderer(t *testing.T) {
tests := []struct {
name string
messages []api.Message
tools []api.Tool
thinkValue *api.ThinkValue
expected string
skip string
}{
{
name: "basic",
messages: []api.Message{
{Role: "user", Content: "Hello, how are you?"},
},
expected: `[gMASK]<sop><|user|>
Hello, how are you?<|assistant|>`,
},
{
name: "basic with system message",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
expected: `[gMASK]<sop><|system|>
You are a helpful assistant.<|user|>
Hello, how are you?<|assistant|>`,
},
{
name: "basic with user assistant user",
messages: []api.Message{
{Role: "user", Content: "What is the capital of France?"},
{Role: "assistant", Thinking: "Let me analyze the request...", Content: "The capital of France is Paris."},
{Role: "user", Content: "Fantastic!"},
},
expected: `[gMASK]<sop><|user|>
What is the capital of France?<|assistant|>
The capital of France is Paris.<|user|>
Fantastic!<|assistant|>`,
},
{
skip: "tool call ordering not guaranteed yet",
name: "tools",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant with access to tools."},
{Role: "user", Content: "What is the weather like in Tokyo?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather in a given location",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: propsMap(`{"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}}`),
},
},
},
},
expected: `[gMASK]<sop><|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type":"function","function":{"name":"get_weather","description":"Get the current weather in a given location","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"},"unit":{"type":"string","description":"","enum":["celsius","fahrenheit"]}}}}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call><|system|>
You are a helpful assistant with access to tools.<|user|>
What is the weather like in Tokyo?<|assistant|>`,
},
{
skip: "tool call ordering not guaranteed yet",
name: "tool calls",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant with access to tools."},
{Role: "user", Content: "What is the weather like in Tokyo?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: args(`{"location": "Tokyo, Japan", "unit": "celsius"}`),
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: args(`{"location": "Japan", "unit": "fahrenheit"}`),
},
},
},
},
{
Role: "tool",
Content: "{\"temperature\": 22, \"weather\": \"partly cloudy\", \"humidity\": 65}",
ToolName: "get_weather",
},
{
Role: "tool",
Content: "{\"temperature\": 68, \"weather\": \"sunny\", \"humidity\": 75}",
ToolName: "get_weather",
},
{
Role: "assistant",
Content: "The weather in Tokyo is currently partly cloudy with a temperature of 22°C and 65% humidity. It's a pleasant day with moderate temperatures.",
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather in a given location",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: propsMap(`{"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}}`),
},
},
},
},
expected: `[gMASK]<sop><|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type":"function","function":{"name":"get_weather","description":"Get the current weather in a given location","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"The city and state, e.g. San Francisco, CA"},"unit":{"type":"string","description":"","enum":["celsius","fahrenheit"]}}}}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call><|system|>
You are a helpful assistant with access to tools.<|user|>
What is the weather like in Tokyo?<|assistant|>
<think></think>
<tool_call>get_weather
<arg_key>location</arg_key>
<arg_value>Tokyo, Japan</arg_value>
<arg_key>unit</arg_key>
<arg_value>celsius</arg_value>
</tool_call>
<tool_call>get_weather
<arg_key>location</arg_key>
<arg_value>Japan</arg_value>
<arg_key>unit</arg_key>
<arg_value>fahrenheit</arg_value>
</tool_call><|observation|>
<tool_response>
{"temperature": 22, "weather": "partly cloudy", "humidity": 65}
</tool_response>
<tool_response>
{"temperature": 68, "weather": "sunny", "humidity": 75}
</tool_response><|assistant|>
<think></think>
The weather in Tokyo is currently partly cloudy with a temperature of 22°C and 65% humidity. It's a pleasant day with moderate temperatures.<|assistant|>`,
},
{
name: "think true",
messages: []api.Message{
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: `[gMASK]<sop><|user|>
Hello, how are you?<|assistant|>`,
},
{
name: "think false",
messages: []api.Message{
{Role: "user", Content: "Hello, how are you?"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `[gMASK]<sop><|user|>
Hello, how are you?/nothink<|assistant|>
<think></think>
`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.skip != "" {
t.Skip(tt.skip)
}
renderer := &GLM46Renderer{}
rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
t.Logf("Got:\n%s", rendered)
t.Logf("Expected:\n%s", tt.expected)
}
})
}
}

View File

@@ -1,170 +0,0 @@
package renderers
import (
"encoding/json"
"fmt"
"strings"
"github.com/ollama/ollama/api"
)
// GLM47Renderer renders messages for GLM-4.7 models.
//
// GLM-4.7 Thinking Modes (ref: https://docs.z.ai/guides/capabilities/thinking-mode):
//
// 1. INTERLEAVED THINKING
// The model thinks between tool calls and after receiving tool results.
// This enables complex step-by-step reasoning: interpreting each tool output
// before deciding what to do next. Thinking blocks are preserved and returned
// with tool results to maintain reasoning continuity.
//
// 2. PRESERVED THINKING
// The model retains reasoning content from previous assistant turns in context.
// This preserves reasoning continuity across multi-turn conversations. The
// upstream API has a "clear_thinking" parameter to control this:
// - clear_thinking=true: clears reasoning from previous turns (outputs </think>)
// - clear_thinking=false: preserves <think>...</think> blocks from previous turns
//
// 3. TURN-LEVEL THINKING
// Controls whether the model should reason on each turn. The upstream API
// uses "enable_thinking" parameter:
// - enable_thinking=true: outputs <think> to start reasoning
// - enable_thinking=false: outputs </think> to skip reasoning
//
// OLLAMA DEFAULTS:
// - Thinking is ENABLED by default (thinkValue=nil or true outputs <think>)
// - Thinking is PRESERVED by default (reasoning content from previous turns is always
// included in <think>...</think> blocks, equivalent to clear_thinking=false)
// - Users can disable thinking per-turn via thinkValue=false
type GLM47Renderer struct{}
func (r *GLM47Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
var sb strings.Builder
sb.WriteString("[gMASK]<sop>")
if len(tools) > 0 {
sb.WriteString("<|system|>\n")
sb.WriteString("# Tools\n\n")
sb.WriteString("You may call one or more functions to assist with the user query.\n\n")
sb.WriteString("You are provided with function signatures within <tools></tools> XML tags:\n")
sb.WriteString("<tools>\n")
for _, tool := range tools {
d, _ := json.Marshal(tool)
sb.WriteString(formatGLM47ToolJSON(d))
sb.WriteString("\n")
}
sb.WriteString("</tools>\n\n")
sb.WriteString("For each function call, output the function name and arguments within the following XML format:\n")
sb.WriteString("<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>")
}
think := true
if thinkValue != nil && !thinkValue.Bool() {
think = false
}
for i, message := range messages {
switch message.Role {
case "user":
sb.WriteString("<|user|>")
sb.WriteString(message.Content)
case "assistant":
sb.WriteString("<|assistant|>")
if message.Thinking != "" {
sb.WriteString("<think>" + message.Thinking + "</think>")
} else {
sb.WriteString("</think>")
}
if message.Content != "" {
sb.WriteString(message.Content)
}
if len(message.ToolCalls) > 0 {
for _, toolCall := range message.ToolCalls {
sb.WriteString("<tool_call>" + toolCall.Function.Name)
sb.WriteString(renderGLM47ToolArguments(toolCall.Function.Arguments))
sb.WriteString("</tool_call>")
}
}
case "tool":
if i == 0 || messages[i-1].Role != "tool" {
sb.WriteString("<|observation|>")
}
sb.WriteString("<tool_response>")
sb.WriteString(message.Content)
sb.WriteString("</tool_response>")
case "system":
sb.WriteString("<|system|>")
sb.WriteString(message.Content)
}
}
sb.WriteString("<|assistant|>")
if think {
sb.WriteString("<think>")
} else {
sb.WriteString("</think>")
}
return sb.String(), nil
}
func renderGLM47ToolArguments(args api.ToolCallFunctionArguments) string {
var sb strings.Builder
for key, value := range args.All() {
sb.WriteString("<arg_key>" + key + "</arg_key>")
var valueStr string
if str, ok := value.(string); ok {
valueStr = str
} else {
jsonBytes, err := json.Marshal(value)
if err != nil {
valueStr = fmt.Sprintf("%v", value)
} else {
valueStr = string(jsonBytes)
}
}
sb.WriteString("<arg_value>" + valueStr + "</arg_value>")
}
return sb.String()
}
func formatGLM47ToolJSON(raw []byte) string {
var sb strings.Builder
sb.Grow(len(raw) + len(raw)/10)
inString := false
escaped := false
for i := range raw {
ch := raw[i]
sb.WriteByte(ch)
if inString {
if escaped {
escaped = false
continue
}
if ch == '\\' {
escaped = true
continue
}
if ch == '"' {
inString = false
}
continue
}
if ch == '"' {
inString = true
continue
}
if ch == ':' || ch == ',' {
sb.WriteByte(' ')
}
}
return sb.String()
}
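The doc comment's three thinking modes reduce, in this renderer, to which think token gets appended, and formatGLM47ToolJSON widens compact encoding/json output with a space after each structural ':' and ',' while leaving separators inside string literals alone. A hypothetical test that would have compiled alongside the deleted file (testing and strings imports assumed; the assertions mirror the test cases below):

func TestGLM47RendererSketch(t *testing.T) {
	r := &GLM47Renderer{}
	msgs := []api.Message{{Role: "user", Content: "Hi"}}

	// Turn-level thinking: enabled by default, opt out per turn.
	if on, _ := r.Render(msgs, nil, nil); !strings.HasSuffix(on, "<|assistant|><think>") {
		t.Errorf("thinking on: got %q", on)
	}
	if off, _ := r.Render(msgs, nil, &api.ThinkValue{Value: false}); !strings.HasSuffix(off, "<|assistant|></think>") {
		t.Errorf("thinking off: got %q", off)
	}

	// Tool schema spacing: structural ':' and ',' gain a trailing space,
	// separators inside string literals stay untouched.
	got := formatGLM47ToolJSON([]byte(`{"name":"get_weather","desc":"a:b, c"}`))
	if got != `{"name": "get_weather", "desc": "a:b, c"}` {
		t.Errorf("spacing: got %q", got)
	}
}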

View File

@@ -1,191 +0,0 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestGLM47Renderer(t *testing.T) {
tests := []struct {
name string
messages []api.Message
tools []api.Tool
thinkValue *api.ThinkValue
expected string
}{
{
name: "basic user message",
messages: []api.Message{
{Role: "user", Content: "Hello"},
},
expected: "[gMASK]<sop><|user|>Hello<|assistant|><think>",
},
{
name: "thinking disabled",
messages: []api.Message{
{Role: "user", Content: "Hello"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "[gMASK]<sop><|user|>Hello<|assistant|></think>",
},
{
name: "system and user",
messages: []api.Message{
{Role: "system", Content: "You are helpful."},
{Role: "user", Content: "Hello"},
},
expected: "[gMASK]<sop><|system|>You are helpful.<|user|>Hello<|assistant|><think>",
},
{
name: "multi-turn conversation",
messages: []api.Message{
{Role: "user", Content: "Hi"},
{Role: "assistant", Content: "Hello there"},
{Role: "user", Content: "How are you?"},
},
expected: "[gMASK]<sop><|user|>Hi<|assistant|></think>Hello there<|user|>How are you?<|assistant|><think>",
},
{
name: "assistant with reasoning_content",
messages: []api.Message{
{Role: "user", Content: "Answer with reasoning."},
{Role: "assistant", Thinking: "Plan.", Content: "Done."},
},
expected: "[gMASK]<sop><|user|>Answer with reasoning.<|assistant|><think>Plan.</think>Done.<|assistant|><think>",
},
{
name: "tool call with empty content",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: args(`{"location": "Tokyo", "unit": "celsius"}`),
},
},
},
},
{Role: "tool", Content: `{"temperature":22}`},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: propsMap(`{"location": {"type": "string"}}`),
},
},
},
},
expected: "[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get weather\", \"parameters\": {\"type\": \"object\", \"required\": [\"location\"], \"properties\": {\"location\": {\"type\": \"string\"}}}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call><|user|>Weather?<|assistant|></think><tool_call>get_weather<arg_key>location</arg_key><arg_value>Tokyo</arg_value><arg_key>unit</arg_key><arg_value>celsius</arg_value></tool_call><|observation|><tool_response>{\"temperature\":22}</tool_response><|assistant|><think>",
},
{
name: "tool call with content",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
{
Role: "assistant",
Content: "Let me check",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: args(`{"location": "Tokyo"}`),
},
},
},
},
{Role: "tool", Content: `{"temperature":22}`},
{Role: "assistant", Content: "It is 22C."},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: propsMap(`{"location": {"type": "string"}}`),
},
},
},
},
expected: "[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get weather\", \"parameters\": {\"type\": \"object\", \"required\": [\"location\"], \"properties\": {\"location\": {\"type\": \"string\"}}}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call><|user|>Weather?<|assistant|></think>Let me check<tool_call>get_weather<arg_key>location</arg_key><arg_value>Tokyo</arg_value></tool_call><|observation|><tool_response>{\"temperature\":22}</tool_response><|assistant|></think>It is 22C.<|assistant|><think>",
},
{
name: "multiple tool calls and responses",
messages: []api.Message{
{Role: "user", Content: "Compare weather"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: args(`{"location": "Tokyo"}`),
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: args(`{"location": "Paris"}`),
},
},
},
},
{Role: "tool", Content: `{"temperature":22}`},
{Role: "tool", Content: `{"temperature":18}`},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: propsMap(`{"location": {"type": "string"}}`),
},
},
},
},
expected: "[gMASK]<sop><|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"get_weather\", \"description\": \"Get weather\", \"parameters\": {\"type\": \"object\", \"required\": [\"location\"], \"properties\": {\"location\": {\"type\": \"string\"}}}}}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call><|user|>Compare weather<|assistant|></think><tool_call>get_weather<arg_key>location</arg_key><arg_value>Tokyo</arg_value></tool_call><tool_call>get_weather<arg_key>location</arg_key><arg_value>Paris</arg_value></tool_call><|observation|><tool_response>{\"temperature\":22}</tool_response><tool_response>{\"temperature\":18}</tool_response><|assistant|><think>",
},
{
name: "preserved thinking in multi-turn",
messages: []api.Message{
{Role: "user", Content: "Think step by step"},
{Role: "assistant", Thinking: "Let me think...", Content: "Here's my answer."},
{Role: "user", Content: "Continue"},
},
expected: "[gMASK]<sop><|user|>Think step by step<|assistant|><think>Let me think...</think>Here's my answer.<|user|>Continue<|assistant|><think>",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
renderer := &GLM47Renderer{}
rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
t.Logf("Got:\n%s", rendered)
t.Logf("Expected:\n%s", tt.expected)
}
})
}
}

View File

@@ -1,144 +0,0 @@
package renderers
import (
"encoding/json"
"strings"
"github.com/ollama/ollama/api"
)
type LFM2Renderer struct {
IsThinking bool
}
func (r *LFM2Renderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
var sb strings.Builder
// Note: BOS token is added by the tokenizer (add_bos_token: true), not the renderer
// Extract first system message if present (to combine with tools)
var firstSystemContent string
startIdx := 0
if len(messages) > 0 && messages[0].Role == "system" {
firstSystemContent = messages[0].Content
startIdx = 1
}
// Append tools to first system content
if len(tools) > 0 {
if firstSystemContent != "" {
firstSystemContent += "\n"
}
firstSystemContent += "List of tools: ["
for i, tool := range tools {
toolJSON, err := json.Marshal(tool)
if err != nil {
return "", err
}
firstSystemContent += string(toolJSON)
if i < len(tools)-1 {
firstSystemContent += ", "
}
}
firstSystemContent += "]"
}
// Output first system block if it has content
if firstSystemContent != "" {
sb.WriteString("<|im_start|>system\n")
sb.WriteString(firstSystemContent)
sb.WriteString("<|im_end|>\n")
}
// Find the index of the last assistant message for thinking stripping
lastAssistantIndex := -1
for i := len(messages) - 1; i >= startIdx; i-- {
if messages[i].Role == "assistant" {
lastAssistantIndex = i
break
}
}
// Track whether we need to add generation prompt
needsGenerationPrompt := len(messages) > 0
for i := startIdx; i < len(messages); i++ {
message := messages[i]
switch message.Role {
case "system":
// Additional system messages (after the first) are rendered normally
sb.WriteString("<|im_start|>system\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
case "user":
sb.WriteString("<|im_start|>user\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
needsGenerationPrompt = true
case "assistant":
sb.WriteString("<|im_start|>assistant\n")
// Check if this is the last assistant message
isLastAssistant := i == lastAssistantIndex
// Process content (may need thinking stripped)
content := message.Content
// Handle thinking tags in assistant content
keepPastThinking := r.IsThinking && (thinkValue != nil && thinkValue.Bool())
if strings.Contains(content, "</think>") {
parts := strings.SplitN(content, "</think>", 2)
if len(parts) > 1 {
if !isLastAssistant && !keepPastThinking {
// Strip thinking entirely for past assistant messages
content = strings.TrimSpace(parts[1])
} else {
// Preserve thinking but trim whitespace after </think>
content = parts[0] + "</think>" + strings.TrimLeft(parts[1], " \t\n\r")
}
}
}
if len(message.ToolCalls) > 0 {
// Assistant with tool calls - write content first (if any after stripping)
if content != "" {
sb.WriteString(content)
}
for _, toolCall := range message.ToolCalls {
sb.WriteString("<|tool_call_start|>")
toolCallJSON := map[string]any{
"name": toolCall.Function.Name,
"arguments": toolCall.Function.Arguments,
}
callJSON, _ := json.Marshal(toolCallJSON)
sb.WriteString(string(callJSON))
sb.WriteString("<|tool_call_end|>")
}
} else {
sb.WriteString(content)
}
sb.WriteString("<|im_end|>\n")
needsGenerationPrompt = true // Always add gen prompt after assistant when add_generation_prompt=true
case "tool":
// Tool responses are rendered as plain messages per the chat template
sb.WriteString("<|im_start|>tool\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
needsGenerationPrompt = true
}
}
// Add generation prompt
if needsGenerationPrompt {
sb.WriteString("<|im_start|>assistant\n")
// Note: Model is a "thinking-only" model - it will output <think> itself
// We don't add <think> tag to the prompt
}
return sb.String(), nil
}
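For reference, the smallest round trip through the removed LFM2 renderer, written as a Go example that would have lived in the renderers package (fmt import assumed); the expected prompt mirrors the "basic user message" case in the deleted test file below:

func ExampleLFM2Renderer_Render() {
	r := &LFM2Renderer{IsThinking: true}
	prompt, err := r.Render(
		[]api.Message{{Role: "user", Content: "Hello!"}},
		nil, // no tools
		&api.ThinkValue{Value: false},
	)
	if err != nil {
		panic(err)
	}
	fmt.Print(prompt)
	// Output:
	// <|im_start|>user
	// Hello!<|im_end|>
	// <|im_start|>assistant
}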

View File

@@ -1,427 +0,0 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestLFM2Renderer(t *testing.T) {
tests := []struct {
name string
messages []api.Message
tools []api.Tool
thinkValue *api.ThinkValue
expected string
}{
{
name: "basic user message",
messages: []api.Message{
{Role: "user", Content: "Hello!"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "basic with system message",
messages: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello!"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "multiple system messages rendered separately",
messages: []api.Message{
{Role: "system", Content: "First instruction."},
{Role: "system", Content: "Second instruction."},
{Role: "user", Content: "Hello!"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>system\nFirst instruction.<|im_end|>\n<|im_start|>system\nSecond instruction.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "multi-turn conversation",
messages: []api.Message{
{Role: "user", Content: "What is 2+2?"},
{Role: "assistant", Content: "The answer is 4."},
{Role: "user", Content: "Thanks!"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\nThe answer is 4.<|im_end|>\n<|im_start|>user\nThanks!<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "only system message",
messages: []api.Message{
{Role: "system", Content: "You are helpful."},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>system\nYou are helpful.<|im_end|>\n<|im_start|>assistant\n",
},
{
// When assistant is the LAST assistant, thinking is preserved (even with keep_past_thinking=false)
name: "user-assistant-user: last assistant preserves thinking",
messages: []api.Message{
{Role: "user", Content: "Q1"},
{Role: "assistant", Content: "<think>reasoning</think>A1"},
{Role: "user", Content: "Q2"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\n<think>reasoning</think>A1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n",
},
{
// With two assistants, first is stripped (not last), second preserved (is last)
name: "multi-turn thinking: first stripped, second preserved",
messages: []api.Message{
{Role: "user", Content: "Q1"},
{Role: "assistant", Content: "<think>reason1</think>A1"},
{Role: "user", Content: "Q2"},
{Role: "assistant", Content: "<think>reason2</think>A2"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\nA1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n<think>reason2</think>A2<|im_end|>\n<|im_start|>assistant\n",
},
{
// With thinking enabled (keep_past_thinking=true), both preserved
name: "multi-turn thinking: both preserved when thinking enabled",
messages: []api.Message{
{Role: "user", Content: "Q1"},
{Role: "assistant", Content: "<think>reason1</think>A1"},
{Role: "user", Content: "Q2"},
{Role: "assistant", Content: "<think>reason2</think>A2"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: "<|im_start|>user\nQ1<|im_end|>\n<|im_start|>assistant\n<think>reason1</think>A1<|im_end|>\n<|im_start|>user\nQ2<|im_end|>\n<|im_start|>assistant\n<think>reason2</think>A2<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "assistant with tool calls",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
},
{
name: "assistant with content and tool calls",
messages: []api.Message{
{Role: "user", Content: "What's the weather in Paris?"},
{
Role: "assistant",
Content: "Let me check.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<|im_start|>user` + "\n" + `What's the weather in Paris?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `Let me check.<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
},
{
name: "tool response",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{Role: "assistant", Content: "Let me check."},
{Role: "tool", Content: "22C, Sunny"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\nLet me check.<|im_end|>\n<|im_start|>tool\n22C, Sunny<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "multiple tool calls",
messages: []api.Message{
{Role: "user", Content: "Get weather for Paris and London"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "London",
}),
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<|im_start|>user` + "\n" + `Get weather for Paris and London<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n" + `<|tool_call_start|>{"arguments":{"location":"Paris"},"name":"get_weather"}<|tool_call_end|><|tool_call_start|>{"arguments":{"location":"London"},"name":"get_weather"}<|tool_call_end|><|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
},
{
name: "tools definitions with system message",
messages: []api.Message{
{Role: "system", Content: "You are helpful."},
{Role: "user", Content: "What's the weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: testPropsMap(map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "City name",
},
}),
Required: []string{"location"},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<|im_start|>system` + "\n" + `You are helpful.` + "\n" + `List of tools: [{"type":"function","function":{"name":"get_weather","description":"Get current weather","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"City name"}}}}}]<|im_end|>` + "\n" + `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
},
{
name: "tools definitions without system message",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: testPropsMap(map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "City name",
},
}),
Required: []string{"location"},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: `<|im_start|>system` + "\n" + `List of tools: [{"type":"function","function":{"name":"get_weather","description":"Get current weather","parameters":{"type":"object","required":["location"],"properties":{"location":{"type":"string","description":"City name"}}}}}]<|im_end|>` + "\n" + `<|im_start|>user` + "\n" + `What's the weather?<|im_end|>` + "\n" + `<|im_start|>assistant` + "\n",
},
{
name: "multiple tools without system message",
messages: []api.Message{
{Role: "user", Content: "Hello"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "get_time",
Description: "Get time",
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>system\nList of tools: [{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get weather\",\"parameters\":{\"type\":\"\",\"properties\":null}}}, {\"type\":\"function\",\"function\":{\"name\":\"get_time\",\"description\":\"Get time\",\"parameters\":{\"type\":\"\",\"properties\":null}}}]<|im_end|>\n<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "user-tool sequence",
messages: []api.Message{
{Role: "user", Content: "Check weather"},
{Role: "tool", Content: "22C"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nCheck weather<|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "full tool call cycle",
messages: []api.Message{
{Role: "user", Content: "Check weather"},
{Role: "assistant", Content: "Let me check"},
{Role: "tool", Content: "22C"},
{Role: "assistant", Content: "It's 22C"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nCheck weather<|im_end|>\n<|im_start|>assistant\nLet me check<|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\nIt's 22C<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "unicode content",
messages: []api.Message{
{Role: "user", Content: "你好世界! مرحبا 🌍"},
{Role: "assistant", Content: "Hello! 👋"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\n你好世界! مرحبا 🌍<|im_end|>\n<|im_start|>assistant\nHello! 👋<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "newlines in content",
messages: []api.Message{
{Role: "user", Content: "Line 1\nLine 2\n\nLine 4"},
{Role: "assistant", Content: "Response with\nmultiple\nlines"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nLine 1\nLine 2\n\nLine 4<|im_end|>\n<|im_start|>assistant\nResponse with\nmultiple\nlines<|im_end|>\n<|im_start|>assistant\n",
},
{
name: "empty assistant content",
messages: []api.Message{
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: ""},
{Role: "user", Content: "OK"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n<|im_end|>\n<|im_start|>user\nOK<|im_end|>\n<|im_start|>assistant\n",
},
{
// Generation prompt does NOT include <think> - model outputs it
name: "generation prompt has no think tag",
messages: []api.Message{
{Role: "user", Content: "Think hard"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: "<|im_start|>user\nThink hard<|im_end|>\n<|im_start|>assistant\n",
},
{
// Interleaved: thinking before tool call - last assistant preserves thinking
name: "thinking before tool call (last assistant)",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "<think>I need to check the weather</think>",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<think>I need to check the weather</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>assistant\n",
},
{
// Two assistants with tool calls - first has thinking stripped
name: "two assistants with tools: first thinking stripped",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "<think>checking</think>",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
},
},
{Role: "tool", Content: "22C"},
{Role: "assistant", Content: "<think>got result</think>It's 22C!"},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n<think>got result</think>It's 22C!<|im_end|>\n<|im_start|>assistant\n",
},
{
// Two assistants with tools - both preserved when thinking enabled
name: "two assistants with tools: both preserved when thinking enabled",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "<think>checking</think>",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
},
},
{Role: "tool", Content: "22C"},
{Role: "assistant", Content: "<think>got result</think>It's 22C!"},
},
thinkValue: &api.ThinkValue{Value: true},
expected: "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\n<think>checking</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>tool\n22C<|im_end|>\n<|im_start|>assistant\n<think>got result</think>It's 22C!<|im_end|>\n<|im_start|>assistant\n",
},
{
// Content before thinking before tool call
name: "content then thinking then tool call",
messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
Content: "Let me check.<think>Using weather API</think>",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: testArgs(map[string]any{
"location": "Paris",
}),
},
},
},
},
},
thinkValue: &api.ThinkValue{Value: false},
expected: "<|im_start|>user\nWhat's the weather?<|im_end|>\n<|im_start|>assistant\nLet me check.<think>Using weather API</think><|tool_call_start|>{\"arguments\":{\"location\":\"Paris\"},\"name\":\"get_weather\"}<|tool_call_end|><|im_end|>\n<|im_start|>assistant\n",
},
}
renderer := &LFM2Renderer{IsThinking: true}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := renderer.Render(tt.messages, tt.tools, tt.thinkValue)
if err != nil {
t.Fatalf("Render() error = %v", err)
}
if diff := cmp.Diff(tt.expected, rendered); diff != "" {
t.Errorf("Render() mismatch (-want +got):\n%s", diff)
}
})
}
}
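
For reviewers: the table above pins down the renderer's thinking rule — when thinking is disabled, <think>...</think> spans are dropped from every assistant message except the last, which is rendered verbatim. A minimal sketch of that rule (hypothetical helper, not the renderer's actual code; assumes "regexp" is imported):

var thinkRE = regexp.MustCompile(`(?s)<think>.*?</think>`)

// stripEarlierThinking drops <think> spans from all but the final assistant
// message when thinking is disabled; with thinking enabled it is a no-op.
func stripEarlierThinking(contents []string, thinkingEnabled bool) []string {
	if thinkingEnabled {
		return contents
	}
	out := make([]string, len(contents))
	copy(out, contents)
	for i := 0; i < len(out)-1; i++ {
		out[i] = thinkRE.ReplaceAllString(out[i], "")
	}
	return out
}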

View File

@@ -80,12 +80,6 @@ func rendererForName(name string) Renderer {
return &Nemotron3NanoRenderer{} return &Nemotron3NanoRenderer{}
case "functiongemma": case "functiongemma":
return &FunctionGemmaRenderer{} return &FunctionGemmaRenderer{}
case "glm-4.7":
return &GLM47Renderer{}
case "lfm2":
return &LFM2Renderer{IsThinking: false}
case "lfm2-thinking":
return &LFM2Renderer{IsThinking: true}
default: default:
return nil return nil
} }
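
With the glm-4.7 and lfm2 cases removed from this switch, those names now fall through to the nil default; callers see it as (illustrative):

if r := rendererForName("lfm2"); r == nil {
	// no built-in renderer registered under this name
}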

View File

@@ -1,26 +1,6 @@
package renderers package renderers
import ( import "github.com/ollama/ollama/api"
"encoding/json"
"github.com/ollama/ollama/api"
)
func args(s string) api.ToolCallFunctionArguments {
var result api.ToolCallFunctionArguments
if err := json.Unmarshal([]byte(s), &result); err != nil {
panic("invalid JSON in args(): " + err.Error())
}
return result
}
func propsMap(s string) *api.ToolPropertiesMap {
var result api.ToolPropertiesMap
if err := json.Unmarshal([]byte(s), &result); err != nil {
panic("invalid JSON in propsMap(): " + err.Error())
}
return &result
}
// testPropsMap creates a ToolPropertiesMap from a map (convenience function for tests, order not preserved) // testPropsMap creates a ToolPropertiesMap from a map (convenience function for tests, order not preserved)
func testPropsMap(m map[string]api.ToolProperty) *api.ToolPropertiesMap { func testPropsMap(m map[string]api.ToolProperty) *api.ToolPropertiesMap {
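
The tests in this package also call a testArgs helper that this hunk truncates. One plausible shape, consistent with the removed JSON-based args helper (an assumption, not the file's actual definition; it would still need encoding/json):

func testArgs(m map[string]any) api.ToolCallFunctionArguments {
	b, err := json.Marshal(m)
	if err != nil {
		panic("invalid map in testArgs(): " + err.Error())
	}
	var result api.ToolCallFunctionArguments
	if err := json.Unmarshal(b, &result); err != nil {
		panic("invalid JSON in testArgs(): " + err.Error())
	}
	return result
}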

View File

@@ -630,10 +630,6 @@ func nameFromToolCallID(messages []Message, toolCallID string) string {
// decodeImageURL decodes a base64 data URI into raw image bytes. // decodeImageURL decodes a base64 data URI into raw image bytes.
func decodeImageURL(url string) (api.ImageData, error) { func decodeImageURL(url string) (api.ImageData, error) {
if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
return nil, errors.New("image URLs are not currently supported, please use base64 encoded data instead")
}
types := []string{"jpeg", "jpg", "png", "webp"} types := []string{"jpeg", "jpg", "png", "webp"}
// Support blank mime type to match /api/chat's behavior of taking just unadorned base64 // Support blank mime type to match /api/chat's behavior of taking just unadorned base64
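
For context, the non-URL path amounts to splitting a data: URI at its comma and base64-decoding the payload. A self-contained sketch of that idea (simplified; not the function's actual body; imports encoding/base64, errors, strings):

func decodeDataURI(uri string) ([]byte, error) {
	// Accept both "data:image/png;base64,AAAA..." and bare base64 payloads,
	// mirroring the blank-mime-type behavior noted above.
	payload := uri
	if strings.HasPrefix(uri, "data:") {
		_, rest, ok := strings.Cut(uri, ",")
		if !ok {
			return nil, errors.New("malformed data URI")
		}
		payload = rest
	}
	return base64.StdEncoding.DecodeString(payload)
}
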
@@ -737,104 +733,3 @@ func FromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
DebugRenderOnly: r.DebugRenderOnly, DebugRenderOnly: r.DebugRenderOnly,
}, nil }, nil
} }
// ImageGenerationRequest is an OpenAI-compatible image generation request.
type ImageGenerationRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
N int `json:"n,omitempty"`
Size string `json:"size,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
Seed *int64 `json:"seed,omitempty"`
}
// ImageGenerationResponse is an OpenAI-compatible image generation response.
type ImageGenerationResponse struct {
Created int64 `json:"created"`
Data []ImageURLOrData `json:"data"`
}
// ImageURLOrData contains either a URL or base64-encoded image data.
type ImageURLOrData struct {
URL string `json:"url,omitempty"`
B64JSON string `json:"b64_json,omitempty"`
}
// FromImageGenerationRequest converts an OpenAI image generation request to an Ollama GenerateRequest.
func FromImageGenerationRequest(r ImageGenerationRequest) api.GenerateRequest {
req := api.GenerateRequest{
Model: r.Model,
Prompt: r.Prompt,
}
// Parse size if provided (e.g., "1024x768")
if r.Size != "" {
var w, h int32
if _, err := fmt.Sscanf(r.Size, "%dx%d", &w, &h); err == nil {
req.Width = w
req.Height = h
}
}
if r.Seed != nil {
if req.Options == nil {
req.Options = map[string]any{}
}
req.Options["seed"] = *r.Seed
}
return req
}
// ToImageGenerationResponse converts an Ollama GenerateResponse to an OpenAI ImageGenerationResponse.
func ToImageGenerationResponse(resp api.GenerateResponse) ImageGenerationResponse {
var data []ImageURLOrData
if resp.Image != "" {
data = []ImageURLOrData{{B64JSON: resp.Image}}
}
return ImageGenerationResponse{
Created: resp.CreatedAt.Unix(),
Data: data,
}
}
// ImageEditRequest is an OpenAI-compatible image edit request.
type ImageEditRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
Image string `json:"image"` // Base64-encoded image data
Size string `json:"size,omitempty"` // e.g., "1024x1024"
Seed *int64 `json:"seed,omitempty"`
}
// FromImageEditRequest converts an OpenAI image edit request to an Ollama GenerateRequest.
func FromImageEditRequest(r ImageEditRequest) (api.GenerateRequest, error) {
req := api.GenerateRequest{
Model: r.Model,
Prompt: r.Prompt,
}
// Decode the input image
if r.Image != "" {
imgData, err := decodeImageURL(r.Image)
if err != nil {
return api.GenerateRequest{}, fmt.Errorf("invalid image: %w", err)
}
req.Images = append(req.Images, imgData)
}
// Parse size if provided (e.g., "1024x768")
if r.Size != "" {
var w, h int32
if _, err := fmt.Sscanf(r.Size, "%dx%d", &w, &h); err == nil {
req.Width = w
req.Height = h
}
}
if r.Seed != nil {
if req.Options == nil {
req.Options = map[string]any{}
}
req.Options["seed"] = *r.Seed
}
return req, nil
}
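
Both removed converters parsed the size string the same way; fmt.Sscanf handles the WxH form directly. For example:

var w, h int32
if _, err := fmt.Sscanf("1024x768", "%dx%d", &w, &h); err == nil {
	// w == 1024, h == 768; a malformed size such as "big" fails the scan,
	// leaving Width and Height unset rather than returning an error.
}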

View File

@@ -448,86 +448,3 @@ func TestFromChatRequest_TopLogprobsRange(t *testing.T) {
}) })
} }
} }
func TestFromImageEditRequest_Basic(t *testing.T) {
req := ImageEditRequest{
Model: "test-model",
Prompt: "make it blue",
Image: prefix + image,
}
result, err := FromImageEditRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got %q", result.Model)
}
if result.Prompt != "make it blue" {
t.Errorf("expected prompt 'make it blue', got %q", result.Prompt)
}
if len(result.Images) != 1 {
t.Fatalf("expected 1 image, got %d", len(result.Images))
}
}
func TestFromImageEditRequest_WithSize(t *testing.T) {
req := ImageEditRequest{
Model: "test-model",
Prompt: "make it blue",
Image: prefix + image,
Size: "512x768",
}
result, err := FromImageEditRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Width != 512 {
t.Errorf("expected width 512, got %d", result.Width)
}
if result.Height != 768 {
t.Errorf("expected height 768, got %d", result.Height)
}
}
func TestFromImageEditRequest_WithSeed(t *testing.T) {
seed := int64(12345)
req := ImageEditRequest{
Model: "test-model",
Prompt: "make it blue",
Image: prefix + image,
Seed: &seed,
}
result, err := FromImageEditRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Options == nil {
t.Fatal("expected options to be set")
}
if result.Options["seed"] != seed {
t.Errorf("expected seed %d, got %v", seed, result.Options["seed"])
}
}
func TestFromImageEditRequest_InvalidImage(t *testing.T) {
req := ImageEditRequest{
Model: "test-model",
Prompt: "make it blue",
Image: "not-valid-base64",
}
_, err := FromImageEditRequest(req)
if err == nil {
t.Error("expected error for invalid image")
}
}

View File

@@ -4,7 +4,6 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"math/rand" "math/rand"
"time"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )
@@ -266,9 +265,9 @@ type ResponsesText struct {
type ResponsesTool struct { type ResponsesTool struct {
Type string `json:"type"` // "function" Type string `json:"type"` // "function"
Name string `json:"name"` Name string `json:"name"`
Description *string `json:"description"` // nullable but required Description string `json:"description,omitempty"`
Strict *bool `json:"strict"` // nullable but required Strict bool `json:"strict,omitempty"`
Parameters map[string]any `json:"parameters"` // nullable but required Parameters map[string]any `json:"parameters,omitempty"`
} }
type ResponsesRequest struct { type ResponsesRequest struct {
@@ -476,16 +475,11 @@ func convertTool(t ResponsesTool) (api.Tool, error) {
} }
} }
var description string
if t.Description != nil {
description = *t.Description
}
return api.Tool{ return api.Tool{
Type: t.Type, Type: t.Type,
Function: api.ToolFunction{ Function: api.ToolFunction{
Name: t.Name, Name: t.Name,
Description: description, Description: t.Description,
Parameters: params, Parameters: params,
}, },
}, nil }, nil
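
With Description and Strict now plain values rather than pointers, building and converting a tool gets simpler at call sites (illustrative values):

t := ResponsesTool{
	Type:        "function",
	Name:        "get_weather",
	Description: "Get the current weather for a location",
	Parameters: map[string]any{
		"type": "object",
		"properties": map[string]any{
			"location": map[string]any{"type": "string"},
		},
	},
}
tool, err := convertTool(t) // tool.Function.Description mirrors t.Description
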
@@ -522,60 +516,17 @@ func convertInputMessage(m ResponsesInputMessage) (api.Message, error) {
// Response types for the Responses API // Response types for the Responses API
// ResponsesTextField represents the text output configuration in the response.
type ResponsesTextField struct {
Format ResponsesTextFormat `json:"format"`
}
// ResponsesReasoningOutput represents reasoning configuration in the response.
type ResponsesReasoningOutput struct {
Effort *string `json:"effort,omitempty"`
Summary *string `json:"summary,omitempty"`
}
// ResponsesError represents an error in the response.
type ResponsesError struct {
Code string `json:"code"`
Message string `json:"message"`
}
// ResponsesIncompleteDetails represents details about why a response was incomplete.
type ResponsesIncompleteDetails struct {
Reason string `json:"reason"`
}
type ResponsesResponse struct { type ResponsesResponse struct {
ID string `json:"id"` ID string `json:"id"`
Object string `json:"object"` Object string `json:"object"`
CreatedAt int64 `json:"created_at"` CreatedAt int64 `json:"created_at"`
CompletedAt *int64 `json:"completed_at"` Status string `json:"status"`
Status string `json:"status"` Model string `json:"model"`
IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details"` Output []ResponsesOutputItem `json:"output"`
Model string `json:"model"` Usage *ResponsesUsage `json:"usage,omitempty"`
PreviousResponseID *string `json:"previous_response_id"` // TODO(drifkin): add `temperature` and `top_p` to the response, but this
Instructions *string `json:"instructions"` // requires additional plumbing to find the effective values since the
Output []ResponsesOutputItem `json:"output"` // defaults can come from the model or the request
Error *ResponsesError `json:"error"`
Tools []ResponsesTool `json:"tools"`
ToolChoice any `json:"tool_choice"`
Truncation string `json:"truncation"`
ParallelToolCalls bool `json:"parallel_tool_calls"`
Text ResponsesTextField `json:"text"`
TopP float64 `json:"top_p"`
PresencePenalty float64 `json:"presence_penalty"`
FrequencyPenalty float64 `json:"frequency_penalty"`
TopLogprobs int `json:"top_logprobs"`
Temperature float64 `json:"temperature"`
Reasoning *ResponsesReasoningOutput `json:"reasoning"`
Usage *ResponsesUsage `json:"usage"`
MaxOutputTokens *int `json:"max_output_tokens"`
MaxToolCalls *int `json:"max_tool_calls"`
Store bool `json:"store"`
Background bool `json:"background"`
ServiceTier string `json:"service_tier"`
Metadata map[string]any `json:"metadata"`
SafetyIdentifier *string `json:"safety_identifier"`
PromptCacheKey *string `json:"prompt_cache_key"`
} }
type ResponsesOutputItem struct { type ResponsesOutputItem struct {
@@ -599,39 +550,18 @@ type ResponsesReasoningSummary struct {
} }
type ResponsesOutputContent struct { type ResponsesOutputContent struct {
Type string `json:"type"` // "output_text" Type string `json:"type"` // "output_text"
Text string `json:"text"` Text string `json:"text"`
Annotations []any `json:"annotations"`
Logprobs []any `json:"logprobs"`
}
type ResponsesInputTokensDetails struct {
CachedTokens int `json:"cached_tokens"`
}
type ResponsesOutputTokensDetails struct {
ReasoningTokens int `json:"reasoning_tokens"`
} }
type ResponsesUsage struct { type ResponsesUsage struct {
InputTokens int `json:"input_tokens"` InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"` OutputTokens int `json:"output_tokens"`
TotalTokens int `json:"total_tokens"` TotalTokens int `json:"total_tokens"`
InputTokensDetails ResponsesInputTokensDetails `json:"input_tokens_details"`
OutputTokensDetails ResponsesOutputTokensDetails `json:"output_tokens_details"`
} }
// derefFloat64 returns the value of a float64 pointer, or a default if nil. // ToResponse converts an api.ChatResponse to a Responses API response
func derefFloat64(p *float64, def float64) float64 { func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse) ResponsesResponse {
if p != nil {
return *p
}
return def
}
// ToResponse converts an api.ChatResponse to a Responses API response.
// The request is used to echo back request parameters in the response.
func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse, request ResponsesRequest) ResponsesResponse {
var output []ResponsesOutputItem var output []ResponsesOutputItem
// Add reasoning item if thinking is present // Add reasoning item if thinking is present
@@ -655,7 +585,6 @@ func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse,
output = append(output, ResponsesOutputItem{ output = append(output, ResponsesOutputItem{
ID: fmt.Sprintf("fc_%s_%d", responseID, i), ID: fmt.Sprintf("fc_%s_%d", responseID, i),
Type: "function_call", Type: "function_call",
Status: "completed",
CallID: tc.ID, CallID: tc.ID,
Name: tc.Function.Name, Name: tc.Function.Name,
Arguments: tc.Function.Arguments, Arguments: tc.Function.Arguments,
@@ -669,90 +598,25 @@ func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse,
Role: "assistant", Role: "assistant",
Content: []ResponsesOutputContent{ Content: []ResponsesOutputContent{
{ {
Type: "output_text", Type: "output_text",
Text: chatResponse.Message.Content, Text: chatResponse.Message.Content,
Annotations: []any{},
Logprobs: []any{},
}, },
}, },
}) })
} }
var instructions *string
if request.Instructions != "" {
instructions = &request.Instructions
}
// Build truncation with default
truncation := "disabled"
if request.Truncation != nil {
truncation = *request.Truncation
}
tools := request.Tools
if tools == nil {
tools = []ResponsesTool{}
}
text := ResponsesTextField{
Format: ResponsesTextFormat{Type: "text"},
}
if request.Text != nil && request.Text.Format != nil {
text.Format = *request.Text.Format
}
// Build reasoning output from request
var reasoning *ResponsesReasoningOutput
if request.Reasoning.Effort != "" || request.Reasoning.Summary != "" {
reasoning = &ResponsesReasoningOutput{}
if request.Reasoning.Effort != "" {
reasoning.Effort = &request.Reasoning.Effort
}
if request.Reasoning.Summary != "" {
reasoning.Summary = &request.Reasoning.Summary
}
}
return ResponsesResponse{ return ResponsesResponse{
ID: responseID, ID: responseID,
Object: "response", Object: "response",
CreatedAt: chatResponse.CreatedAt.Unix(), CreatedAt: chatResponse.CreatedAt.Unix(),
CompletedAt: nil, // Set by middleware when writing final response Status: "completed",
Status: "completed", Model: model,
IncompleteDetails: nil, // Only populated if response incomplete Output: output,
Model: model,
PreviousResponseID: nil, // Not supported
Instructions: instructions,
Output: output,
Error: nil, // Only populated on failure
Tools: tools,
ToolChoice: "auto", // Default value
Truncation: truncation,
ParallelToolCalls: true, // Default value
Text: text,
TopP: derefFloat64(request.TopP, 1.0),
PresencePenalty: 0, // Default value
FrequencyPenalty: 0, // Default value
TopLogprobs: 0, // Default value
Temperature: derefFloat64(request.Temperature, 1.0),
Reasoning: reasoning,
Usage: &ResponsesUsage{ Usage: &ResponsesUsage{
InputTokens: chatResponse.PromptEvalCount, InputTokens: chatResponse.PromptEvalCount,
OutputTokens: chatResponse.EvalCount, OutputTokens: chatResponse.EvalCount,
TotalTokens: chatResponse.PromptEvalCount + chatResponse.EvalCount, TotalTokens: chatResponse.PromptEvalCount + chatResponse.EvalCount,
// TODO(drifkin): wire through the actual values
InputTokensDetails: ResponsesInputTokensDetails{CachedTokens: 0},
// TODO(drifkin): wire through the actual values
OutputTokensDetails: ResponsesOutputTokensDetails{ReasoningTokens: 0},
}, },
MaxOutputTokens: request.MaxOutputTokens,
MaxToolCalls: nil, // Not supported
Store: false, // We don't store responses
Background: request.Background,
ServiceTier: "default", // Default value
Metadata: map[string]any{},
SafetyIdentifier: nil, // Not supported
PromptCacheKey: nil, // Not supported
} }
} }
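
Call sites now pass only the chat response, for example (mirroring the updated tests later in this diff):

response := ToResponse("gpt-oss:20b", "resp_123", "msg_456", api.ChatResponse{
	Message: api.Message{Role: "assistant", Content: "The answer is 42"},
	Done:    true,
})
// response.Output carries the assistant message; response.Usage sums the token counts.
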
@@ -772,7 +636,6 @@ type ResponsesStreamConverter struct {
responseID string responseID string
itemID string itemID string
model string model string
request ResponsesRequest
// State tracking (mutated across Process calls) // State tracking (mutated across Process calls)
firstWrite bool firstWrite bool
@@ -805,12 +668,11 @@ func (c *ResponsesStreamConverter) newEvent(eventType string, data map[string]an
} }
// NewResponsesStreamConverter creates a new converter with the given configuration. // NewResponsesStreamConverter creates a new converter with the given configuration.
func NewResponsesStreamConverter(responseID, itemID, model string, request ResponsesRequest) *ResponsesStreamConverter { func NewResponsesStreamConverter(responseID, itemID, model string) *ResponsesStreamConverter {
return &ResponsesStreamConverter{ return &ResponsesStreamConverter{
responseID: responseID, responseID: responseID,
itemID: itemID, itemID: itemID,
model: model, model: model,
request: request,
firstWrite: true, firstWrite: true,
} }
} }
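
The streaming path changes the same way: the converter is now constructed without the request, as the updated tests below show.

converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
events := converter.Process(chunk) // chunk is an api.ChatResponse; events is []ResponsesStreamEvent
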
@@ -855,120 +717,25 @@ func (c *ResponsesStreamConverter) Process(r api.ChatResponse) []ResponsesStream
return events return events
} }
// buildResponseObject creates a full response object with all required fields for streaming events.
func (c *ResponsesStreamConverter) buildResponseObject(status string, output []any, usage map[string]any) map[string]any {
var instructions any = nil
if c.request.Instructions != "" {
instructions = c.request.Instructions
}
truncation := "disabled"
if c.request.Truncation != nil {
truncation = *c.request.Truncation
}
var tools []any
if c.request.Tools != nil {
for _, t := range c.request.Tools {
tools = append(tools, map[string]any{
"type": t.Type,
"name": t.Name,
"description": t.Description,
"strict": t.Strict,
"parameters": t.Parameters,
})
}
}
if tools == nil {
tools = []any{}
}
textFormat := map[string]any{"type": "text"}
if c.request.Text != nil && c.request.Text.Format != nil {
textFormat = map[string]any{
"type": c.request.Text.Format.Type,
}
if c.request.Text.Format.Name != "" {
textFormat["name"] = c.request.Text.Format.Name
}
if c.request.Text.Format.Schema != nil {
textFormat["schema"] = c.request.Text.Format.Schema
}
if c.request.Text.Format.Strict != nil {
textFormat["strict"] = *c.request.Text.Format.Strict
}
}
var reasoning any = nil
if c.request.Reasoning.Effort != "" || c.request.Reasoning.Summary != "" {
r := map[string]any{}
if c.request.Reasoning.Effort != "" {
r["effort"] = c.request.Reasoning.Effort
} else {
r["effort"] = nil
}
if c.request.Reasoning.Summary != "" {
r["summary"] = c.request.Reasoning.Summary
} else {
r["summary"] = nil
}
reasoning = r
}
// Build top_p and temperature with defaults
topP := 1.0
if c.request.TopP != nil {
topP = *c.request.TopP
}
temperature := 1.0
if c.request.Temperature != nil {
temperature = *c.request.Temperature
}
return map[string]any{
"id": c.responseID,
"object": "response",
"created_at": time.Now().Unix(),
"completed_at": nil,
"status": status,
"incomplete_details": nil,
"model": c.model,
"previous_response_id": nil,
"instructions": instructions,
"output": output,
"error": nil,
"tools": tools,
"tool_choice": "auto",
"truncation": truncation,
"parallel_tool_calls": true,
"text": map[string]any{"format": textFormat},
"top_p": topP,
"presence_penalty": 0,
"frequency_penalty": 0,
"top_logprobs": 0,
"temperature": temperature,
"reasoning": reasoning,
"usage": usage,
"max_output_tokens": c.request.MaxOutputTokens,
"max_tool_calls": nil,
"store": false,
"background": c.request.Background,
"service_tier": "default",
"metadata": map[string]any{},
"safety_identifier": nil,
"prompt_cache_key": nil,
}
}
func (c *ResponsesStreamConverter) createResponseCreatedEvent() ResponsesStreamEvent { func (c *ResponsesStreamConverter) createResponseCreatedEvent() ResponsesStreamEvent {
return c.newEvent("response.created", map[string]any{ return c.newEvent("response.created", map[string]any{
"response": c.buildResponseObject("in_progress", []any{}, nil), "response": map[string]any{
"id": c.responseID,
"object": "response",
"status": "in_progress",
"output": []any{},
},
}) })
} }
func (c *ResponsesStreamConverter) createResponseInProgressEvent() ResponsesStreamEvent { func (c *ResponsesStreamConverter) createResponseInProgressEvent() ResponsesStreamEvent {
return c.newEvent("response.in_progress", map[string]any{ return c.newEvent("response.in_progress", map[string]any{
"response": c.buildResponseObject("in_progress", []any{}, nil), "response": map[string]any{
"id": c.responseID,
"object": "response",
"status": "in_progress",
"output": []any{},
},
}) })
} }
@@ -995,10 +762,9 @@ func (c *ResponsesStreamConverter) processThinking(thinking string) []ResponsesS
// Emit delta // Emit delta
events = append(events, c.newEvent("response.reasoning_summary_text.delta", map[string]any{ events = append(events, c.newEvent("response.reasoning_summary_text.delta", map[string]any{
"item_id": c.reasoningItemID, "item_id": c.reasoningItemID,
"output_index": c.outputIndex, "output_index": c.outputIndex,
"summary_index": 0, "delta": thinking,
"delta": thinking,
})) }))
// TODO(drifkin): consider adding // TODO(drifkin): consider adding
@@ -1017,10 +783,9 @@ func (c *ResponsesStreamConverter) finishReasoning() []ResponsesStreamEvent {
events := []ResponsesStreamEvent{ events := []ResponsesStreamEvent{
c.newEvent("response.reasoning_summary_text.done", map[string]any{ c.newEvent("response.reasoning_summary_text.done", map[string]any{
"item_id": c.reasoningItemID, "item_id": c.reasoningItemID,
"output_index": c.outputIndex, "output_index": c.outputIndex,
"summary_index": 0, "text": c.accumulatedThinking,
"text": c.accumulatedThinking,
}), }),
c.newEvent("response.output_item.done", map[string]any{ c.newEvent("response.output_item.done", map[string]any{
"output_index": c.outputIndex, "output_index": c.outputIndex,
@@ -1133,10 +898,8 @@ func (c *ResponsesStreamConverter) processTextContent(content string) []Response
"output_index": c.outputIndex, "output_index": c.outputIndex,
"content_index": c.contentIndex, "content_index": c.contentIndex,
"part": map[string]any{ "part": map[string]any{
"type": "output_text", "type": "output_text",
"text": "", "text": "",
"annotations": []any{},
"logprobs": []any{},
}, },
})) }))
} }
@@ -1150,7 +913,6 @@ func (c *ResponsesStreamConverter) processTextContent(content string) []Response
"output_index": c.outputIndex, "output_index": c.outputIndex,
"content_index": 0, "content_index": 0,
"delta": content, "delta": content,
"logprobs": []any{},
})) }))
return events return events
@@ -1182,10 +944,8 @@ func (c *ResponsesStreamConverter) buildFinalOutput() []any {
"status": "completed", "status": "completed",
"role": "assistant", "role": "assistant",
"content": []map[string]any{{ "content": []map[string]any{{
"type": "output_text", "type": "output_text",
"text": c.accumulatedText, "text": c.accumulatedText,
"annotations": []any{},
"logprobs": []any{},
}}, }},
}) })
} }
@@ -1207,7 +967,6 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo
"output_index": c.outputIndex, "output_index": c.outputIndex,
"content_index": 0, "content_index": 0,
"text": c.accumulatedText, "text": c.accumulatedText,
"logprobs": []any{},
})) }))
// response.content_part.done // response.content_part.done
@@ -1216,10 +975,8 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo
"output_index": c.outputIndex, "output_index": c.outputIndex,
"content_index": 0, "content_index": 0,
"part": map[string]any{ "part": map[string]any{
"type": "output_text", "type": "output_text",
"text": c.accumulatedText, "text": c.accumulatedText,
"annotations": []any{},
"logprobs": []any{},
}, },
})) }))
@@ -1232,31 +989,26 @@ func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []Respo
"status": "completed", "status": "completed",
"role": "assistant", "role": "assistant",
"content": []map[string]any{{ "content": []map[string]any{{
"type": "output_text", "type": "output_text",
"text": c.accumulatedText, "text": c.accumulatedText,
"annotations": []any{},
"logprobs": []any{},
}}, }},
}, },
})) }))
} }
// response.completed // response.completed
usage := map[string]any{
"input_tokens": r.PromptEvalCount,
"output_tokens": r.EvalCount,
"total_tokens": r.PromptEvalCount + r.EvalCount,
"input_tokens_details": map[string]any{
"cached_tokens": 0,
},
"output_tokens_details": map[string]any{
"reasoning_tokens": 0,
},
}
response := c.buildResponseObject("completed", c.buildFinalOutput(), usage)
response["completed_at"] = time.Now().Unix()
events = append(events, c.newEvent("response.completed", map[string]any{ events = append(events, c.newEvent("response.completed", map[string]any{
"response": response, "response": map[string]any{
"id": c.responseID,
"object": "response",
"status": "completed",
"output": c.buildFinalOutput(),
"usage": map[string]any{
"input_tokens": r.PromptEvalCount,
"output_tokens": r.EvalCount,
"total_tokens": r.PromptEvalCount + r.EvalCount,
},
},
})) }))
return events return events

View File

@@ -850,7 +850,7 @@ func TestFromResponsesRequest_Images(t *testing.T) {
} }
func TestResponsesStreamConverter_TextOnly(t *testing.T) { func TestResponsesStreamConverter_TextOnly(t *testing.T) {
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
// First chunk with content // First chunk with content
events := converter.Process(api.ChatResponse{ events := converter.Process(api.ChatResponse{
@@ -916,7 +916,7 @@ func TestResponsesStreamConverter_TextOnly(t *testing.T) {
} }
func TestResponsesStreamConverter_ToolCalls(t *testing.T) { func TestResponsesStreamConverter_ToolCalls(t *testing.T) {
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
events := converter.Process(api.ChatResponse{ events := converter.Process(api.ChatResponse{
Message: api.Message{ Message: api.Message{
@@ -952,7 +952,7 @@ func TestResponsesStreamConverter_ToolCalls(t *testing.T) {
} }
func TestResponsesStreamConverter_Reasoning(t *testing.T) { func TestResponsesStreamConverter_Reasoning(t *testing.T) {
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
// First chunk with thinking // First chunk with thinking
events := converter.Process(api.ChatResponse{ events := converter.Process(api.ChatResponse{
@@ -1267,7 +1267,7 @@ func TestToResponse_WithReasoning(t *testing.T) {
Content: "The answer is 42", Content: "The answer is 42",
}, },
Done: true, Done: true,
}, ResponsesRequest{}) })
// Should have 2 output items: reasoning + message // Should have 2 output items: reasoning + message
if len(response.Output) != 2 { if len(response.Output) != 2 {
@@ -1638,7 +1638,7 @@ func TestFromResponsesRequest_ShorthandFormats(t *testing.T) {
func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) { func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) {
// Verify that response.output_item.done includes content field for messages // Verify that response.output_item.done includes content field for messages
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
// First chunk // First chunk
converter.Process(api.ChatResponse{ converter.Process(api.ChatResponse{
@@ -1686,7 +1686,7 @@ func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) {
func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) { func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) {
// Verify that response.completed includes the output array // Verify that response.completed includes the output array
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
// Process some content // Process some content
converter.Process(api.ChatResponse{ converter.Process(api.ChatResponse{
@@ -1730,7 +1730,7 @@ func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T)
func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) { func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) {
// Verify that response.created includes an empty output array // Verify that response.created includes an empty output array
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
events := converter.Process(api.ChatResponse{ events := converter.Process(api.ChatResponse{
Message: api.Message{Content: "Hi"}, Message: api.Message{Content: "Hi"},
@@ -1757,7 +1757,7 @@ func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) {
func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) { func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) {
// Verify that events include incrementing sequence numbers // Verify that events include incrementing sequence numbers
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
events := converter.Process(api.ChatResponse{ events := converter.Process(api.ChatResponse{
Message: api.Message{Content: "Hello"}, Message: api.Message{Content: "Hello"},
@@ -1791,7 +1791,7 @@ func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) {
func TestResponsesStreamConverter_FunctionCallStatus(t *testing.T) { func TestResponsesStreamConverter_FunctionCallStatus(t *testing.T) {
// Verify that function call items include status field // Verify that function call items include status field
converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b", ResponsesRequest{}) converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
events := converter.Process(api.ChatResponse{ events := converter.Process(api.ChatResponse{
Message: api.Message{ Message: api.Message{

View File

@@ -5,7 +5,6 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"strings"
) )
type Prompt struct { type Prompt struct {
@@ -37,11 +36,10 @@ type Terminal struct {
} }
type Instance struct { type Instance struct {
Prompt *Prompt Prompt *Prompt
Terminal *Terminal Terminal *Terminal
History *History History *History
Pasting bool Pasting bool
pastedLines []string
} }
func New(prompt Prompt) (*Instance, error) { func New(prompt Prompt) (*Instance, error) {
@@ -95,21 +93,7 @@ func (i *Instance) Readline() (string, error) {
var currentLineBuf []rune var currentLineBuf []rune
// draining tracks if we're processing buffered input from cooked mode.
// In cooked mode Enter sends \n, but in raw mode Ctrl+J sends \n.
// We treat \n from cooked mode as submit, not multiline.
// We check Buffered() after the first read since the bufio buffer is
// empty until then. This is compatible with """ multiline mode in
// interactive.go since each Readline() call is independent.
var draining, stopDraining bool
for { for {
// Apply deferred state change from previous iteration
if stopDraining {
draining = false
stopDraining = false
}
// don't show placeholder when pasting unless we're in multiline mode // don't show placeholder when pasting unless we're in multiline mode
showPlaceholder := !i.Pasting || i.Prompt.UseAlt showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder { if buf.IsEmpty() && showPlaceholder {
@@ -119,15 +103,6 @@ func (i *Instance) Readline() (string, error) {
r, err := i.Terminal.Read() r, err := i.Terminal.Read()
// After reading, check if there's more buffered data. If so, we're
// processing cooked-mode input. Once the buffer empties, the current
// char is the last buffered one (we still drain it), then we stop on the next iteration.
if i.Terminal.reader.Buffered() > 0 {
draining = true
} else if draining {
stopDraining = true
}
if buf.IsEmpty() { if buf.IsEmpty() {
fmt.Print(ClearToEOL) fmt.Print(ClearToEOL)
} }
@@ -199,8 +174,6 @@ func (i *Instance) Readline() (string, error) {
case CharEsc: case CharEsc:
esc = true esc = true
case CharInterrupt: case CharInterrupt:
i.pastedLines = nil
i.Prompt.UseAlt = false
return "", ErrInterrupt return "", ErrInterrupt
case CharPrev: case CharPrev:
i.historyPrev(buf, &currentLineBuf) i.historyPrev(buf, &currentLineBuf)
@@ -215,23 +188,7 @@ func (i *Instance) Readline() (string, error) {
case CharForward: case CharForward:
buf.MoveRight() buf.MoveRight()
case CharBackspace, CharCtrlH: case CharBackspace, CharCtrlH:
if buf.IsEmpty() && len(i.pastedLines) > 0 { buf.Remove()
lastIdx := len(i.pastedLines) - 1
prevLine := i.pastedLines[lastIdx]
i.pastedLines = i.pastedLines[:lastIdx]
fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + ClearToEOL)
if len(i.pastedLines) == 0 {
fmt.Print(i.Prompt.Prompt)
i.Prompt.UseAlt = false
} else {
fmt.Print(i.Prompt.AltPrompt)
}
for _, r := range prevLine {
buf.Add(r)
}
} else {
buf.Remove()
}
case CharTab: case CharTab:
// todo: convert back to real tabs // todo: convert back to real tabs
for range 8 { for range 8 {
@@ -254,33 +211,13 @@ func (i *Instance) Readline() (string, error) {
case CharCtrlZ: case CharCtrlZ:
fd := os.Stdin.Fd() fd := os.Stdin.Fd()
return handleCharCtrlZ(fd, i.Terminal.termios) return handleCharCtrlZ(fd, i.Terminal.termios)
case CharCtrlJ: case CharEnter, CharCtrlJ:
// If not draining cooked-mode input, treat as multiline
if !draining {
i.pastedLines = append(i.pastedLines, buf.String())
buf.Buf.Clear()
buf.Pos = 0
buf.DisplayPos = 0
buf.LineHasSpace.Clear()
fmt.Println()
fmt.Print(i.Prompt.AltPrompt)
i.Prompt.UseAlt = true
continue
}
// Draining cooked-mode input: treat \n as submit
fallthrough
case CharEnter:
output := buf.String() output := buf.String()
if len(i.pastedLines) > 0 {
output = strings.Join(i.pastedLines, "\n") + "\n" + output
i.pastedLines = nil
}
if output != "" { if output != "" {
i.History.Add(output) i.History.Add(output)
} }
buf.MoveToEnd() buf.MoveToEnd()
fmt.Println() fmt.Println()
i.Prompt.UseAlt = false
return output, nil return output, nil
default: default:

View File

@@ -60,7 +60,7 @@ _build_darwin() {
cmake --install $BUILD_DIR --component MLX cmake --install $BUILD_DIR --component MLX
# Override CGO flags to point to the amd64 build directory # Override CGO flags to point to the amd64 build directory
MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0" MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
MLX_CGO_LDFLAGS="-ldl -lc++ -framework Accelerate -mmacosx-version-min=14.0" MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Accelerate -mmacosx-version-min=14.0"
else else
BUILD_DIR=build BUILD_DIR=build
cmake --preset MLX \ cmake --preset MLX \
@@ -71,12 +71,10 @@ _build_darwin() {
cmake --install $BUILD_DIR --component MLX cmake --install $BUILD_DIR --component MLX
# Use default CGO flags from mlx.go for arm64 # Use default CGO flags from mlx.go for arm64
MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0" MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0"
MLX_CGO_LDFLAGS="-lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0" MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0"
fi fi
GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX . GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX/ollama-mlx .
# Copy MLX libraries to same directory as executable for dlopen GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 go build -o $INSTALL_PREFIX .
cp $INSTALL_PREFIX/lib/ollama/libmlxc.dylib $INSTALL_PREFIX/
cp $INSTALL_PREFIX/lib/ollama/libmlx.dylib $INSTALL_PREFIX/
done done
} }
@@ -84,10 +82,12 @@ _sign_darwin() {
status "Creating universal binary..." status "Creating universal binary..."
mkdir -p dist/darwin mkdir -p dist/darwin
lipo -create -output dist/darwin/ollama dist/darwin-*/ollama lipo -create -output dist/darwin/ollama dist/darwin-*/ollama
lipo -create -output dist/darwin/ollama-mlx dist/darwin-*/ollama-mlx
chmod +x dist/darwin/ollama chmod +x dist/darwin/ollama
chmod +x dist/darwin/ollama-mlx
if [ -n "$APPLE_IDENTITY" ]; then if [ -n "$APPLE_IDENTITY" ]; then
for F in dist/darwin/ollama dist/darwin-*/lib/ollama/*; do for F in dist/darwin/ollama dist/darwin-*/lib/ollama/* dist/darwin/ollama-mlx; do
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F
done done
@@ -154,6 +154,7 @@ _build_macapp() {
mkdir -p dist/Ollama.app/Contents/Resources mkdir -p dist/Ollama.app/Contents/Resources
if [ -d dist/darwin-amd64 ]; then if [ -d dist/darwin-amd64 ]; then
lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama
lipo -create -output dist/Ollama.app/Contents/Resources/ollama-mlx dist/darwin-amd64/ollama-mlx dist/darwin-arm64/ollama-mlx
for F in dist/darwin-amd64/lib/ollama/*mlx*.dylib ; do for F in dist/darwin-amd64/lib/ollama/*mlx*.dylib ; do
lipo -create -output dist/darwin/$(basename $F) $F dist/darwin-arm64/lib/ollama/$(basename $F) lipo -create -output dist/darwin/$(basename $F) $F dist/darwin-arm64/lib/ollama/$(basename $F)
done done
@@ -165,27 +166,28 @@ _build_macapp() {
cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama
cp dist/darwin/*.so dist/darwin/*.dylib dist/Ollama.app/Contents/Resources/ cp dist/darwin/*.so dist/darwin/*.dylib dist/Ollama.app/Contents/Resources/
fi fi
cp -a dist/darwin/ollama-mlx dist/Ollama.app/Contents/Resources/ollama-mlx
chmod a+x dist/Ollama.app/Contents/Resources/ollama chmod a+x dist/Ollama.app/Contents/Resources/ollama
# Sign # Sign
if [ -n "$APPLE_IDENTITY" ]; then if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama
for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib ; do for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/*.metallib dist/Ollama.app/Contents/Resources/ollama-mlx ; do
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime ${lib} codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime ${lib}
done done
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app
fi fi
rm -f dist/Ollama-darwin.zip rm -f dist/Ollama-darwin.zip
ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
(cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz (cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama ollama-mlx *.so *.dylib *.metallib 2>/dev/null) | gzip -9vc > dist/ollama-darwin.tgz
# Notarize and Staple # Notarize and Staple
if [ -n "$APPLE_IDENTITY" ]; then if [ -n "$APPLE_IDENTITY" ]; then
$(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID" $(xcrun -f notarytool) submit dist/Ollama-darwin.zip --wait --timeout 20m --apple-id "$APPLE_ID" --password "$APPLE_PASSWORD" --team-id "$APPLE_TEAM_ID"
rm -f dist/Ollama-darwin.zip rm -f dist/Ollama-darwin.zip
$(xcrun -f stapler) staple dist/Ollama.app $(xcrun -f stapler) staple dist/Ollama.app
ditto -c -k --norsrc --keepParent dist/Ollama.app dist/Ollama-darwin.zip ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip
rm -f dist/Ollama.dmg rm -f dist/Ollama.dmg

View File

@@ -50,17 +50,12 @@ func (r registryChallenge) URL() (*url.URL, error) {
return redirectURL, nil return redirectURL, nil
} }
func getAuthorizationToken(ctx context.Context, challenge registryChallenge, originalHost string) (string, error) { func getAuthorizationToken(ctx context.Context, challenge registryChallenge) (string, error) {
redirectURL, err := challenge.URL() redirectURL, err := challenge.URL()
if err != nil { if err != nil {
return "", err return "", err
} }
// Validate that the realm host matches the original request host to prevent sending tokens cross-origin.
if redirectURL.Host != originalHost {
return "", fmt.Errorf("realm host %q does not match original host %q", redirectURL.Host, originalHost)
}
sha256sum := sha256.Sum256(nil) sha256sum := sha256.Sum256(nil)
data := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, redirectURL.String(), base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(sha256sum[:]))))) data := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, redirectURL.String(), base64.StdEncoding.EncodeToString([]byte(hex.EncodeToString(sha256sum[:])))))

View File

@@ -1,113 +0,0 @@
package server
import (
"context"
"strings"
"testing"
"time"
)
func TestGetAuthorizationTokenRejectsCrossDomain(t *testing.T) {
tests := []struct {
realm string
originalHost string
wantMismatch bool
}{
{"https://example.com/token", "example.com", false},
{"https://example.com/token", "other.com", true},
{"https://example.com/token", "localhost:8000", true},
{"https://localhost:5000/token", "localhost:5000", false},
{"https://localhost:5000/token", "localhost:6000", true},
}
for _, tt := range tests {
t.Run(tt.originalHost, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
challenge := registryChallenge{Realm: tt.realm, Service: "test", Scope: "repo:x:pull"}
_, err := getAuthorizationToken(ctx, challenge, tt.originalHost)
isMismatch := err != nil && strings.Contains(err.Error(), "does not match")
if tt.wantMismatch && !isMismatch {
t.Errorf("expected domain mismatch error, got: %v", err)
}
if !tt.wantMismatch && isMismatch {
t.Errorf("unexpected domain mismatch error: %v", err)
}
})
}
}
func TestParseRegistryChallenge(t *testing.T) {
tests := []struct {
input string
wantRealm, wantService, wantScope string
}{
{
`Bearer realm="https://auth.example.com/token",service="registry",scope="repo:foo:pull"`,
"https://auth.example.com/token", "registry", "repo:foo:pull",
},
{
`Bearer realm="https://r.ollama.ai/v2/token",service="ollama",scope="-"`,
"https://r.ollama.ai/v2/token", "ollama", "-",
},
{"", "", "", ""},
}
for _, tt := range tests {
result := parseRegistryChallenge(tt.input)
if result.Realm != tt.wantRealm || result.Service != tt.wantService || result.Scope != tt.wantScope {
t.Errorf("parseRegistryChallenge(%q) = {%q, %q, %q}, want {%q, %q, %q}",
tt.input, result.Realm, result.Service, result.Scope,
tt.wantRealm, tt.wantService, tt.wantScope)
}
}
}
func TestRegistryChallengeURL(t *testing.T) {
challenge := registryChallenge{
Realm: "https://auth.example.com/token",
Service: "registry",
Scope: "repo:foo:pull repo:bar:push",
}
u, err := challenge.URL()
if err != nil {
t.Fatalf("URL() error: %v", err)
}
if u.Host != "auth.example.com" {
t.Errorf("host = %q, want %q", u.Host, "auth.example.com")
}
if u.Path != "/token" {
t.Errorf("path = %q, want %q", u.Path, "/token")
}
q := u.Query()
if q.Get("service") != "registry" {
t.Errorf("service = %q, want %q", q.Get("service"), "registry")
}
if scopes := q["scope"]; len(scopes) != 2 {
t.Errorf("scope count = %d, want 2", len(scopes))
}
if q.Get("ts") == "" {
t.Error("missing ts")
}
if q.Get("nonce") == "" {
t.Error("missing nonce")
}
// Nonces should differ between calls
u2, _ := challenge.URL()
if q.Get("nonce") == u2.Query().Get("nonce") {
t.Error("nonce should be unique per call")
}
}
func TestRegistryChallengeURLInvalid(t *testing.T) {
challenge := registryChallenge{Realm: "://invalid"}
if _, err := challenge.URL(); err == nil {
t.Error("expected error for invalid URL")
}
}

View File

@@ -28,7 +28,6 @@ import (
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
ofs "github.com/ollama/ollama/fs" ofs "github.com/ollama/ollama/fs"
"github.com/ollama/ollama/fs/ggml" "github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
"github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model" "github.com/ollama/ollama/types/model"
@@ -91,7 +90,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
ch <- resp ch <- resp
} }
oldManifest, _ := manifest.ParseNamedManifest(name) oldManifest, _ := ParseNamedManifest(name)
var baseLayers []*layerGGML var baseLayers []*layerGGML
var err error var err error
@@ -124,9 +123,9 @@ func (s *Server) CreateHandler(c *gin.Context) {
} }
if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "") { if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "") {
mf, mErr := manifest.ParseNamedManifest(fromName) manifest, mErr := ParseNamedManifest(fromName)
if mErr == nil && mf.Config.Digest != "" { if mErr == nil && manifest.Config.Digest != "" {
configPath, pErr := manifest.BlobsPath(mf.Config.Digest) configPath, pErr := GetBlobsPath(manifest.Config.Digest)
if pErr == nil { if pErr == nil {
if cfgFile, fErr := os.Open(configPath); fErr == nil { if cfgFile, fErr := os.Open(configPath); fErr == nil {
var baseConfig model.ConfigV2 var baseConfig model.ConfigV2
@@ -343,7 +342,7 @@ func detectModelTypeFromFiles(files map[string]string) string {
return "gguf" return "gguf"
} else { } else {
// try to see if we can find a gguf file even without the file extension // try to see if we can find a gguf file even without the file extension
blobPath, err := manifest.BlobsPath(files[fn]) blobPath, err := GetBlobsPath(files[fn])
if err != nil { if err != nil {
slog.Error("error getting blobs path", "file", fn) slog.Error("error getting blobs path", "file", fn)
return "" return ""
@@ -395,7 +394,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
return nil, fmt.Errorf("%w: %s: %s", errFilePath, err, fp) return nil, fmt.Errorf("%w: %s: %s", errFilePath, err, fp)
} }
blobPath, err := manifest.BlobsPath(digest) blobPath, err := GetBlobsPath(digest)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -433,7 +432,7 @@ func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, is
return nil, err return nil, err
} }
layer, err := manifest.NewLayer(t, mediaType) layer, err := NewLayer(t, mediaType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -466,7 +465,7 @@ func kvFromLayers(baseLayers []*layerGGML) (ofs.Config, error) {
} }
func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, config *model.ConfigV2, fn func(resp api.ProgressResponse)) (err error) { func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, config *model.ConfigV2, fn func(resp api.ProgressResponse)) (err error) {
var layers []manifest.Layer var layers []Layer
for _, layer := range baseLayers { for _, layer := range baseLayers {
if layer.GGML != nil { if layer.GGML != nil {
quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization)) quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization))
@@ -551,13 +550,13 @@ func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML,
} }
for _, layer := range layers { for _, layer := range layers {
if layer.Status != "" { if layer.status != "" {
fn(api.ProgressResponse{Status: layer.Status}) fn(api.ProgressResponse{Status: layer.status})
} }
} }
fn(api.ProgressResponse{Status: "writing manifest"}) fn(api.ProgressResponse{Status: "writing manifest"})
if err := manifest.WriteManifest(name, *configLayer, layers); err != nil { if err := WriteManifest(name, *configLayer, layers); err != nil {
return err return err
} }
@@ -578,7 +577,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
return nil, err return nil, err
} }
blob, err := manifest.BlobsPath(layer.Digest) blob, err := GetBlobsPath(layer.Digest)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -600,7 +599,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
} }
temp.Seek(0, io.SeekStart) temp.Seek(0, io.SeekStart)
fn(api.ProgressResponse{Status: "verifying conversion"}) fn(api.ProgressResponse{Status: "verifying conversion"})
newLayer, err := manifest.NewLayer(temp, layer.MediaType) newLayer, err := NewLayer(temp, layer.MediaType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -620,7 +619,7 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
var layers []*layerGGML var layers []*layerGGML
fn(api.ProgressResponse{Status: "parsing GGUF"}) fn(api.ProgressResponse{Status: "parsing GGUF"})
blobPath, err := manifest.BlobsPath(digest) blobPath, err := GetBlobsPath(digest)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -655,7 +654,7 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
mediatype = "application/vnd.ollama.image.projector" mediatype = "application/vnd.ollama.image.projector"
} }
layer, err := manifest.NewLayerFromLayer(digest, mediatype, blob.Name()) layer, err := NewLayerFromLayer(digest, mediatype, blob.Name())
if err != nil { if err != nil {
slog.Debug("could not create new layer from layer", "error", err) slog.Debug("could not create new layer from layer", "error", err)
return nil, err return nil, err
@@ -666,8 +665,8 @@ func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML
return detectChatTemplate(layers) return detectChatTemplate(layers)
} }
func removeLayer(layers []manifest.Layer, mediatype string) []manifest.Layer { func removeLayer(layers []Layer, mediatype string) []Layer {
return slices.DeleteFunc(layers, func(layer manifest.Layer) bool { return slices.DeleteFunc(layers, func(layer Layer) bool {
if layer.MediaType != mediatype { if layer.MediaType != mediatype {
return false return false
} }
@@ -681,7 +680,7 @@ func removeLayer(layers []manifest.Layer, mediatype string) []manifest.Layer {
}) })
} }
func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) { func setTemplate(layers []Layer, t string) ([]Layer, error) {
layers = removeLayer(layers, "application/vnd.ollama.image.template") layers = removeLayer(layers, "application/vnd.ollama.image.template")
if _, err := template.Parse(t); err != nil { if _, err := template.Parse(t); err != nil {
return nil, fmt.Errorf("%w: %s", errBadTemplate, err) return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
@@ -691,7 +690,7 @@ func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) {
} }
blob := strings.NewReader(t) blob := strings.NewReader(t)
layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.template") layer, err := NewLayer(blob, "application/vnd.ollama.image.template")
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -700,11 +699,11 @@ func setTemplate(layers []manifest.Layer, t string) ([]manifest.Layer, error) {
return layers, nil return layers, nil
} }
-func setSystem(layers []manifest.Layer, s string) ([]manifest.Layer, error) {
+func setSystem(layers []Layer, s string) ([]Layer, error) {
 	layers = removeLayer(layers, "application/vnd.ollama.image.system")
 	if s != "" {
 		blob := strings.NewReader(s)
-		layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.system")
+		layer, err := NewLayer(blob, "application/vnd.ollama.image.system")
 		if err != nil {
 			return nil, err
 		}
@@ -713,9 +712,9 @@ func setSystem(layers []manifest.Layer, s string) ([]manifest.Layer, error) {
 	return layers, nil
 }

-func setLicense(layers []manifest.Layer, l string) ([]manifest.Layer, error) {
+func setLicense(layers []Layer, l string) ([]Layer, error) {
 	blob := strings.NewReader(l)
-	layer, err := manifest.NewLayer(blob, "application/vnd.ollama.image.license")
+	layer, err := NewLayer(blob, "application/vnd.ollama.image.license")
 	if err != nil {
 		return nil, err
 	}
@@ -723,7 +722,7 @@ func setLicense(layers []manifest.Layer, l string) ([]manifest.Layer, error) {
 	return layers, nil
 }

-func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer, error) {
+func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
 	if p == nil {
 		p = make(map[string]any)
 	}
@@ -732,7 +731,7 @@ func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer,
 			continue
 		}
-		digestPath, err := manifest.BlobsPath(layer.Digest)
+		digestPath, err := GetBlobsPath(layer.Digest)
 		if err != nil {
 			return nil, err
 		}
@@ -766,7 +765,7 @@ func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer,
 	if err := json.NewEncoder(&b).Encode(p); err != nil {
 		return nil, err
 	}
-	layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.params")
+	layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
 	if err != nil {
 		return nil, err
 	}
@@ -774,7 +773,7 @@ func setParameters(layers []manifest.Layer, p map[string]any) ([]manifest.Layer,
 	return layers, nil
 }

-func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, error) {
+func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
 	// this leaves the old messages intact if no new messages were specified
 	// which may not be the correct behaviour
 	if len(m) == 0 {
@@ -787,7 +786,7 @@ func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, er
 	if err := json.NewEncoder(&b).Encode(m); err != nil {
 		return nil, err
 	}
-	layer, err := manifest.NewLayer(&b, "application/vnd.ollama.image.messages")
+	layer, err := NewLayer(&b, "application/vnd.ollama.image.messages")
 	if err != nil {
 		return nil, err
 	}
@@ -795,7 +794,7 @@ func setMessages(layers []manifest.Layer, m []api.Message) ([]manifest.Layer, er
 	return layers, nil
 }

-func createConfigLayer(layers []manifest.Layer, config model.ConfigV2) (*manifest.Layer, error) {
+func createConfigLayer(layers []Layer, config model.ConfigV2) (*Layer, error) {
 	digests := make([]string, len(layers))
 	for i, layer := range layers {
 		digests[i] = layer.Digest
@@ -806,7 +805,7 @@ func createConfigLayer(layers []manifest.Layer, config model.ConfigV2) (*manifes
 	if err := json.NewEncoder(&b).Encode(config); err != nil {
 		return nil, err
 	}
-	layer, err := manifest.NewLayer(&b, "application/vnd.docker.container.image.v1+json")
+	layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
 	if err != nil {
 		return nil, err
 	}

View File

@@ -10,7 +10,6 @@ import (
 	"testing"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/manifest"
 )

 func TestConvertFromSafetensors(t *testing.T) {
@@ -18,7 +17,7 @@ func TestConvertFromSafetensors(t *testing.T) {
 	// Helper function to create a new layer and return its digest
 	makeTemp := func(content string) string {
-		l, err := manifest.NewLayer(strings.NewReader(content), "application/octet-stream")
+		l, err := NewLayer(strings.NewReader(content), "application/octet-stream")
 		if err != nil {
 			t.Fatalf("Failed to create layer: %v", err)
 		}

View File

@@ -24,8 +24,6 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/manifest"
-	"github.com/ollama/ollama/types/model"
 )

 const maxRetries = 6
@@ -97,11 +95,48 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
 }

 const (
-	numDownloadParts            = 16
+	// numDownloadParts is the default number of concurrent download parts for standard downloads
+	numDownloadParts = 16

+	// numHFDownloadParts is the reduced number of concurrent download parts for HuggingFace
+	// downloads to avoid triggering rate limits (HTTP 429 errors). See GitHub issue #13297.
+	numHFDownloadParts = 4
 	minDownloadPartSize int64 = 100 * format.MegaByte
 	maxDownloadPartSize int64 = 1000 * format.MegaByte
 )

+// isHuggingFaceURL returns true if the URL is from a HuggingFace domain.
+// This includes:
+//   - huggingface.co (main domain)
+//   - *.huggingface.co (subdomains like cdn-lfs.huggingface.co)
+//   - hf.co (shortlink domain)
+//   - *.hf.co (CDN domains like cdn-lfs.hf.co, cdn-lfs3.hf.co)
+func isHuggingFaceURL(u *url.URL) bool {
+	if u == nil {
+		return false
+	}
+	host := strings.ToLower(u.Hostname())
+	return host == "huggingface.co" ||
+		strings.HasSuffix(host, ".huggingface.co") ||
+		host == "hf.co" ||
+		strings.HasSuffix(host, ".hf.co")
+}
+
+// getNumDownloadParts returns the number of concurrent download parts to use
+// for the given URL. HuggingFace URLs use reduced concurrency (default 4) to
+// avoid triggering rate limits. This can be overridden via the OLLAMA_HF_CONCURRENCY
+// environment variable. For non-HuggingFace URLs, returns the standard concurrency (16).
+func getNumDownloadParts(u *url.URL) int {
+	if isHuggingFaceURL(u) {
+		if v := os.Getenv("OLLAMA_HF_CONCURRENCY"); v != "" {
+			if n, err := strconv.Atoi(v); err == nil && n > 0 {
+				return n
+			}
+		}
+		return numHFDownloadParts
+	}
+	return numDownloadParts
+}
+
 func (p *blobDownloadPart) Name() string {
 	return strings.Join([]string{
 		p.blobDownload.Name, "partial", strconv.Itoa(p.N),
@@ -273,7 +308,11 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
 	}

 	g, inner := errgroup.WithContext(ctx)
-	g.SetLimit(numDownloadParts)
+	concurrency := getNumDownloadParts(directURL)
+	if concurrency != numDownloadParts {
+		slog.Info(fmt.Sprintf("using reduced concurrency (%d) for HuggingFace download", concurrency))
+	}
+	g.SetLimit(concurrency)
 	for i := range b.Parts {
 		part := b.Parts[i]
 		if part.Completed.Load() == part.Size {
@@ -458,7 +497,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
 }

 type downloadOpts struct {
-	n       model.Name
+	mp      ModelPath
 	digest  string
 	regOpts *registryOptions
 	fn      func(api.ProgressResponse)
@@ -467,10 +506,10 @@ type downloadOpts struct {
 // downloadBlob downloads a blob from the registry and stores it in the blobs directory
 func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ error) {
 	if opts.digest == "" {
-		return false, fmt.Errorf("%s: %s", opts.n.DisplayNamespaceModel(), "digest is empty")
+		return false, fmt.Errorf("%s: %s", opts.mp.GetNamespaceRepository(), "digest is empty")
 	}

-	fp, err := manifest.BlobsPath(opts.digest)
+	fp, err := GetBlobsPath(opts.digest)
 	if err != nil {
 		return false, err
 	}
@@ -494,8 +533,8 @@ func downloadBlob(ctx context.Context, opts downloadOpts) (cacheHit bool, _ erro
 	data, ok := blobDownloadManager.LoadOrStore(opts.digest, &blobDownload{Name: fp, Digest: opts.digest})
 	download := data.(*blobDownload)
 	if !ok {
-		requestURL := opts.n.BaseURL()
-		requestURL = requestURL.JoinPath("v2", opts.n.DisplayNamespaceModel(), "blobs", opts.digest)
+		requestURL := opts.mp.BaseURL()
+		requestURL = requestURL.JoinPath("v2", opts.mp.GetNamespaceRepository(), "blobs", opts.digest)
 		if err := download.Prepare(ctx, requestURL, opts.regOpts); err != nil {
 			blobDownloadManager.Delete(opts.digest)
 			return false, err
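A side note on the host check added above: the exact-match-or-dot-suffix rule is what keeps lookalike hosts such as nothuggingface.co from being treated as HuggingFace (the edge case called out in the commit message). Here is a minimal, self-contained sketch of just that rule; the helper name and sample URLs are illustrative, not part of the patch:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

// isHuggingFaceHost mirrors the matching rule in download.go: a host matches
// only if it equals the domain exactly or ends with "." plus the domain.
func isHuggingFaceHost(host string) bool {
	host = strings.ToLower(host)
	return host == "huggingface.co" ||
		strings.HasSuffix(host, ".huggingface.co") ||
		host == "hf.co" ||
		strings.HasSuffix(host, ".hf.co")
}

func main() {
	for _, raw := range []string{
		"https://huggingface.co/some/model",     // true
		"https://cdn-lfs3.hf.co/repos/abc/123",  // true: CDN subdomain
		"https://nothuggingface.co/model",       // false: no dot before the suffix
		"https://registry.ollama.ai/v2/library", // false
	} {
		u, err := url.Parse(raw)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-25s -> %v\n", u.Hostname(), isHuggingFaceHost(u.Hostname()))
	}
}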

server/download_test.go (new file, 194 lines)
View File

@@ -0,0 +1,194 @@
package server

import (
	"net/url"
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestIsHuggingFaceURL(t *testing.T) {
	tests := []struct {
		name     string
		url      string
		expected bool
	}{
		{
			name:     "nil url",
			url:      "",
			expected: false,
		},
		{
			name:     "huggingface.co main domain",
			url:      "https://huggingface.co/some/model",
			expected: true,
		},
		{
			name:     "cdn-lfs.huggingface.co subdomain",
			url:      "https://cdn-lfs.huggingface.co/repos/abc/123",
			expected: true,
		},
		{
			name:     "cdn-lfs3.hf.co CDN domain",
			url:      "https://cdn-lfs3.hf.co/repos/abc/123",
			expected: true,
		},
		{
			name:     "hf.co shortlink domain",
			url:      "https://hf.co/model",
			expected: true,
		},
		{
			name:     "uppercase HuggingFace domain",
			url:      "https://HUGGINGFACE.CO/model",
			expected: true,
		},
		{
			name:     "mixed case HF domain",
			url:      "https://Cdn-Lfs.HF.Co/repos",
			expected: true,
		},
		{
			name:     "ollama registry",
			url:      "https://registry.ollama.ai/v2/library/llama3",
			expected: false,
		},
		{
			name:     "github.com",
			url:      "https://github.com/ollama/ollama",
			expected: false,
		},
		{
			name:     "fake huggingface domain",
			url:      "https://nothuggingface.co/model",
			expected: false,
		},
		{
			name:     "fake hf domain",
			url:      "https://nothf.co/model",
			expected: false,
		},
		{
			name:     "huggingface in path not host",
			url:      "https://example.com/huggingface.co/model",
			expected: false,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			var u *url.URL
			if tc.url != "" {
				var err error
				u, err = url.Parse(tc.url)
				if err != nil {
					t.Fatalf("failed to parse URL: %v", err)
				}
			}
			got := isHuggingFaceURL(u)
			assert.Equal(t, tc.expected, got)
		})
	}
}

func TestGetNumDownloadParts(t *testing.T) {
	tests := []struct {
		name        string
		url         string
		envValue    string
		expected    int
		description string
	}{
		{
			name:        "nil url returns default",
			url:         "",
			envValue:    "",
			expected:    numDownloadParts,
			description: "nil URL should return standard concurrency",
		},
		{
			name:        "ollama registry returns default",
			url:         "https://registry.ollama.ai/v2/library/llama3",
			envValue:    "",
			expected:    numDownloadParts,
			description: "Ollama registry should use standard concurrency",
		},
		{
			name:        "huggingface returns reduced default",
			url:         "https://huggingface.co/model/repo",
			envValue:    "",
			expected:    numHFDownloadParts,
			description: "HuggingFace should use reduced concurrency",
		},
		{
			name:        "hf.co CDN returns reduced default",
			url:         "https://cdn-lfs3.hf.co/repos/abc/123",
			envValue:    "",
			expected:    numHFDownloadParts,
			description: "HuggingFace CDN should use reduced concurrency",
		},
		{
			name:        "huggingface with env override",
			url:         "https://huggingface.co/model/repo",
			envValue:    "2",
			expected:    2,
			description: "OLLAMA_HF_CONCURRENCY should override default",
		},
		{
			name:        "huggingface with higher env override",
			url:         "https://huggingface.co/model/repo",
			envValue:    "8",
			expected:    8,
			description: "OLLAMA_HF_CONCURRENCY can be set higher than default",
		},
		{
			name:        "huggingface with invalid env (non-numeric)",
			url:         "https://huggingface.co/model/repo",
			envValue:    "invalid",
			expected:    numHFDownloadParts,
			description: "Invalid OLLAMA_HF_CONCURRENCY should fall back to default",
		},
		{
			name:        "huggingface with invalid env (zero)",
			url:         "https://huggingface.co/model/repo",
			envValue:    "0",
			expected:    numHFDownloadParts,
			description: "Zero OLLAMA_HF_CONCURRENCY should fall back to default",
		},
		{
			name:        "huggingface with invalid env (negative)",
			url:         "https://huggingface.co/model/repo",
			envValue:    "-1",
			expected:    numHFDownloadParts,
			description: "Negative OLLAMA_HF_CONCURRENCY should fall back to default",
		},
		{
			name:        "non-huggingface ignores env",
			url:         "https://registry.ollama.ai/v2/library/llama3",
			envValue:    "2",
			expected:    numDownloadParts,
			description: "OLLAMA_HF_CONCURRENCY should not affect non-HF URLs",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Set or clear the environment variable
			if tc.envValue != "" {
				t.Setenv("OLLAMA_HF_CONCURRENCY", tc.envValue)
			}

			var u *url.URL
			if tc.url != "" {
				var err error
				u, err = url.Parse(tc.url)
				if err != nil {
					t.Fatalf("failed to parse URL: %v", err)
				}
			}

			got := getNumDownloadParts(u)
			assert.Equal(t, tc.expected, got, tc.description)
		})
	}
}
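The override cases above all reduce to one rule: a parseable, strictly positive integer in OLLAMA_HF_CONCURRENCY wins, and anything else falls back to the HuggingFace default. A standalone sketch of just that rule (pickConcurrency is an illustrative name, not part of the patch):

package main

import (
	"fmt"
	"os"
	"strconv"
)

// pickConcurrency applies the same guard as getNumDownloadParts: only a
// value that parses as an int and is greater than zero overrides fallback.
func pickConcurrency(fallback int) int {
	if v := os.Getenv("OLLAMA_HF_CONCURRENCY"); v != "" {
		if n, err := strconv.Atoi(v); err == nil && n > 0 {
			return n
		}
	}
	return fallback
}

func main() {
	// Mirrors the table-driven cases: "", "invalid", "0", and "-1" all
	// fall back to 4; "2" and "8" take effect.
	for _, v := range []string{"", "2", "8", "invalid", "0", "-1"} {
		os.Setenv("OLLAMA_HF_CONCURRENCY", v)
		fmt.Printf("OLLAMA_HF_CONCURRENCY=%q -> %d parts\n", v, pickConcurrency(4))
	}
}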

View File

@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -23,7 +24,6 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/fs/gguf"
-	"github.com/ollama/ollama/manifest"
 	"github.com/ollama/ollama/model/parsers"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"
@@ -41,7 +41,6 @@ var (
 	errCapabilityVision    = errors.New("vision")
 	errCapabilityEmbedding = errors.New("embedding")
 	errCapabilityThinking  = errors.New("thinking")
-	errCapabilityImage     = errors.New("image generation")
 	errInsecureProtocol    = errors.New("insecure protocol http")
 )
@@ -75,6 +74,12 @@ type Model struct {
 func (m *Model) Capabilities() []model.Capability {
 	capabilities := []model.Capability{}
+	// Check for image generation model via config capabilities
+	if slices.Contains(m.Config.Capabilities, "image") {
+		return []model.Capability{model.CapabilityImageGeneration}
+	}
+
+	// Check for completion capability
 	if m.ModelPath != "" {
 		f, err := gguf.Open(m.ModelPath)
 		if err == nil {
@@ -154,7 +159,6 @@ func (m *Model) CheckCapabilities(want ...model.Capability) error {
 		model.CapabilityVision:    errCapabilityVision,
 		model.CapabilityEmbedding: errCapabilityEmbedding,
 		model.CapabilityThinking:  errCapabilityThinking,
-		model.CapabilityImage:     errCapabilityImage,
 	}

 	for _, cap := range want {
@@ -268,22 +272,44 @@ func (m *Model) String() string {
 	return modelfile.String()
 }

+func GetManifest(mp ModelPath) (*Manifest, string, error) {
+	fp, err := mp.GetManifestPath()
+	if err != nil {
+		return nil, "", err
+	}
+
+	f, err := os.Open(fp)
+	if err != nil {
+		return nil, "", err
+	}
+	defer f.Close()
+
+	sha256sum := sha256.New()
+
+	var manifest Manifest
+	if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&manifest); err != nil {
+		return nil, "", err
+	}
+
+	return &manifest, hex.EncodeToString(sha256sum.Sum(nil)), nil
+}
+
 func GetModel(name string) (*Model, error) {
-	n := model.ParseName(name)
-	mf, err := manifest.ParseNamedManifest(n)
+	mp := ParseModelPath(name)
+	manifest, digest, err := GetManifest(mp)
 	if err != nil {
 		return nil, err
 	}

-	m := &Model{
-		Name:      n.String(),
-		ShortName: n.DisplayShortest(),
-		Digest:    mf.Digest(),
+	model := &Model{
+		Name:      mp.GetFullTagname(),
+		ShortName: mp.GetShortTagname(),
+		Digest:    digest,
 		Template:  template.DefaultTemplate,
 	}

-	if mf.Config.Digest != "" {
-		filename, err := manifest.BlobsPath(mf.Config.Digest)
+	if manifest.Config.Digest != "" {
+		filename, err := GetBlobsPath(manifest.Config.Digest)
 		if err != nil {
 			return nil, err
 		}
@@ -294,29 +320,29 @@ func GetModel(name string) (*Model, error) {
 			}
 		}
 		defer configFile.Close()

-		if err := json.NewDecoder(configFile).Decode(&m.Config); err != nil {
+		if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
 			return nil, err
 		}
 	}

-	for _, layer := range mf.Layers {
-		filename, err := manifest.BlobsPath(layer.Digest)
+	for _, layer := range manifest.Layers {
+		filename, err := GetBlobsPath(layer.Digest)
 		if err != nil {
 			return nil, err
 		}

 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
-			m.ModelPath = filename
-			m.ParentModel = layer.From
+			model.ModelPath = filename
+			model.ParentModel = layer.From
 		case "application/vnd.ollama.image.embed":
 			// Deprecated in versions > 0.1.2
 			// TODO: remove this warning in a future version
 			slog.Info("WARNING: model contains embeddings, but embeddings in modelfiles have been deprecated and will be ignored.")
 		case "application/vnd.ollama.image.adapter":
-			m.AdapterPaths = append(m.AdapterPaths, filename)
+			model.AdapterPaths = append(model.AdapterPaths, filename)
 		case "application/vnd.ollama.image.projector":
-			m.ProjectorPaths = append(m.ProjectorPaths, filename)
+			model.ProjectorPaths = append(model.ProjectorPaths, filename)
 		case "application/vnd.ollama.image.prompt",
 			"application/vnd.ollama.image.template":
 			bts, err := os.ReadFile(filename)
@@ -324,7 +350,7 @@ func GetModel(name string) (*Model, error) {
 				return nil, err
 			}

-			m.Template, err = template.Parse(string(bts))
+			model.Template, err = template.Parse(string(bts))
 			if err != nil {
 				return nil, err
 			}
@@ -334,7 +360,7 @@ func GetModel(name string) (*Model, error) {
 				return nil, err
 			}

-			m.System = string(bts)
+			model.System = string(bts)
 		case "application/vnd.ollama.image.params":
 			params, err := os.Open(filename)
 			if err != nil {
@@ -343,7 +369,7 @@ func GetModel(name string) (*Model, error) {
 			defer params.Close()

 			// parse model options parameters into a map so that we can see which fields have been specified explicitly
-			if err = json.NewDecoder(params).Decode(&m.Options); err != nil {
+			if err = json.NewDecoder(params).Decode(&model.Options); err != nil {
 				return nil, err
 			}
 		case "application/vnd.ollama.image.messages":
@@ -353,7 +379,7 @@ func GetModel(name string) (*Model, error) {
 			}
 			defer msgs.Close()

-			if err = json.NewDecoder(msgs).Decode(&m.Messages); err != nil {
+			if err = json.NewDecoder(msgs).Decode(&model.Messages); err != nil {
 				return nil, err
 			}
 		case "application/vnd.ollama.image.license":
@@ -361,11 +387,11 @@ func GetModel(name string) (*Model, error) {
 			if err != nil {
 				return nil, err
 			}

-			m.License = append(m.License, string(bts))
+			model.License = append(model.License, string(bts))
 		}
 	}

-	return m, nil
+	return model, nil
 }

 func CopyModel(src, dst model.Name) error {
@@ -380,7 +406,7 @@ func CopyModel(src, dst model.Name) error {
 		return nil
 	}

-	manifests, err := manifest.Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -409,7 +435,7 @@ func CopyModel(src, dst model.Name) error {
 func deleteUnusedLayers(deleteMap map[string]struct{}) error {
 	// Ignore corrupt manifests to avoid blocking deletion of layers that are freshly orphaned
-	manifests, err := manifest.Manifests(true)
+	manifests, err := Manifests(true)
 	if err != nil {
 		return err
 	}
@@ -424,7 +450,7 @@ func deleteUnusedLayers(deleteMap map[string]struct{}) error {
 	// only delete the files which are still in the deleteMap
 	for k := range deleteMap {
-		fp, err := manifest.BlobsPath(k)
+		fp, err := GetBlobsPath(k)
 		if err != nil {
 			slog.Info(fmt.Sprintf("couldn't get file path for '%s': %v", k, err))
 			continue
@@ -440,7 +466,7 @@ func deleteUnusedLayers(deleteMap map[string]struct{}) error {
 func PruneLayers() error {
 	deleteMap := make(map[string]struct{})
-	p, err := manifest.BlobsPath("")
+	p, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}
@@ -455,9 +481,9 @@ func PruneLayers() error {
 		name := blob.Name()
 		name = strings.ReplaceAll(name, "-", ":")

-		_, err := manifest.BlobsPath(name)
+		_, err := GetBlobsPath(name)
 		if err != nil {
-			if errors.Is(err, manifest.ErrInvalidDigestFormat) {
+			if errors.Is(err, ErrInvalidDigestFormat) {
 				// remove invalid blobs (e.g. partial downloads)
 				if err := os.Remove(filepath.Join(p, blob.Name())); err != nil {
 					slog.Error("couldn't remove blob", "blob", blob.Name(), "error", err)
@@ -482,30 +508,63 @@ func PruneLayers() error {
 	return nil
 }

+func PruneDirectory(path string) error {
+	info, err := os.Lstat(path)
+	if err != nil {
+		return err
+	}
+
+	if info.IsDir() && info.Mode()&os.ModeSymlink == 0 {
+		entries, err := os.ReadDir(path)
+		if err != nil {
+			return err
+		}
+
+		for _, entry := range entries {
+			if err := PruneDirectory(filepath.Join(path, entry.Name())); err != nil {
+				return err
+			}
+		}
+
+		entries, err = os.ReadDir(path)
+		if err != nil {
+			return err
+		}
+
+		if len(entries) > 0 {
+			return nil
+		}
+
+		return os.Remove(path)
+	}
+
+	return nil
+}
+
 func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
-	n := model.ParseName(name)
+	mp := ParseModelPath(name)
 	fn(api.ProgressResponse{Status: "retrieving manifest"})

-	if n.ProtocolScheme == "http" && !regOpts.Insecure {
+	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
 		return errInsecureProtocol
 	}

-	mf, err := manifest.ParseNamedManifest(n)
+	manifest, _, err := GetManifest(mp)
 	if err != nil {
 		fn(api.ProgressResponse{Status: "couldn't retrieve manifest"})
 		return err
 	}

-	var layers []manifest.Layer
-	layers = append(layers, mf.Layers...)
-	if mf.Config.Digest != "" {
-		layers = append(layers, mf.Config)
+	var layers []Layer
+	layers = append(layers, manifest.Layers...)
+	if manifest.Config.Digest != "" {
+		layers = append(layers, manifest.Config)
 	}

 	// Use fast transfer for models with tensor layers (many small blobs)
 	if hasTensorLayers(layers) {
 		// Read raw manifest JSON to preserve tensor metadata fields
-		manifestPath, err := manifest.PathForName(n)
+		manifestPath, err := mp.GetManifestPath()
 		if err != nil {
 			return err
 		}
@@ -513,7 +572,7 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 		if err != nil {
 			return err
 		}
-		if err := pushWithTransfer(ctx, n, layers, manifestJSON, regOpts, fn); err != nil {
+		if err := pushWithTransfer(ctx, mp, layers, manifestJSON, regOpts, fn); err != nil {
 			return err
 		}
 		fn(api.ProgressResponse{Status: "success"})
@@ -521,17 +580,17 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}

 	for _, layer := range layers {
-		if err := uploadBlob(ctx, n, layer, regOpts, fn); err != nil {
+		if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
 			slog.Info(fmt.Sprintf("error uploading blob: %v", err))
 			return err
 		}
 	}

 	fn(api.ProgressResponse{Status: "pushing manifest"})
-	requestURL := n.BaseURL()
-	requestURL = requestURL.JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
+	requestURL := mp.BaseURL()
+	requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)

-	manifestJSON, err := json.Marshal(mf)
+	manifestJSON, err := json.Marshal(manifest)
 	if err != nil {
 		return err
 	}
@@ -550,44 +609,44 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 }

 func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
-	n := model.ParseName(name)
+	mp := ParseModelPath(name)

 	// build deleteMap to prune unused layers
 	deleteMap := make(map[string]struct{})
-	existingMf, err := manifest.ParseNamedManifest(n)
+	manifest, _, err := GetManifest(mp)
 	if errors.Is(err, os.ErrNotExist) {
 		// noop
 	} else if err != nil {
 		slog.Warn("pulling model with bad existing manifest", "name", name, "error", err)
 	} else {
-		for _, l := range existingMf.Layers {
+		for _, l := range manifest.Layers {
 			deleteMap[l.Digest] = struct{}{}
 		}
-		if existingMf.Config.Digest != "" {
-			deleteMap[existingMf.Config.Digest] = struct{}{}
+		if manifest.Config.Digest != "" {
+			deleteMap[manifest.Config.Digest] = struct{}{}
 		}
 	}

-	if n.ProtocolScheme == "http" && !regOpts.Insecure {
+	if mp.ProtocolScheme == "http" && !regOpts.Insecure {
 		return errInsecureProtocol
 	}

 	fn(api.ProgressResponse{Status: "pulling manifest"})

-	mf, err := pullModelManifest(ctx, n, regOpts)
+	manifest, err = pullModelManifest(ctx, mp, regOpts)
 	if err != nil {
 		return fmt.Errorf("pull model manifest: %s", err)
 	}

-	var layers []manifest.Layer
-	layers = append(layers, mf.Layers...)
-	if mf.Config.Digest != "" {
-		layers = append(layers, mf.Config)
+	var layers []Layer
+	layers = append(layers, manifest.Layers...)
+	if manifest.Config.Digest != "" {
+		layers = append(layers, manifest.Config)
 	}

 	// Use fast transfer for models with tensor layers (many small blobs)
 	if hasTensorLayers(layers) {
-		if err := pullWithTransfer(ctx, n, layers, mf, regOpts, fn); err != nil {
+		if err := pullWithTransfer(ctx, mp, layers, manifest, regOpts, fn); err != nil {
 			return err
 		}
 		fn(api.ProgressResponse{Status: "success"})
@@ -597,7 +656,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	skipVerify := make(map[string]bool)
 	for _, layer := range layers {
 		cacheHit, err := downloadBlob(ctx, downloadOpts{
-			n:       n,
+			mp:      mp,
 			digest:  layer.Digest,
 			regOpts: regOpts,
 			fn:      fn,
@@ -616,7 +675,7 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 		}
 		if err := verifyBlob(layer.Digest); err != nil {
 			if errors.Is(err, errDigestMismatch) {
-				fp, err := manifest.BlobsPath(layer.Digest)
+				fp, err := GetBlobsPath(layer.Digest)
 				if err != nil {
 					return err
 				}
@@ -631,16 +690,16 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	for _, layer := range layers {
 		delete(deleteMap, layer.Digest)
 	}
-	delete(deleteMap, mf.Config.Digest)
+	delete(deleteMap, manifest.Config.Digest)

 	fn(api.ProgressResponse{Status: "writing manifest"})

-	manifestJSON, err := json.Marshal(mf)
+	manifestJSON, err := json.Marshal(manifest)
 	if err != nil {
 		return err
 	}

-	fp, err := manifest.PathForName(n)
+	fp, err := mp.GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -667,9 +726,9 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 }

 // hasTensorLayers checks if any layer has tensor media type.
-func hasTensorLayers(layers []manifest.Layer) bool {
+func hasTensorLayers(layers []Layer) bool {
 	for _, layer := range layers {
-		if layer.MediaType == manifest.MediaTypeImageTensor {
+		if layer.MediaType == MediaTypeImageTensor {
 			return true
 		}
 	}
@@ -677,7 +736,7 @@ func hasTensorLayers(layers []manifest.Layer) bool {
 }

 // pullWithTransfer uses the simplified x/transfer package for downloading blobs.
-func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, mf *manifest.Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
+func pullWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifest *Manifest, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
 	blobs := make([]transfer.Blob, len(layers))
 	for i, layer := range layers {
 		blobs[i] = transfer.Blob{
@@ -686,12 +745,12 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		}
 	}

-	destDir, err := manifest.BlobsPath("")
+	destDir, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}

-	base := n.BaseURL()
+	base := mp.BaseURL()
 	if base.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		base.Scheme = "http"
 	}
@@ -716,14 +775,14 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 			Realm:   challenge.Realm,
 			Service: challenge.Service,
 			Scope:   challenge.Scope,
-		}, base.Host)
+		})
 	}

 	if err := transfer.Download(ctx, transfer.DownloadOptions{
 		Blobs:      blobs,
 		BaseURL:    baseURL,
 		DestDir:    destDir,
-		Repository: n.DisplayNamespaceModel(),
+		Repository: mp.GetNamespaceRepository(),
 		Progress:   progress,
 		Token:      regOpts.Token,
 		GetToken:   getToken,
@@ -734,12 +793,12 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 	// Write manifest
 	fn(api.ProgressResponse{Status: "writing manifest"})
-	manifestJSON, err := json.Marshal(mf)
+	manifestJSON, err := json.Marshal(manifest)
 	if err != nil {
 		return err
 	}

-	fp, err := manifest.PathForName(n)
+	fp, err := mp.GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -751,7 +810,7 @@ func pullWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 }

 // pushWithTransfer uses the simplified x/transfer package for uploading blobs and manifest.
-func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer, manifestJSON []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
+func pushWithTransfer(ctx context.Context, mp ModelPath, layers []Layer, manifestJSON []byte, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
 	blobs := make([]transfer.Blob, len(layers))
 	for i, layer := range layers {
 		blobs[i] = transfer.Blob{
@@ -761,12 +820,12 @@ func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		}
 	}

-	srcDir, err := manifest.BlobsPath("")
+	srcDir, err := GetBlobsPath("")
 	if err != nil {
 		return err
 	}

-	base := n.BaseURL()
+	base := mp.BaseURL()
 	if base.Scheme != "http" && regOpts != nil && regOpts.Insecure {
 		base.Scheme = "http"
 	}
@@ -791,7 +850,7 @@ func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 			Realm:   challenge.Realm,
 			Service: challenge.Service,
 			Scope:   challenge.Scope,
-		}, base.Host)
+		})
 	}

 	return transfer.Upload(ctx, transfer.UploadOptions{
@@ -803,13 +862,13 @@ func pushWithTransfer(ctx context.Context, n model.Name, layers []manifest.Layer
 		GetToken:    getToken,
 		Logger:      slog.Default(),
 		Manifest:    manifestJSON,
-		ManifestRef: n.Tag,
-		Repository:  n.DisplayNamespaceModel(),
+		ManifestRef: mp.Tag,
+		Repository:  mp.GetNamespaceRepository(),
 	})
 }

-func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptions) (*manifest.Manifest, error) {
-	requestURL := n.BaseURL().JoinPath("v2", n.DisplayNamespaceModel(), "manifests", n.Tag)
+func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptions) (*Manifest, error) {
+	requestURL := mp.BaseURL().JoinPath("v2", mp.GetNamespaceRepository(), "manifests", mp.Tag)

 	headers := make(http.Header)
 	headers.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json")
@@ -819,7 +878,7 @@ func pullModelManifest(ctx context.Context, n model.Name, regOpts *registryOptio
 	}
 	defer resp.Body.Close()

-	var m manifest.Manifest
+	var m Manifest
 	if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
 		return nil, err
 	}
@@ -857,7 +916,7 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
 	// Handle authentication error with one retry
 	challenge := parseRegistryChallenge(resp.Header.Get("www-authenticate"))
-	token, err := getAuthorizationToken(ctx, challenge, requestURL.Host)
+	token, err := getAuthorizationToken(ctx, challenge)
 	if err != nil {
 		return nil, err
 	}
@@ -981,7 +1040,7 @@ func parseRegistryChallenge(authStr string) registryChallenge {
 var errDigestMismatch = errors.New("digest mismatch, file must be downloaded again")

 func verifyBlob(digest string) error {
-	fp, err := manifest.BlobsPath(digest)
+	fp, err := GetBlobsPath(digest)
 	if err != nil {
 		return err
 	}
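Worth noting in the GetManifest addition above: wrapping the file in io.TeeReader means the manifest is decoded and hashed in a single pass, since every byte the JSON decoder pulls from the file also flows into the sha256 hash. A reduced sketch of the pattern (the inline JSON is a stand-in for the manifest file on disk):

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

func main() {
	// Stand-in for the manifest file opened by GetManifest.
	src := strings.NewReader(`{"schemaVersion":2,"layers":[]}`)

	sum := sha256.New()
	var manifest map[string]any
	// One read yields both the parsed manifest and its digest: the decoder
	// consumes src, and TeeReader copies those same bytes into sum.
	if err := json.NewDecoder(io.TeeReader(src, sum)).Decode(&manifest); err != nil {
		panic(err)
	}
	fmt.Println(hex.EncodeToString(sum.Sum(nil)))
}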

View File

@@ -54,16 +54,7 @@ func TestModelCapabilities(t *testing.T) {
 					Capabilities: []string{"image"},
 				},
 			},
-			expectedCaps: []model.Capability{model.CapabilityImage},
-		},
-		{
-			name: "model with image and vision capability (image editing)",
-			model: Model{
-				Config: model.ConfigV2{
-					Capabilities: []string{"image", "vision"},
-				},
-			},
-			expectedCaps: []model.Capability{model.CapabilityImage, model.CapabilityVision},
+			expectedCaps: []model.Capability{model.CapabilityImageGeneration},
 		},
 		{
 			name: "model with completion capability",
@@ -251,24 +242,6 @@ func TestModelCheckCapabilities(t *testing.T) {
 			checkCaps:      []model.Capability{"unknown"},
 			expectedErrMsg: "unknown capability",
 		},
-		{
-			name: "model missing image generation capability",
-			model: Model{
-				ModelPath: completionModelPath,
-				Template:  chatTemplate,
-			},
-			checkCaps:      []model.Capability{model.CapabilityImage},
-			expectedErrMsg: "does not support image generation",
-		},
-		{
-			name: "model with image generation capability",
-			model: Model{
-				Config: model.ConfigV2{
-					Capabilities: []string{"image"},
-				},
-			},
-			checkCaps: []model.Capability{model.CapabilityImage},
-		},
 	}

 	for _, tt := range tests {

View File

@@ -1,4 +1,4 @@
-package manifest
+package server

 import (
 	"crypto/sha256"
@@ -14,7 +14,7 @@ type Layer struct {
 	Size      int64  `json:"size"`
 	From      string `json:"from,omitempty"`
 	Name      string `json:"name,omitempty"` // tensor name, e.g., "text_encoder/model.embed_tokens.weight"
-	Status    string `json:"-"`
+	status    string
 }

 const (
@@ -22,7 +22,7 @@ const (
 )

 func NewLayer(r io.Reader, mediatype string) (Layer, error) {
-	blobs, err := BlobsPath("")
+	blobs, err := GetBlobsPath("")
 	if err != nil {
 		return Layer{}, err
 	}
@@ -45,7 +45,7 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
 	}

 	digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
-	blob, err := BlobsPath(digest)
+	blob, err := GetBlobsPath(digest)
 	if err != nil {
 		return Layer{}, err
 	}
@@ -65,7 +65,7 @@ func NewLayer(r io.Reader, mediatype string) (Layer, error) {
 		MediaType: mediatype,
 		Digest:    digest,
 		Size:      n,
-		Status:    fmt.Sprintf("%s %s", status, digest),
+		status:    fmt.Sprintf("%s %s", status, digest),
 	}, nil
 }
@@ -74,7 +74,7 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
 		return Layer{}, errors.New("creating new layer from layer with empty digest")
 	}

-	blob, err := BlobsPath(digest)
+	blob, err := GetBlobsPath(digest)
 	if err != nil {
 		return Layer{}, err
 	}
@@ -89,7 +89,7 @@ func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
 		Digest:    digest,
 		Size:      fi.Size(),
 		From:      from,
-		Status:    fmt.Sprintf("using existing layer %s", digest),
+		status:    fmt.Sprintf("using existing layer %s", digest),
 	}, nil
 }
@@ -98,7 +98,7 @@ func (l *Layer) Open() (io.ReadSeekCloser, error) {
 		return nil, errors.New("opening layer with empty digest")
 	}

-	blob, err := BlobsPath(l.Digest)
+	blob, err := GetBlobsPath(l.Digest)
 	if err != nil {
 		return nil, err
 	}
@@ -126,7 +126,7 @@ func (l *Layer) Remove() error {
 		}
 	}

-	blob, err := BlobsPath(l.Digest)
+	blob, err := GetBlobsPath(l.Digest)
 	if err != nil {
 		return err
 	}
View File

@@ -1,9 +1,10 @@
-package manifest
+package server

 import (
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@@ -32,38 +33,12 @@ func (m *Manifest) Size() (size int64) {
 	return
 }

-func (m *Manifest) Digest() string {
-	return m.digest
-}
-
-func (m *Manifest) FileInfo() os.FileInfo {
-	return m.fi
-}
-
-// ReadConfigJSON reads and unmarshals a config layer as JSON.
-func (m *Manifest) ReadConfigJSON(configPath string, v any) error {
-	for _, layer := range m.Layers {
-		if layer.MediaType == "application/vnd.ollama.image.json" && layer.Name == configPath {
-			blobPath, err := BlobsPath(layer.Digest)
-			if err != nil {
-				return err
-			}
-			data, err := os.ReadFile(blobPath)
-			if err != nil {
-				return err
-			}
-			return json.Unmarshal(data, v)
-		}
-	}
-	return fmt.Errorf("config %q not found in manifest", configPath)
-}
-
 func (m *Manifest) Remove() error {
 	if err := os.Remove(m.filepath); err != nil {
 		return err
 	}

-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -95,11 +70,11 @@ func (m *Manifest) RemoveLayers() error {
 		if _, used := inUse[layer.Digest]; used {
 			continue
 		}
-		blob, err := BlobsPath(layer.Digest)
+		blob, err := GetBlobsPath(layer.Digest)
 		if err != nil {
 			return err
 		}
-		if err := os.Remove(blob); os.IsNotExist(err) {
+		if err := os.Remove(blob); errors.Is(err, os.ErrNotExist) {
 			slog.Debug("layer does not exist", "digest", layer.Digest)
 		} else if err != nil {
 			return err
@@ -114,7 +89,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 		return nil, model.Unqualified(n)
 	}

-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return nil, err
 	}
@@ -146,7 +121,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
 }

 func WriteManifest(name model.Name, config Layer, layers []Layer) error {
-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -173,7 +148,7 @@ func WriteManifest(name model.Name, config Layer, layers []Layer) error {
 }

 func Manifests(continueOnError bool) (map[model.Name]*Manifest, error) {
-	manifests, err := Path()
+	manifests, err := GetManifestPath()
 	if err != nil {
 		return nil, err
 	}
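The swap from os.IsNotExist to errors.Is in RemoveLayers above is not cosmetic: os.IsNotExist does not unwrap errors created with fmt.Errorf's %w verb, while errors.Is walks the whole wrap chain. A quick demonstration:

package main

import (
	"errors"
	"fmt"
	"os"
)

func main() {
	_, err := os.Open("/no/such/blob")

	// On the bare *PathError both checks agree.
	fmt.Println(os.IsNotExist(err), errors.Is(err, os.ErrNotExist)) // true true

	// Once the error is wrapped, only errors.Is still matches.
	wrapped := fmt.Errorf("removing layer: %w", err)
	fmt.Println(os.IsNotExist(wrapped), errors.Is(wrapped, os.ErrNotExist)) // false true
}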

View File

@@ -1,4 +1,4 @@
-package manifest
+package server

 import (
 	"encoding/json"

Some files were not shown because too many files have changed in this diff.