x/grammar: add experimental GPU-accelerated constrained decoding package

2026-02-04 12:42:58 -05:00 · 2026-01-11 00:50:11 -08:00
526 changed files with 23386 additions and 61832 deletions
--- a/.github/ISSUE_TEMPLATE/10_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/10_bug_report.yml
@@ -13,7 +13,7 @@ body:
    id: logs
    attributes:
      label: Relevant log output
-      description: Please copy and paste any relevant log output. See [Troubleshooting Guide](https://github.com/ollama/ollama/blob/main/docs/troubleshooting.mdx#how-to-troubleshoot-issues) for details.
+      description: Please copy and paste any relevant log output. See [Troubleshooting Guide](https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md#how-to-troubleshoot-issues) for details.
      render: shell
    validations:
      required: false
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -92,7 +92,7 @@ jobs:
            flags: ''
          - os: windows
            arch: amd64
-            preset: 'CUDA 13 Windows'
+            preset: 'CUDA 13'
            install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
            cuda-components:
              - '"cudart"'
@@ -372,17 +372,13 @@ jobs:
          outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
          cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
          cache-to: type=inline
-      - name: Deduplicate CUDA libraries
-        run: |
-          ./scripts/deduplicate_cuda_libs.sh dist/${{ matrix.os }}-${{ matrix.arch }}
      - run: |
          for COMPONENT in bin/* lib/ollama/*; do
            case "$COMPONENT" in
-              bin/ollama*)               echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              bin/ollama)                echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/*.so*)          echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/cuda_v*)        echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/vulkan*)        echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/mlx*)           echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
              lib/ollama/cuda_jetpack5)  echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
              lib/ollama/cuda_jetpack6)  echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
              lib/ollama/rocm)           echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,10 +48,9 @@ if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
    set(GGML_CPU_ALL_VARIANTS ON)
 endif()

-if(APPLE)
+if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
    set(CMAKE_BUILD_RPATH "@loader_path")
    set(CMAKE_INSTALL_RPATH "@loader_path")
-    set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
 endif()

 set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib/ollama)
@@ -190,21 +189,13 @@ if(MLX_ENGINE)
    install(TARGETS mlx mlxc
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_BIN_DIR}/x64 ${CUDAToolkit_LIBRARY_DIR}
-            PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc nvrtc-builtins cudnn nccl openblas gfortran
+            PRE_INCLUDE_REGEXES cublas cublasLt cudart nvrtc cudnn nccl
            PRE_EXCLUDE_REGEXES ".*"
        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
        FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
    )

-    # Install the Metal library for macOS arm64 (must be colocated with the binary)
-    # Metal backend is only built for arm64, not x86_64
-    if(APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
-        install(FILES ${CMAKE_BINARY_DIR}/_deps/mlx-build/mlx/backend/metal/kernels/mlx.metallib
-            DESTINATION ${OLLAMA_INSTALL_DIR}
-            COMPONENT MLX)
-    endif()
-
    # Manually install cudart and cublas since they might not be picked up as direct dependencies
    if(CUDAToolkit_FOUND)
        file(GLOB CUDART_LIBS
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -40,17 +40,7 @@
      "name": "CUDA 13",
      "inherits": [ "CUDA" ],
      "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;103-virtual;110-virtual;120-virtual;121-virtual",
-        "CMAKE_CUDA_FLAGS": "-t 4",
-        "OLLAMA_RUNNER_DIR": "cuda_v13"
-      }
-    },
-    {
-      "name": "CUDA 13 Windows",
-      "inherits": [ "CUDA" ],
-      "description": "Reduced architecture set for Windows to avoid MSVC template compilation issues",
-      "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "75-virtual;89-virtual;100-virtual;120-virtual",
+        "CMAKE_CUDA_ARCHITECTURES": "75-virtual;80-virtual;86-virtual;87-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;103-virtual;110-virtual;120-virtual;121-virtual",
        "CMAKE_CUDA_FLAGS": "-t 4",
        "OLLAMA_RUNNER_DIR": "cuda_v13"
      }
@@ -148,11 +138,6 @@
      "inherits": [ "CUDA" ],
      "configurePreset": "CUDA 13"
    },
-    {
-      "name": "CUDA 13 Windows",
-      "inherits": [ "CUDA" ],
-      "configurePreset": "CUDA 13 Windows"
-    },
    {
      "name": "JetPack 5",
      "inherits": [ "CUDA" ],
--- a/14
+++ b/14
@@ -32,7 +32,7 @@ ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
 RUN yum install -y yum-utils epel-release \
-    && dnf install -y clang ccache git \
+    && dnf install -y clang ccache \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
 ENV CC=clang CXX=clang++

@@ -149,7 +149,6 @@ COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 COPY x/ml/backend/mlx x/ml/backend/mlx
 COPY go.mod go.sum .
-COPY MLX_VERSION .
 RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
@@ -157,6 +156,11 @@ RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
        && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
        && cmake --install build --component MLX --strip --parallel ${PARALLEL}
+COPY . .
+ARG GOFLAGS="'-ldflags=-w -s'"
+ENV CGO_ENABLED=1
+ARG CGO_CFLAGS
+ARG CGO_CXXFLAGS

 FROM base AS build
 WORKDIR /go/src/github.com/ollama/ollama
@@ -165,16 +169,12 @@ RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
 COPY . .
-# Clone mlx-c headers for CGO (version from MLX_VERSION file)
-RUN git clone --depth 1 --branch "$(cat MLX_VERSION)" https://github.com/ml-explore/mlx-c.git build/_deps/mlx-c-src
 ARG GOFLAGS="'-ldflags=-w -s'"
 ENV CGO_ENABLED=1
 ARG CGO_CFLAGS
 ARG CGO_CXXFLAGS
-ENV CGO_CFLAGS="${CGO_CFLAGS} -I/go/src/github.com/ollama/ollama/build/_deps/mlx-c-src"
-ENV CGO_CXXFLAGS="${CGO_CXXFLAGS}"
 RUN --mount=type=cache,target=/root/.cache/go-build \
-    go build -tags mlx -trimpath -buildmode=pie -o /bin/ollama .
+    go build -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
--- a/1
+++ b/1
@@ -1 +0,0 @@
-v0.4.1
--- a/Makefile.sync
+++ b/Makefile.sync
@@ -1,6 +1,6 @@
 UPSTREAM=https://github.com/ggml-org/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=a5bb8ba4c50257437630c136210396810741bbf7
+FETCH_HEAD=ec98e2002

 .PHONY: help
 help:
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ ollama run gemma3

 ## Model library

-Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library "ollama model library")
+Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library')

 Here are some example models that can be downloaded:

@@ -79,7 +79,7 @@ Here are some example models that can be downloaded:
 | Code Llama         | 7B         | 3.8GB | `ollama run codellama`           |
 | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored`   |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`               |
-| Granite-3.3        | 8B         | 4.9GB | `ollama run granite3.3`          |
+| Granite-3.3         | 8B         | 4.9GB | `ollama run granite3.3`          |

 > [!NOTE]
 > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -260,38 +260,6 @@ Finally, in a separate shell, run a model:
 ./ollama run llama3.2
 ```

-## Building with MLX (experimental)
-
-First build the MLX libraries:
-
-```shell
-cmake --preset MLX
-cmake --build --preset MLX --parallel
-cmake --install build --component MLX
-```
-
-When building with the `-tags mlx` flag, the main `ollama` binary includes MLX support for experimental features like image generation:
-
-```shell
-go build -tags mlx .
-```
-
-Finally, start the server:
-
-```
-./ollama serve
-```
-
-### Building MLX with CUDA
-
-When building with CUDA, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with default architectures:
-
-```shell
-cmake --preset 'MLX CUDA 13'
-cmake --build --preset 'MLX CUDA 13' --parallel
-cmake --install build --component MLX
-```
-
 ## REST API

 Ollama has a REST API for running and managing models.
@@ -322,7 +290,6 @@ See the [API documentation](./docs/api.md) for all endpoints.

 ### Web & Desktop

- [Onyx](https://github.com/onyx-dot-app/onyx)
 - [Open WebUI](https://github.com/open-webui/open-webui)
 - [SwiftChat (macOS with ReactNative)](https://github.com/aws-samples/swift-chat)
 - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
@@ -358,7 +325,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Odin Runes](https://github.com/leonid20000/OdinRunes)
 - [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
 - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
- [Screenpipe](https://github.com/mediar-ai/screenpipe) (24/7 screen & mic recording with AI-powered search, uses Ollama for local LLM features)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
 - [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)
@@ -455,7 +421,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [AppFlowy](https://github.com/AppFlowy-IO/AppFlowy) (AI collaborative workspace with Ollama, cross-platform and self-hostable)
 - [Lumina](https://github.com/cushydigit/lumina.git) (A lightweight, minimal React.js frontend for interacting with Ollama servers)
 - [Tiny Notepad](https://pypi.org/project/tiny-notepad) (A lightweight, notepad-like interface to chat with ollama available on PyPI)
- [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.)
+- [macLlama (macOS native)](https://github.com/hellotunamayo/macLlama) (A native macOS GUI application for interacting with Ollama models, featuring a chat interface.) 
 - [GPTranslate](https://github.com/philberndt/GPTranslate) (A fast and lightweight, AI powered desktop translation application written with Rust and Tauri. Features real-time translation with OpenAI/Azure/Ollama.)
 - [ollama launcher](https://github.com/NGC13009/ollama-launcher) (A launcher for Ollama, aiming to provide users with convenient functions such as ollama server launching, management, or configuration.)
 - [ai-hub](https://github.com/Aj-Seven/ai-hub) (AI Hub supports multiple models via API keys and Chat support via Ollama API.)
@@ -466,7 +432,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Clueless](https://github.com/KashyapTan/clueless) (Open Source & Local Cluely: A desktop application LLM assistant to help you talk to anything on your screen using locally served Ollama models. Also undetectable to screenshare)
 - [ollama-co2](https://github.com/carbonatedWaterOrg/ollama-co2) (FastAPI web interface for monitoring and managing local and remote Ollama servers with real-time model monitoring and concurrent downloads)
 - [Hillnote](https://hillnote.com) (A Markdown-first workspace designed to supercharge your AI workflow. Create documents ready to integrate with Claude, ChatGPT, Gemini, Cursor, and more - all while keeping your work on your device.)
- [Stakpak](https://github.com/stakpak/agent) (An open source, vendor neutral DevOps agent that works with any model, and any stack, for teams who just want to ship)

 ### Cloud

@@ -528,7 +493,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Database

 - [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
-  - [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
+   - [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
 - [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
 - [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
 - [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
@@ -560,7 +525,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
- [Ollama for Ruby](https://github.com/crmne/ruby_llm)
+- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
 - [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/ollama4j/ollama4j)
@@ -671,7 +636,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [llama.cpp](https://github.com/ggml-org/llama.cpp) project founded by Georgi Gerganov.

 ### Observability
-
 - [Opik](https://www.comet.com/docs/opik/cookbook/ollama) is an open-source platform to debug, evaluate, and monitor your LLM applications, RAG systems, and agentic workflows with comprehensive tracing, automated evaluations, and production-ready dashboards. Opik supports native integration to Ollama.
 - [Lunary](https://lunary.ai/docs/integrations/ollama) is the leading open-source LLM observability platform. It provides a variety of enterprise-grade features such as real-time analytics, prompt templates management, PII masking, and comprehensive agent tracing.
 - [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
@@ -680,5 +644,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [MLflow Tracing](https://mlflow.org/docs/latest/llms/tracing/index.html#automatic-tracing) is an open source LLM observability tool with a convenient API to log and visualize traces, making it easy to debug and evaluate GenAI applications.

 ### Security
-
 - [Ollama Fortress](https://github.com/ParisNeo/ollama_proxy_server)
--- a/anthropic/anthropic.go
+++ b/anthropic/anthropic.go
@@ -211,7 +211,6 @@ type MessageDelta struct {

 // DeltaUsage contains cumulative token usage
 type DeltaUsage struct {
-	InputTokens  int `json:"input_tokens"`
 	OutputTokens int `json:"output_tokens"`
 }

@@ -722,7 +721,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 			})
 		}

-		c.inputTokens = r.Metrics.PromptEvalCount
 		c.outputTokens = r.Metrics.EvalCount
 		stopReason := mapStopReason(r.DoneReason, len(c.toolCallsSent) > 0)

@@ -734,7 +732,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
 					StopReason: stopReason,
 				},
 				Usage: DeltaUsage{
-					InputTokens:  c.inputTokens,
 					OutputTokens: c.outputTokens,
 				},
 			},
--- a/anthropic/anthropic_test.go
+++ b/anthropic/anthropic_test.go
@@ -642,7 +642,7 @@ func TestStreamConverter_Basic(t *testing.T) {
 		},
 		Done:       true,
 		DoneReason: "stop",
-		Metrics:    api.Metrics{PromptEvalCount: 10, EvalCount: 5},
+		Metrics:    api.Metrics{EvalCount: 5},
 	}

 	events2 := conv.Process(resp2)
@@ -650,24 +650,6 @@ func TestStreamConverter_Basic(t *testing.T) {
 	// Should have content_block_delta, content_block_stop, message_delta, message_stop
 	hasStop := false
 	for _, e := range events2 {
-		if e.Event == "message_delta" {
-			if data, ok := e.Data.(MessageDeltaEvent); ok {
-				if data.Type != "message_delta" {
-					t.Errorf("unexpected data type: %+v", data)
-				}
-
-				if data.Delta.StopReason != "end_turn" {
-					t.Errorf("unexpected stop reason: %+v", data.Delta.StopReason)
-				}
-
-				if data.Usage.InputTokens != 10 || data.Usage.OutputTokens != 5 {
-					t.Errorf("unexpected usage: %+v", data.Usage)
-				}
-			} else {
-				t.Errorf("unexpected data: %+v", e.Data)
-			}
-		}
-
 		if e.Event == "message_stop" {
 			hasStop = true
 		}
--- a/api/client.go
+++ b/api/client.go
@@ -165,7 +165,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 	return nil
 }

-const maxBufferSize = 8 * format.MegaByte
+const maxBufferSize = 512 * format.KiloByte

 func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
 	var buf io.Reader
--- a/api/types.go
+++ b/api/types.go
@@ -127,20 +127,6 @@ type GenerateRequest struct {
 	// each with an associated log probability. Only applies when Logprobs is true.
 	// Valid values are 0-20. Default is 0 (only return the selected token's logprob).
 	TopLogprobs int `json:"top_logprobs,omitempty"`
-
-	// Experimental: Image generation fields (may change or be removed)
-
-	// Width is the width of the generated image in pixels.
-	// Only used for image generation models.
-	Width int32 `json:"width,omitempty"`
-
-	// Height is the height of the generated image in pixels.
-	// Only used for image generation models.
-	Height int32 `json:"height,omitempty"`
-
-	// Steps is the number of diffusion steps for image generation.
-	// Only used for image generation models.
-	Steps int32 `json:"steps,omitempty"`
 }

 // ChatRequest describes a request sent by [Client.Chat].
@@ -749,7 +735,7 @@ type ShowResponse struct {
 	Messages      []Message          `json:"messages,omitempty"`
 	RemoteModel   string             `json:"remote_model,omitempty"`
 	RemoteHost    string             `json:"remote_host,omitempty"`
-	ModelInfo     map[string]any     `json:"model_info"`
+	ModelInfo     map[string]any     `json:"model_info,omitempty"`
 	ProjectorInfo map[string]any     `json:"projector_info,omitempty"`
 	Tensors       []Tensor           `json:"tensors,omitempty"`
 	Capabilities  []model.Capability `json:"capabilities,omitempty"`
@@ -874,20 +860,6 @@ type GenerateResponse struct {
 	// Logprobs contains log probability information for the generated tokens,
 	// if requested via the Logprobs parameter.
 	Logprobs []Logprob `json:"logprobs,omitempty"`
-
-	// Experimental: Image generation fields (may change or be removed)
-
-	// Image contains a base64-encoded generated image.
-	// Only present for image generation models.
-	Image string `json:"image,omitempty"`
-
-	// Completed is the number of completed steps in image generation.
-	// Only present for image generation models during streaming.
-	Completed int64 `json:"completed,omitempty"`
-
-	// Total is the total number of steps for image generation.
-	// Only present for image generation models during streaming.
-	Total int64 `json:"total,omitempty"`
 }

 // ModelDetails provides details about a model.
--- a/app/README.md
+++ b/app/README.md
@@ -75,9 +75,9 @@ The `-dev` flag enables:
 CI builds with Xcode 14.1 for OS compatibility prior to v13.  If you want to manually build v11+ support, you can download the older Xcode [here](https://developer.apple.com/services-account/download?path=/Developer_Tools/Xcode_14.1/Xcode_14.1.xip), extract, then `mv ./Xcode.app /Applications/Xcode_14.1.0.app` then activate with:

 ```
-export CGO_CFLAGS="-O3 -mmacosx-version-min=12.0"
-export CGO_CXXFLAGS="-O3 -mmacosx-version-min=12.0"
-export CGO_LDFLAGS="-mmacosx-version-min=12.0"
+export CGO_CFLAGS=-mmacosx-version-min=12.0
+export CGO_CXXFLAGS=-mmacosx-version-min=12.0
+export CGO_LDFLAGS=-mmacosx-version-min=12.0
 export SDKROOT=/Applications/Xcode_14.1.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
 export DEVELOPER_DIR=/Applications/Xcode_14.1.0.app/Contents/Developer
 ```
--- a/app/cmd/app/app_darwin.m
+++ b/app/cmd/app/app_darwin.m
@@ -14,7 +14,6 @@ extern NSString *SystemWidePath;
@interface AppDelegate () <NSWindowDelegate, WKNavigationDelegate, WKUIDelegate>
@property(strong, nonatomic) NSStatusItem *statusItem;
@property(assign, nonatomic) BOOL updateAvailable;
-@property(assign, nonatomic) BOOL systemShutdownInProgress;
@end

@implementation AppDelegate
@@ -41,13 +40,6 @@ bool firstTimeRun,startHidden; // Set in run before initialization
 }

 - (void)applicationDidFinishLaunching:(NSNotification *)aNotification {
-    // Register for system shutdown/restart notification so we can allow termination
-    [[[NSWorkspace sharedWorkspace] notificationCenter]
-        addObserver:self
-           selector:@selector(systemWillPowerOff:)
-               name:NSWorkspaceWillPowerOffNotification
-             object:nil];
-
    // if we're in development mode, set the app icon
    NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
    if (![bundlePath hasSuffix:@".app"]) {
@@ -286,18 +278,7 @@ bool firstTimeRun,startHidden; // Set in run before initialization
    [NSApp activateIgnoringOtherApps:YES];
 }

- (void)systemWillPowerOff:(NSNotification *)notification {
-    // Set flag so applicationShouldTerminate: knows to allow termination.
-    // The system will call applicationShouldTerminate: after posting this notification.
-    self.systemShutdownInProgress = YES;
-}
-
 - (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication *)sender {
-    // Allow termination if the system is shutting down or restarting
-    if (self.systemShutdownInProgress) {
-        return NSTerminateNow;
-    }
-    // Otherwise just hide the app (for Cmd+Q, close button, etc.)
    [NSApp hide:nil];
    [NSApp setActivationPolicy:NSApplicationActivationPolicyAccessory];
    return NSTerminateCancel;
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -35,7 +35,6 @@ import (
 	"golang.org/x/term"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/cmd/config"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/parser"
@@ -47,9 +46,8 @@ import (
 	"github.com/ollama/ollama/types/syncmap"
 	"github.com/ollama/ollama/version"
 	xcmd "github.com/ollama/ollama/x/cmd"
-	"github.com/ollama/ollama/x/create"
-	xcreateclient "github.com/ollama/ollama/x/create/client"
 	"github.com/ollama/ollama/x/imagegen"
+	imagegenclient "github.com/ollama/ollama/x/imagegen/client"
 )

 const ConnectInstructions = "To sign in, navigate to:\n    %s\n\n"
@@ -95,87 +93,14 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

-	// Validate model name early to fail fast
-	modelName := args[0]
-	name := model.ParseName(modelName)
-	if !name.IsValid() {
-		return fmt.Errorf("invalid model name: %s", modelName)
-	}
-
-	// Check for --experimental flag for safetensors model creation
-	experimental, _ := cmd.Flags().GetBool("experimental")
-	if experimental {
-		// Get Modelfile content - either from -f flag or default to "FROM ."
-		var reader io.Reader
-		filename, err := getModelfileName(cmd)
-		if os.IsNotExist(err) || filename == "" {
-			// No Modelfile specified or found - use default
-			reader = strings.NewReader("FROM .\n")
-		} else if err != nil {
-			return err
-		} else {
-			f, err := os.Open(filename)
-			if err != nil {
-				return err
-			}
-			defer f.Close()
-			reader = f
-		}
-
-		// Parse the Modelfile
-		modelfile, err := parser.ParseFile(reader)
-		if err != nil {
-			return fmt.Errorf("failed to parse Modelfile: %w", err)
-		}
-
-		// Extract FROM path and configuration
-		var modelDir string
-		mfConfig := &xcreateclient.ModelfileConfig{}
-
-		for _, cmd := range modelfile.Commands {
-			switch cmd.Name {
-			case "model":
-				modelDir = cmd.Args
-			case "template":
-				mfConfig.Template = cmd.Args
-			case "system":
-				mfConfig.System = cmd.Args
-			case "license":
-				mfConfig.License = cmd.Args
-			}
-		}
-
-		if modelDir == "" {
-			modelDir = "."
-		}
-
-		// Resolve relative paths based on Modelfile location
-		if !filepath.IsAbs(modelDir) && filename != "" {
-			modelDir = filepath.Join(filepath.Dir(filename), modelDir)
-		}
-
-		quantize, _ := cmd.Flags().GetString("quantize")
-		return xcreateclient.CreateModel(xcreateclient.CreateOptions{
-			ModelName: modelName,
-			ModelDir:  modelDir,
-			Quantize:  quantize,
-			Modelfile: mfConfig,
-		}, p)
-	}
-
 	var reader io.Reader

 	filename, err := getModelfileName(cmd)
 	if os.IsNotExist(err) {
 		if filename == "" {
 			// No Modelfile found - check if current directory is an image gen model
-			if create.IsTensorModelDir(".") {
-				quantize, _ := cmd.Flags().GetString("quantize")
-				return xcreateclient.CreateModel(xcreateclient.CreateOptions{
-					ModelName: modelName,
-					ModelDir:  ".",
-					Quantize:  quantize,
-				}, p)
+			if imagegen.IsTensorModelDir(".") {
+				return imagegenclient.CreateModel(args[0], ".", p)
 			}
 			reader = strings.NewReader("FROM .\n")
 		} else {
@@ -208,7 +133,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	}
 	spinner.Stop()

-	req.Model = modelName
+	req.Model = args[0]
 	quantize, _ := cmd.Flags().GetString("quantize")
 	if quantize != "" {
 		req.Quantize = quantize
@@ -366,25 +291,14 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
 		return err
 	} else if info.RemoteHost != "" {
 		// Cloud model, no need to load/unload
-
-		isCloud := strings.HasPrefix(info.RemoteHost, "https://ollama.com")
-
 		if opts.ShowConnect {
 			p.StopAndClear()
-			if isCloud {
+			if strings.HasPrefix(info.RemoteHost, "https://ollama.com") {
 				fmt.Fprintf(os.Stderr, "Connecting to '%s' on 'ollama.com' ⚡\n", info.RemoteModel)
 			} else {
 				fmt.Fprintf(os.Stderr, "Connecting to '%s' on '%s'\n", info.RemoteModel, info.RemoteHost)
 			}
 		}
-
-		// Check if user is signed in for ollama.com cloud models
-		if isCloud {
-			if _, err := client.Whoami(cmd.Context()); err != nil {
-				return err
-			}
-		}
-
 		return nil
 	}

@@ -550,6 +464,14 @@ func RunHandler(cmd *cobra.Command, args []string) error {

 	name := args[0]

+	// Check if this is a known image generation model (skip Show/Pull)
+	if imagegen.HasTensorLayers(name) {
+		if opts.Prompt == "" && !interactive {
+			return errors.New("image generation models require a prompt. Usage: ollama run " + name + " \"your prompt here\"")
+		}
+		return imagegen.RunCLI(cmd, name, opts.Prompt, interactive, opts.KeepAlive)
+	}
+
 	info, err := func() (*api.ShowResponse, error) {
 		showReq := &api.ShowRequest{Name: name}
 		info, err := client.Show(cmd.Context(), showReq)
@@ -611,18 +533,9 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 		return generateEmbedding(cmd, name, opts.Prompt, opts.KeepAlive, truncate, dimensions)
 	}

-	// Check if this is an image generation model
-	if slices.Contains(info.Capabilities, model.CapabilityImage) {
-		if opts.Prompt == "" && !interactive {
-			return errors.New("image generation models require a prompt. Usage: ollama run " + name + " \"your prompt here\"")
-		}
-		return imagegen.RunCLI(cmd, name, opts.Prompt, interactive, opts.KeepAlive)
-	}
-
 	// Check for experimental flag
 	isExperimental, _ := cmd.Flags().GetBool("experimental")
 	yoloMode, _ := cmd.Flags().GetBool("experimental-yolo")
-	enableWebsearch, _ := cmd.Flags().GetBool("experimental-websearch")

 	if interactive {
 		if err := loadOrUnloadModel(cmd, &opts); err != nil {
@@ -652,7 +565,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {

 		// Use experimental agent loop with tools
 		if isExperimental {
-			return xcmd.GenerateInteractive(cmd, opts.Model, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode, enableWebsearch)
+			return xcmd.GenerateInteractive(cmd, opts.Model, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode)
 		}

 		return generateInteractive(cmd, opts)
@@ -758,11 +671,7 @@ func PushHandler(cmd *cobra.Command, args []string) error {

 			bar, ok := bars[resp.Digest]
 			if !ok {
-				msg := resp.Status
-				if msg == "" {
-					msg = fmt.Sprintf("pushing %s...", resp.Digest[7:19])
-				}
-				bar = progress.NewBar(msg, resp.Total, resp.Completed)
+				bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
 				bars[resp.Digest] = bar
 				p.Add(resp.Digest, bar)
 			}
@@ -911,11 +820,11 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
 	for _, arg := range args {
 		// Unload the model if it's running before deletion
 		if err := loadOrUnloadModel(cmd, &runOptions{
-			Model:     arg,
+			Model:     args[0],
 			KeepAlive: &api.Duration{Duration: 0},
 		}); err != nil {
 			if !strings.Contains(strings.ToLower(err.Error()), "not found") {
-				fmt.Fprintf(os.Stderr, "Warning: unable to stop model '%s'\n", arg)
+				fmt.Fprintf(os.Stderr, "Warning: unable to stop model '%s'\n", args[0])
 			}
 		}

@@ -928,6 +837,11 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
 }

 func ShowHandler(cmd *cobra.Command, args []string) error {
+	// Check if this is an image generation model
+	if imagegen.HasTensorLayers(args[0]) {
+		return imagegen.Show(args[0], os.Stdout)
+	}
+
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
@@ -1030,10 +944,8 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
 		}

 		if resp.ModelInfo != nil {
-			arch, _ := resp.ModelInfo["general.architecture"].(string)
-			if arch != "" {
-				rows = append(rows, []string{"", "architecture", arch})
-			}
+			arch := resp.ModelInfo["general.architecture"].(string)
+			rows = append(rows, []string{"", "architecture", arch})

 			var paramStr string
 			if resp.Details.ParameterSize != "" {
@@ -1043,9 +955,7 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
 					paramStr = format.HumanNumber(uint64(f))
 				}
 			}
-			if paramStr != "" {
-				rows = append(rows, []string{"", "parameters", paramStr})
-			}
+			rows = append(rows, []string{"", "parameters", paramStr})

 			if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok {
 				if f, ok := v.(float64); ok {
@@ -1831,22 +1741,15 @@ func NewCLI() *cobra.Command {
 	rootCmd.Flags().BoolP("version", "v", false, "Show version information")

 	createCmd := &cobra.Command{
-		Use:   "create MODEL",
-		Short: "Create a model",
-		Args:  cobra.ExactArgs(1),
-		PreRunE: func(cmd *cobra.Command, args []string) error {
-			// Skip server check for experimental mode (writes directly to disk)
-			if experimental, _ := cmd.Flags().GetBool("experimental"); experimental {
-				return nil
-			}
-			return checkServerHeartbeat(cmd, args)
-		},
-		RunE: CreateHandler,
+		Use:     "create MODEL",
+		Short:   "Create a model",
+		Args:    cobra.ExactArgs(1),
+		PreRunE: checkServerHeartbeat,
+		RunE:    CreateHandler,
 	}

 	createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\")")
 	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_K_M)")
-	createCmd.Flags().Bool("experimental", false, "Enable experimental safetensors model creation")

 	showCmd := &cobra.Command{
 		Use:     "show MODEL",
@@ -1883,7 +1786,6 @@ func NewCLI() *cobra.Command {
 	runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)")
 	runCmd.Flags().Bool("experimental", false, "Enable experimental agent loop with tools")
 	runCmd.Flags().Bool("experimental-yolo", false, "Skip all tool approval prompts (use with caution)")
-	runCmd.Flags().Bool("experimental-websearch", false, "Enable web search tool in experimental mode")

 	// Image generation flags (width, height, steps, seed, etc.)
 	imagegen.RegisterFlags(runCmd)
@@ -1899,7 +1801,7 @@ func NewCLI() *cobra.Command {
 	serveCmd := &cobra.Command{
 		Use:     "serve",
 		Aliases: []string{"start"},
-		Short:   "Start Ollama",
+		Short:   "Start ollama",
 		Args:    cobra.ExactArgs(0),
 		RunE:    RunServer,
 	}
@@ -2001,7 +1903,6 @@ func NewCLI() *cobra.Command {
 	} {
 		switch cmd {
 		case runCmd:
-			imagegen.AppendFlagsDocs(cmd)
 			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
 		case serveCmd:
 			appendEnvDocs(cmd, []envconfig.EnvVar{
@@ -2042,7 +1943,6 @@ func NewCLI() *cobra.Command {
 		copyCmd,
 		deleteCmd,
 		runnerCmd,
-		config.LaunchCmd(checkServerHeartbeat),
 	)

 	return rootCmd
--- a/cmd/cmd_test.go
+++ b/cmd/cmd_test.go
@@ -3,7 +3,6 @@ package cmd
 import (
 	"bytes"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -1548,79 +1547,6 @@ func TestRunOptions_Copy_ThinkValueVariants(t *testing.T) {
 	}
 }

-func TestShowInfoImageGen(t *testing.T) {
-	var b bytes.Buffer
-	err := showInfo(&api.ShowResponse{
-		Details: api.ModelDetails{
-			Family:            "ZImagePipeline",
-			ParameterSize:     "10.3B",
-			QuantizationLevel: "Q8",
-		},
-		Capabilities: []model.Capability{model.CapabilityImage},
-		Requires:     "0.14.0",
-	}, false, &b)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	expect := "  Model\n" +
-		"    architecture    ZImagePipeline    \n" +
-		"    parameters      10.3B             \n" +
-		"    quantization    Q8                \n" +
-		"    requires        0.14.0            \n" +
-		"\n" +
-		"  Capabilities\n" +
-		"    image    \n" +
-		"\n"
-	if diff := cmp.Diff(expect, b.String()); diff != "" {
-		t.Errorf("unexpected output (-want +got):\n%s", diff)
-	}
-}
-
-func TestPushProgressMessage(t *testing.T) {
-	tests := []struct {
-		name    string
-		status  string
-		digest  string
-		wantMsg string
-	}{
-		{
-			name:    "uses status when provided",
-			status:  "uploading model",
-			digest:  "sha256:abc123456789def",
-			wantMsg: "uploading model",
-		},
-		{
-			name:    "falls back to digest when status empty",
-			status:  "",
-			digest:  "sha256:abc123456789def",
-			wantMsg: "pushing abc123456789...",
-		},
-		{
-			name:    "handles short digest gracefully",
-			status:  "",
-			digest:  "sha256:abc",
-			wantMsg: "pushing sha256:abc...",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			msg := tt.status
-			if msg == "" {
-				if len(tt.digest) >= 19 {
-					msg = fmt.Sprintf("pushing %s...", tt.digest[7:19])
-				} else {
-					msg = fmt.Sprintf("pushing %s...", tt.digest)
-				}
-			}
-			if msg != tt.wantMsg {
-				t.Errorf("got %q, want %q", msg, tt.wantMsg)
-			}
-		})
-	}
-}
-
 func TestRunOptions_Copy_Independence(t *testing.T) {
 	// Test that modifications to original don't affect copy
 	originalThink := &api.ThinkValue{Value: "original"}
@@ -1660,103 +1586,3 @@ func TestRunOptions_Copy_Independence(t *testing.T) {
 		t.Error("Copy Think should not be affected by original modification")
 	}
 }
-
-func TestLoadOrUnloadModel_CloudModelAuth(t *testing.T) {
-	tests := []struct {
-		name          string
-		remoteHost    string
-		whoamiStatus  int
-		whoamiResp    any
-		expectedError string
-	}{
-		{
-			name:         "ollama.com cloud model - user signed in",
-			remoteHost:   "https://ollama.com",
-			whoamiStatus: http.StatusOK,
-			whoamiResp:   api.UserResponse{Name: "testuser"},
-		},
-		{
-			name:         "ollama.com cloud model - user not signed in",
-			remoteHost:   "https://ollama.com",
-			whoamiStatus: http.StatusUnauthorized,
-			whoamiResp: map[string]string{
-				"error":      "unauthorized",
-				"signin_url": "https://ollama.com/signin",
-			},
-			expectedError: "unauthorized",
-		},
-		{
-			name:         "non-ollama.com remote - no auth check",
-			remoteHost:   "https://other-remote.com",
-			whoamiStatus: http.StatusUnauthorized, // should not be called
-			whoamiResp:   nil,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			whoamiCalled := false
-			mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				switch r.URL.Path {
-				case "/api/show":
-					w.Header().Set("Content-Type", "application/json")
-					if err := json.NewEncoder(w).Encode(api.ShowResponse{
-						RemoteHost:  tt.remoteHost,
-						RemoteModel: "test-model",
-					}); err != nil {
-						http.Error(w, err.Error(), http.StatusInternalServerError)
-					}
-				case "/api/me":
-					whoamiCalled = true
-					w.Header().Set("Content-Type", "application/json")
-					w.WriteHeader(tt.whoamiStatus)
-					if tt.whoamiResp != nil {
-						if err := json.NewEncoder(w).Encode(tt.whoamiResp); err != nil {
-							http.Error(w, err.Error(), http.StatusInternalServerError)
-						}
-					}
-				default:
-					http.NotFound(w, r)
-				}
-			}))
-			defer mockServer.Close()
-
-			t.Setenv("OLLAMA_HOST", mockServer.URL)
-
-			cmd := &cobra.Command{}
-			cmd.SetContext(t.Context())
-
-			opts := &runOptions{
-				Model:       "test-cloud-model",
-				ShowConnect: false,
-			}
-
-			err := loadOrUnloadModel(cmd, opts)
-
-			if strings.HasPrefix(tt.remoteHost, "https://ollama.com") {
-				if !whoamiCalled {
-					t.Error("expected whoami to be called for ollama.com cloud model")
-				}
-			} else {
-				if whoamiCalled {
-					t.Error("whoami should not be called for non-ollama.com remote")
-				}
-			}
-
-			if tt.expectedError != "" {
-				if err == nil {
-					t.Errorf("expected error containing %q, got nil", tt.expectedError)
-				} else {
-					var authErr api.AuthorizationError
-					if !errors.As(err, &authErr) {
-						t.Errorf("expected AuthorizationError, got %T: %v", err, err)
-					}
-				}
-			} else {
-				if err != nil {
-					t.Errorf("expected no error, got %v", err)
-				}
-			}
-		})
-	}
-}
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -1,62 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"runtime"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-// Claude implements Runner for Claude Code integration
-type Claude struct{}
-
-func (c *Claude) String() string { return "Claude Code" }
-
-func (c *Claude) args(model string, extra []string) []string {
-	var args []string
-	if model != "" {
-		args = append(args, "--model", model)
-	}
-	args = append(args, extra...)
-	return args
-}
-
-func (c *Claude) findPath() (string, error) {
-	if p, err := exec.LookPath("claude"); err == nil {
-		return p, nil
-	}
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
-	}
-	name := "claude"
-	if runtime.GOOS == "windows" {
-		name = "claude.exe"
-	}
-	fallback := filepath.Join(home, ".claude", "local", name)
-	if _, err := os.Stat(fallback); err != nil {
-		return "", err
-	}
-	return fallback, nil
-}
-
-func (c *Claude) Run(model string, args []string) error {
-	claudePath, err := c.findPath()
-	if err != nil {
-		return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
-	}
-
-	cmd := exec.Command(claudePath, c.args(model, args)...)
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	cmd.Env = append(os.Environ(),
-		"ANTHROPIC_BASE_URL="+envconfig.Host().String(),
-		"ANTHROPIC_API_KEY=",
-		"ANTHROPIC_AUTH_TOKEN=ollama",
-	)
-	return cmd.Run()
-}
--- a/cmd/config/claude_test.go
+++ b/cmd/config/claude_test.go
@@ -1,105 +0,0 @@
-package config
-
-import (
-	"os"
-	"path/filepath"
-	"runtime"
-	"slices"
-	"testing"
-)
-
-func TestClaudeIntegration(t *testing.T) {
-	c := &Claude{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := c.String(); got != "Claude Code" {
-			t.Errorf("String() = %q, want %q", got, "Claude Code")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = c
-	})
-}
-
-func TestClaudeFindPath(t *testing.T) {
-	c := &Claude{}
-
-	t.Run("finds claude in PATH", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		name := "claude"
-		if runtime.GOOS == "windows" {
-			name = "claude.exe"
-		}
-		fakeBin := filepath.Join(tmpDir, name)
-		os.WriteFile(fakeBin, []byte("#!/bin/sh\n"), 0o755)
-		t.Setenv("PATH", tmpDir)
-
-		got, err := c.findPath()
-		if err != nil {
-			t.Fatalf("unexpected error: %v", err)
-		}
-		if got != fakeBin {
-			t.Errorf("findPath() = %q, want %q", got, fakeBin)
-		}
-	})
-
-	t.Run("falls back to ~/.claude/local/claude", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
-
-		name := "claude"
-		if runtime.GOOS == "windows" {
-			name = "claude.exe"
-		}
-		fallback := filepath.Join(tmpDir, ".claude", "local", name)
-		os.MkdirAll(filepath.Dir(fallback), 0o755)
-		os.WriteFile(fallback, []byte("#!/bin/sh\n"), 0o755)
-
-		got, err := c.findPath()
-		if err != nil {
-			t.Fatalf("unexpected error: %v", err)
-		}
-		if got != fallback {
-			t.Errorf("findPath() = %q, want %q", got, fallback)
-		}
-	})
-
-	t.Run("returns error when neither PATH nor fallback exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
-
-		_, err := c.findPath()
-		if err == nil {
-			t.Fatal("expected error, got nil")
-		}
-	})
-}
-
-func TestClaudeArgs(t *testing.T) {
-	c := &Claude{}
-
-	tests := []struct {
-		name  string
-		model string
-		args  []string
-		want  []string
-	}{
-		{"with model", "llama3.2", nil, []string{"--model", "llama3.2"}},
-		{"empty model", "", nil, nil},
-		{"with model and verbose", "llama3.2", []string{"--verbose"}, []string{"--model", "llama3.2", "--verbose"}},
-		{"empty model with help", "", []string{"--help"}, []string{"--help"}},
-		{"with allowed tools", "llama3.2", []string{"--allowedTools", "Read,Write,Bash"}, []string{"--model", "llama3.2", "--allowedTools", "Read,Write,Bash"}},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := c.args(tt.model, tt.args)
-			if !slices.Equal(got, tt.want) {
-				t.Errorf("args(%q, %v) = %v, want %v", tt.model, tt.args, got, tt.want)
-			}
-		})
-	}
-}
--- a/cmd/config/codex.go
+++ b/cmd/config/codex.go
@@ -1,62 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-	"strings"
-
-	"golang.org/x/mod/semver"
-)
-
-// Codex implements Runner for Codex integration
-type Codex struct{}
-
-func (c *Codex) String() string { return "Codex" }
-
-func (c *Codex) args(model string, extra []string) []string {
-	args := []string{"--oss"}
-	if model != "" {
-		args = append(args, "-m", model)
-	}
-	args = append(args, extra...)
-	return args
-}
-
-func (c *Codex) Run(model string, args []string) error {
-	if err := checkCodexVersion(); err != nil {
-		return err
-	}
-
-	cmd := exec.Command("codex", c.args(model, args)...)
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	return cmd.Run()
-}
-
-func checkCodexVersion() error {
-	if _, err := exec.LookPath("codex"); err != nil {
-		return fmt.Errorf("codex is not installed, install with: npm install -g @openai/codex")
-	}
-
-	out, err := exec.Command("codex", "--version").Output()
-	if err != nil {
-		return fmt.Errorf("failed to get codex version: %w", err)
-	}
-
-	// Parse output like "codex-cli 0.87.0"
-	fields := strings.Fields(strings.TrimSpace(string(out)))
-	if len(fields) < 2 {
-		return fmt.Errorf("unexpected codex version output: %s", string(out))
-	}
-
-	version := "v" + fields[len(fields)-1]
-	minVersion := "v0.81.0"
-
-	if semver.Compare(version, minVersion) < 0 {
-		return fmt.Errorf("codex version %s is too old, minimum required is %s, update with: npm update -g @openai/codex", fields[len(fields)-1], "0.81.0")
-	}
-
-	return nil
-}
--- a/cmd/config/codex_test.go
+++ b/cmd/config/codex_test.go
@@ -1,31 +0,0 @@
-package config
-
-import (
-	"slices"
-	"testing"
-)
-
-func TestCodexArgs(t *testing.T) {
-	c := &Codex{}
-
-	tests := []struct {
-		name  string
-		model string
-		args  []string
-		want  []string
-	}{
-		{"with model", "llama3.2", nil, []string{"--oss", "-m", "llama3.2"}},
-		{"empty model", "", nil, []string{"--oss"}},
-		{"with model and profile", "qwen3-coder", []string{"-p", "myprofile"}, []string{"--oss", "-m", "qwen3-coder", "-p", "myprofile"}},
-		{"with sandbox flag", "llama3.2", []string{"--sandbox", "workspace-write"}, []string{"--oss", "-m", "llama3.2", "--sandbox", "workspace-write"}},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := c.args(tt.model, tt.args)
-			if !slices.Equal(got, tt.want) {
-				t.Errorf("args(%q, %v) = %v, want %v", tt.model, tt.args, got, tt.want)
-			}
-		})
-	}
-}
--- a/cmd/config/config.go
+++ b/cmd/config/config.go
@@ -1,169 +0,0 @@
-// Package config provides integration configuration for external coding tools
-// (Claude Code, Codex, Droid, OpenCode) to use Ollama models.
-package config
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"strings"
-)
-
-type integration struct {
-	Models []string `json:"models"`
-}
-
-type config struct {
-	Integrations map[string]*integration `json:"integrations"`
-}
-
-func configPath() (string, error) {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(home, ".ollama", "config.json"), nil
-}
-
-func legacyConfigPath() (string, error) {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(home, ".ollama", "config", "config.json"), nil
-}
-
-// migrateConfig moves the config from the legacy path to ~/.ollama/config.json
-func migrateConfig() (bool, error) {
-	oldPath, err := legacyConfigPath()
-	if err != nil {
-		return false, err
-	}
-
-	oldData, err := os.ReadFile(oldPath)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return false, nil
-		}
-		return false, err
-	}
-
-	var js json.RawMessage
-	if err := json.Unmarshal(oldData, &js); err != nil {
-		slog.Warn("legacy config has invalid JSON, skipping migration", "path", oldPath, "error", err)
-		return false, nil
-	}
-
-	newPath, err := configPath()
-	if err != nil {
-		return false, err
-	}
-
-	if err := os.MkdirAll(filepath.Dir(newPath), 0o755); err != nil {
-		return false, err
-	}
-	if err := os.WriteFile(newPath, oldData, 0o644); err != nil {
-		return false, fmt.Errorf("write new config: %w", err)
-	}
-
-	_ = os.Remove(oldPath)
-	_ = os.Remove(filepath.Dir(oldPath)) // clean up empty directory
-
-	slog.Info("migrated config", "from", oldPath, "to", newPath)
-	return true, nil
-}
-
-func load() (*config, error) {
-	path, err := configPath()
-	if err != nil {
-		return nil, err
-	}
-
-	data, err := os.ReadFile(path)
-	if err != nil && os.IsNotExist(err) {
-		if migrated, merr := migrateConfig(); merr == nil && migrated {
-			data, err = os.ReadFile(path)
-		}
-	}
-	if err != nil {
-		if os.IsNotExist(err) {
-			return &config{Integrations: make(map[string]*integration)}, nil
-		}
-		return nil, err
-	}
-
-	var cfg config
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		return nil, fmt.Errorf("failed to parse config: %w, at: %s", err, path)
-	}
-	if cfg.Integrations == nil {
-		cfg.Integrations = make(map[string]*integration)
-	}
-	return &cfg, nil
-}
-
-func save(cfg *config) error {
-	path, err := configPath()
-	if err != nil {
-		return err
-	}
-
-	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
-		return err
-	}
-
-	data, err := json.MarshalIndent(cfg, "", "  ")
-	if err != nil {
-		return err
-	}
-
-	return writeWithBackup(path, data)
-}
-
-func saveIntegration(appName string, models []string) error {
-	if appName == "" {
-		return errors.New("app name cannot be empty")
-	}
-
-	cfg, err := load()
-	if err != nil {
-		return err
-	}
-
-	cfg.Integrations[strings.ToLower(appName)] = &integration{
-		Models: models,
-	}
-
-	return save(cfg)
-}
-
-func loadIntegration(appName string) (*integration, error) {
-	cfg, err := load()
-	if err != nil {
-		return nil, err
-	}
-
-	ic, ok := cfg.Integrations[strings.ToLower(appName)]
-	if !ok {
-		return nil, os.ErrNotExist
-	}
-
-	return ic, nil
-}
-
-func listIntegrations() ([]integration, error) {
-	cfg, err := load()
-	if err != nil {
-		return nil, err
-	}
-
-	result := make([]integration, 0, len(cfg.Integrations))
-	for _, ic := range cfg.Integrations {
-		result = append(result, *ic)
-	}
-
-	return result, nil
-}
--- a/cmd/config/config_test.go
+++ b/cmd/config/config_test.go
@@ -1,548 +0,0 @@
-package config
-
-import (
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-)
-
-// setTestHome sets both HOME (Unix) and USERPROFILE (Windows) for cross-platform tests
-func setTestHome(t *testing.T, dir string) {
-	t.Setenv("HOME", dir)
-	t.Setenv("USERPROFILE", dir)
-}
-
-// editorPaths is a test helper that safely calls Paths if the runner implements Editor
-func editorPaths(r Runner) []string {
-	if editor, ok := r.(Editor); ok {
-		return editor.Paths()
-	}
-	return nil
-}
-
-func TestIntegrationConfig(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("save and load round-trip", func(t *testing.T) {
-		models := []string{"llama3.2", "mistral", "qwen2.5"}
-		if err := saveIntegration("claude", models); err != nil {
-			t.Fatal(err)
-		}
-
-		config, err := loadIntegration("claude")
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if len(config.Models) != len(models) {
-			t.Errorf("expected %d models, got %d", len(models), len(config.Models))
-		}
-		for i, m := range models {
-			if config.Models[i] != m {
-				t.Errorf("model %d: expected %s, got %s", i, m, config.Models[i])
-			}
-		}
-	})
-
-	t.Run("defaultModel returns first model", func(t *testing.T) {
-		saveIntegration("codex", []string{"model-a", "model-b"})
-
-		config, _ := loadIntegration("codex")
-		defaultModel := ""
-		if len(config.Models) > 0 {
-			defaultModel = config.Models[0]
-		}
-		if defaultModel != "model-a" {
-			t.Errorf("expected model-a, got %s", defaultModel)
-		}
-	})
-
-	t.Run("defaultModel returns empty for no models", func(t *testing.T) {
-		config := &integration{Models: []string{}}
-		defaultModel := ""
-		if len(config.Models) > 0 {
-			defaultModel = config.Models[0]
-		}
-		if defaultModel != "" {
-			t.Errorf("expected empty string, got %s", defaultModel)
-		}
-	})
-
-	t.Run("app name is case-insensitive", func(t *testing.T) {
-		saveIntegration("Claude", []string{"model-x"})
-
-		config, err := loadIntegration("claude")
-		if err != nil {
-			t.Fatal(err)
-		}
-		defaultModel := ""
-		if len(config.Models) > 0 {
-			defaultModel = config.Models[0]
-		}
-		if defaultModel != "model-x" {
-			t.Errorf("expected model-x, got %s", defaultModel)
-		}
-	})
-
-	t.Run("multiple integrations in single file", func(t *testing.T) {
-		saveIntegration("app1", []string{"model-1"})
-		saveIntegration("app2", []string{"model-2"})
-
-		config1, _ := loadIntegration("app1")
-		config2, _ := loadIntegration("app2")
-
-		defaultModel1 := ""
-		if len(config1.Models) > 0 {
-			defaultModel1 = config1.Models[0]
-		}
-		defaultModel2 := ""
-		if len(config2.Models) > 0 {
-			defaultModel2 = config2.Models[0]
-		}
-		if defaultModel1 != "model-1" {
-			t.Errorf("expected model-1, got %s", defaultModel1)
-		}
-		if defaultModel2 != "model-2" {
-			t.Errorf("expected model-2, got %s", defaultModel2)
-		}
-	})
-}
-
-func TestListIntegrations(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("returns empty when no integrations", func(t *testing.T) {
-		configs, err := listIntegrations()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if len(configs) != 0 {
-			t.Errorf("expected 0 integrations, got %d", len(configs))
-		}
-	})
-
-	t.Run("returns all saved integrations", func(t *testing.T) {
-		saveIntegration("claude", []string{"model-1"})
-		saveIntegration("droid", []string{"model-2"})
-
-		configs, err := listIntegrations()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if len(configs) != 2 {
-			t.Errorf("expected 2 integrations, got %d", len(configs))
-		}
-	})
-}
-
-func TestEditorPaths(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("returns empty for claude (no Editor)", func(t *testing.T) {
-		r := integrations["claude"]
-		paths := editorPaths(r)
-		if len(paths) != 0 {
-			t.Errorf("expected no paths for claude, got %v", paths)
-		}
-	})
-
-	t.Run("returns empty for codex (no Editor)", func(t *testing.T) {
-		r := integrations["codex"]
-		paths := editorPaths(r)
-		if len(paths) != 0 {
-			t.Errorf("expected no paths for codex, got %v", paths)
-		}
-	})
-
-	t.Run("returns empty for droid when no config exists", func(t *testing.T) {
-		r := integrations["droid"]
-		paths := editorPaths(r)
-		if len(paths) != 0 {
-			t.Errorf("expected no paths, got %v", paths)
-		}
-	})
-
-	t.Run("returns path for droid when config exists", func(t *testing.T) {
-		settingsDir, _ := os.UserHomeDir()
-		settingsDir = filepath.Join(settingsDir, ".factory")
-		os.MkdirAll(settingsDir, 0o755)
-		os.WriteFile(filepath.Join(settingsDir, "settings.json"), []byte(`{}`), 0o644)
-
-		r := integrations["droid"]
-		paths := editorPaths(r)
-		if len(paths) != 1 {
-			t.Errorf("expected 1 path, got %d", len(paths))
-		}
-	})
-
-	t.Run("returns paths for opencode when configs exist", func(t *testing.T) {
-		home, _ := os.UserHomeDir()
-		configDir := filepath.Join(home, ".config", "opencode")
-		stateDir := filepath.Join(home, ".local", "state", "opencode")
-		os.MkdirAll(configDir, 0o755)
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "opencode.json"), []byte(`{}`), 0o644)
-		os.WriteFile(filepath.Join(stateDir, "model.json"), []byte(`{}`), 0o644)
-
-		r := integrations["opencode"]
-		paths := editorPaths(r)
-		if len(paths) != 2 {
-			t.Errorf("expected 2 paths, got %d: %v", len(paths), paths)
-		}
-	})
-}
-
-func TestLoadIntegration_CorruptedJSON(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	dir := filepath.Join(tmpDir, ".ollama")
-	os.MkdirAll(dir, 0o755)
-	os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{corrupted json`), 0o644)
-
-	_, err := loadIntegration("test")
-	if err == nil {
-		t.Error("expected error for nonexistent integration in corrupted file")
-	}
-}
-
-func TestSaveIntegration_NilModels(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	if err := saveIntegration("test", nil); err != nil {
-		t.Fatalf("saveIntegration with nil models failed: %v", err)
-	}
-
-	config, err := loadIntegration("test")
-	if err != nil {
-		t.Fatalf("loadIntegration failed: %v", err)
-	}
-
-	if config.Models == nil {
-		// nil is acceptable
-	} else if len(config.Models) != 0 {
-		t.Errorf("expected empty or nil models, got %v", config.Models)
-	}
-}
-
-func TestSaveIntegration_EmptyAppName(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	err := saveIntegration("", []string{"model"})
-	if err == nil {
-		t.Error("expected error for empty app name, got nil")
-	}
-	if err != nil && !strings.Contains(err.Error(), "app name cannot be empty") {
-		t.Errorf("expected 'app name cannot be empty' error, got: %v", err)
-	}
-}
-
-func TestLoadIntegration_NonexistentIntegration(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	_, err := loadIntegration("nonexistent")
-	if err == nil {
-		t.Error("expected error for nonexistent integration, got nil")
-	}
-	if !os.IsNotExist(err) {
-		t.Logf("error type is os.ErrNotExist as expected: %v", err)
-	}
-}
-
-func TestConfigPath(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	path, err := configPath()
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	expected := filepath.Join(tmpDir, ".ollama", "config.json")
-	if path != expected {
-		t.Errorf("expected %s, got %s", expected, path)
-	}
-}
-
-func TestLoad(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("returns empty config when file does not exist", func(t *testing.T) {
-		cfg, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if cfg == nil {
-			t.Fatal("expected non-nil config")
-		}
-		if cfg.Integrations == nil {
-			t.Error("expected non-nil Integrations map")
-		}
-		if len(cfg.Integrations) != 0 {
-			t.Errorf("expected empty Integrations, got %d", len(cfg.Integrations))
-		}
-	})
-
-	t.Run("loads existing config", func(t *testing.T) {
-		path, _ := configPath()
-		os.MkdirAll(filepath.Dir(path), 0o755)
-		os.WriteFile(path, []byte(`{"integrations":{"test":{"models":["model-a"]}}}`), 0o644)
-
-		cfg, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if cfg.Integrations["test"] == nil {
-			t.Fatal("expected test integration")
-		}
-		if len(cfg.Integrations["test"].Models) != 1 {
-			t.Errorf("expected 1 model, got %d", len(cfg.Integrations["test"].Models))
-		}
-	})
-
-	t.Run("returns error for corrupted JSON", func(t *testing.T) {
-		path, _ := configPath()
-		os.MkdirAll(filepath.Dir(path), 0o755)
-		os.WriteFile(path, []byte(`{corrupted`), 0o644)
-
-		_, err := load()
-		if err == nil {
-			t.Error("expected error for corrupted JSON")
-		}
-	})
-}
-
-func TestMigrateConfig(t *testing.T) {
-	t.Run("migrates legacy file to new location", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		data := []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), data, 0o644)
-
-		migrated, err := migrateConfig()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if !migrated {
-			t.Fatal("expected migration to occur")
-		}
-
-		newPath, _ := configPath()
-		got, err := os.ReadFile(newPath)
-		if err != nil {
-			t.Fatalf("new config not found: %v", err)
-		}
-		if string(got) != string(data) {
-			t.Errorf("content mismatch: got %s", got)
-		}
-
-		if _, err := os.Stat(filepath.Join(legacyDir, "config.json")); !os.IsNotExist(err) {
-			t.Error("legacy file should have been removed")
-		}
-
-		if _, err := os.Stat(legacyDir); !os.IsNotExist(err) {
-			t.Error("legacy directory should have been removed")
-		}
-	})
-
-	t.Run("no-op when no legacy file exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		migrated, err := migrateConfig()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if migrated {
-			t.Error("expected no migration")
-		}
-	})
-
-	t.Run("skips corrupt legacy file", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{corrupt`), 0o644)
-
-		migrated, err := migrateConfig()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if migrated {
-			t.Error("should not migrate corrupt file")
-		}
-
-		if _, err := os.Stat(filepath.Join(legacyDir, "config.json")); os.IsNotExist(err) {
-			t.Error("corrupt legacy file should not have been deleted")
-		}
-	})
-
-	t.Run("new path takes precedence over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{"integrations":{"old":{"models":["old-model"]}}}`), 0o644)
-
-		newDir := filepath.Join(tmpDir, ".ollama")
-		os.WriteFile(filepath.Join(newDir, "config.json"), []byte(`{"integrations":{"new":{"models":["new-model"]}}}`), 0o644)
-
-		cfg, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if _, ok := cfg.Integrations["new"]; !ok {
-			t.Error("expected new-path integration to be loaded")
-		}
-		if _, ok := cfg.Integrations["old"]; ok {
-			t.Error("legacy integration should not have been loaded")
-		}
-	})
-
-	t.Run("idempotent when called twice", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{"integrations":{}}`), 0o644)
-
-		if _, err := migrateConfig(); err != nil {
-			t.Fatal(err)
-		}
-
-		migrated, err := migrateConfig()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if migrated {
-			t.Error("second migration should be a no-op")
-		}
-	})
-
-	t.Run("legacy directory preserved if not empty", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{"integrations":{}}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "other-file.txt"), []byte("keep me"), 0o644)
-
-		if _, err := migrateConfig(); err != nil {
-			t.Fatal(err)
-		}
-
-		if _, err := os.Stat(legacyDir); os.IsNotExist(err) {
-			t.Error("directory with other files should not have been removed")
-		}
-		if _, err := os.Stat(filepath.Join(legacyDir, "other-file.txt")); os.IsNotExist(err) {
-			t.Error("other files in legacy directory should be untouched")
-		}
-	})
-
-	t.Run("save writes to new path after migration", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`), 0o644)
-
-		// load triggers migration, then save should write to new path
-		if err := saveIntegration("codex", []string{"qwen2.5"}); err != nil {
-			t.Fatal(err)
-		}
-
-		newPath := filepath.Join(tmpDir, ".ollama", "config.json")
-		if _, err := os.Stat(newPath); os.IsNotExist(err) {
-			t.Error("save should write to new path")
-		}
-
-		// old path should not be recreated
-		if _, err := os.Stat(filepath.Join(legacyDir, "config.json")); !os.IsNotExist(err) {
-			t.Error("save should not recreate legacy path")
-		}
-	})
-
-	t.Run("load triggers migration transparently", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-
-		legacyDir := filepath.Join(tmpDir, ".ollama", "config")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`), 0o644)
-
-		cfg, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if cfg.Integrations["claude"] == nil || cfg.Integrations["claude"].Models[0] != "llama3.2" {
-			t.Error("migration via load() did not preserve data")
-		}
-	})
-}
-
-func TestSave(t *testing.T) {
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("creates config file", func(t *testing.T) {
-		cfg := &config{
-			Integrations: map[string]*integration{
-				"test": {Models: []string{"model-a", "model-b"}},
-			},
-		}
-
-		if err := save(cfg); err != nil {
-			t.Fatal(err)
-		}
-
-		path, _ := configPath()
-		if _, err := os.Stat(path); os.IsNotExist(err) {
-			t.Error("config file was not created")
-		}
-	})
-
-	t.Run("round-trip preserves data", func(t *testing.T) {
-		cfg := &config{
-			Integrations: map[string]*integration{
-				"claude": {Models: []string{"llama3.2", "mistral"}},
-				"codex":  {Models: []string{"qwen2.5"}},
-			},
-		}
-
-		if err := save(cfg); err != nil {
-			t.Fatal(err)
-		}
-
-		loaded, err := load()
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if len(loaded.Integrations) != 2 {
-			t.Errorf("expected 2 integrations, got %d", len(loaded.Integrations))
-		}
-		if loaded.Integrations["claude"] == nil {
-			t.Error("missing claude integration")
-		}
-		if len(loaded.Integrations["claude"].Models) != 2 {
-			t.Errorf("expected 2 models for claude, got %d", len(loaded.Integrations["claude"].Models))
-		}
-	})
-}
--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -1,186 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"slices"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-// Droid implements Runner and Editor for Droid integration
-type Droid struct{}
-
-// droidSettings represents the Droid settings.json file (only fields we use)
-type droidSettings struct {
-	CustomModels           []modelEntry    `json:"customModels"`
-	SessionDefaultSettings sessionSettings `json:"sessionDefaultSettings"`
-}
-
-type sessionSettings struct {
-	Model           string `json:"model"`
-	ReasoningEffort string `json:"reasoningEffort"`
-}
-
-type modelEntry struct {
-	Model           string `json:"model"`
-	DisplayName     string `json:"displayName"`
-	BaseURL         string `json:"baseUrl"`
-	APIKey          string `json:"apiKey"`
-	Provider        string `json:"provider"`
-	MaxOutputTokens int    `json:"maxOutputTokens"`
-	SupportsImages  bool   `json:"supportsImages"`
-	ID              string `json:"id"`
-	Index           int    `json:"index"`
-}
-
-func (d *Droid) String() string { return "Droid" }
-
-func (d *Droid) Run(model string, args []string) error {
-	if _, err := exec.LookPath("droid"); err != nil {
-		return fmt.Errorf("droid is not installed, install from https://docs.factory.ai/cli/getting-started/quickstart")
-	}
-
-	// Call Edit() to ensure config is up-to-date before launch
-	models := []string{model}
-	if config, err := loadIntegration("droid"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := d.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	cmd := exec.Command("droid", args...)
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	return cmd.Run()
-}
-
-func (d *Droid) Paths() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-	p := filepath.Join(home, ".factory", "settings.json")
-	if _, err := os.Stat(p); err == nil {
-		return []string{p}
-	}
-	return nil
-}
-
-func (d *Droid) Edit(models []string) error {
-	if len(models) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	settingsPath := filepath.Join(home, ".factory", "settings.json")
-	if err := os.MkdirAll(filepath.Dir(settingsPath), 0o755); err != nil {
-		return err
-	}
-
-	// Read file once, unmarshal twice:
-	// map preserves unknown fields for writing back (including extra fields in model entries)
-	settingsMap := make(map[string]any)
-	var settings droidSettings
-	if data, err := os.ReadFile(settingsPath); err == nil {
-		if err := json.Unmarshal(data, &settingsMap); err != nil {
-			return fmt.Errorf("failed to parse settings file: %w, at: %s", err, settingsPath)
-		}
-		json.Unmarshal(data, &settings) // ignore error, zero values are fine
-	}
-
-	// Keep only non-Ollama models from the raw map (preserves extra fields)
-	// Rebuild Ollama models
-	var nonOllamaModels []any
-	if rawModels, ok := settingsMap["customModels"].([]any); ok {
-		for _, raw := range rawModels {
-			if m, ok := raw.(map[string]any); ok {
-				if m["apiKey"] != "ollama" {
-					nonOllamaModels = append(nonOllamaModels, raw)
-				}
-			}
-		}
-	}
-
-	// Build new Ollama model entries with sequential indices (0, 1, 2, ...)
-	var newModels []any
-	var defaultModelID string
-	for i, model := range models {
-		modelID := fmt.Sprintf("custom:%s-%d", model, i)
-		newModels = append(newModels, modelEntry{
-			Model:           model,
-			DisplayName:     model,
-			BaseURL:         envconfig.Host().String() + "/v1",
-			APIKey:          "ollama",
-			Provider:        "generic-chat-completion-api",
-			MaxOutputTokens: 64000,
-			SupportsImages:  false,
-			ID:              modelID,
-			Index:           i,
-		})
-		if i == 0 {
-			defaultModelID = modelID
-		}
-	}
-
-	settingsMap["customModels"] = append(newModels, nonOllamaModels...)
-
-	// Update session default settings (preserve unknown fields in the nested object)
-	sessionSettings, ok := settingsMap["sessionDefaultSettings"].(map[string]any)
-	if !ok {
-		sessionSettings = make(map[string]any)
-	}
-	sessionSettings["model"] = defaultModelID
-
-	if !isValidReasoningEffort(settings.SessionDefaultSettings.ReasoningEffort) {
-		sessionSettings["reasoningEffort"] = "none"
-	}
-
-	settingsMap["sessionDefaultSettings"] = sessionSettings
-
-	data, err := json.MarshalIndent(settingsMap, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(settingsPath, data)
-}
-
-func (d *Droid) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-
-	data, err := os.ReadFile(filepath.Join(home, ".factory", "settings.json"))
-	if err != nil {
-		return nil
-	}
-
-	var settings droidSettings
-	if err := json.Unmarshal(data, &settings); err != nil {
-		return nil
-	}
-
-	var result []string
-	for _, m := range settings.CustomModels {
-		if m.APIKey == "ollama" {
-			result = append(result, m.Model)
-		}
-	}
-	return result
-}
-
-var validReasoningEfforts = []string{"high", "medium", "low", "none"}
-
-func isValidReasoningEffort(effort string) bool {
-	return slices.Contains(validReasoningEfforts, effort)
-}
--- a/cmd/config/droid_test.go
+++ b/cmd/config/droid_test.go
--- a/cmd/config/files.go
+++ b/cmd/config/files.go
@@ -1,99 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"time"
-)
-
-func readJSONFile(path string) (map[string]any, error) {
-	data, err := os.ReadFile(path)
-	if err != nil {
-		return nil, err
-	}
-	var result map[string]any
-	if err := json.Unmarshal(data, &result); err != nil {
-		return nil, err
-	}
-	return result, nil
-}
-
-func copyFile(src, dst string) error {
-	info, err := os.Stat(src)
-	if err != nil {
-		return err
-	}
-	data, err := os.ReadFile(src)
-	if err != nil {
-		return err
-	}
-	return os.WriteFile(dst, data, info.Mode().Perm())
-}
-
-func backupDir() string {
-	return filepath.Join(os.TempDir(), "ollama-backups")
-}
-
-func backupToTmp(srcPath string) (string, error) {
-	dir := backupDir()
-	if err := os.MkdirAll(dir, 0o755); err != nil {
-		return "", err
-	}
-
-	backupPath := filepath.Join(dir, fmt.Sprintf("%s.%d", filepath.Base(srcPath), time.Now().Unix()))
-	if err := copyFile(srcPath, backupPath); err != nil {
-		return "", err
-	}
-	return backupPath, nil
-}
-
-// writeWithBackup writes data to path via temp file + rename, backing up any existing file first
-func writeWithBackup(path string, data []byte) error {
-	var backupPath string
-	// backup must be created before any writes to the target file
-	if existingContent, err := os.ReadFile(path); err == nil {
-		if !bytes.Equal(existingContent, data) {
-			backupPath, err = backupToTmp(path)
-			if err != nil {
-				return fmt.Errorf("backup failed: %w", err)
-			}
-		}
-	} else if !os.IsNotExist(err) {
-		return fmt.Errorf("read existing file: %w", err)
-	}
-
-	dir := filepath.Dir(path)
-	tmp, err := os.CreateTemp(dir, ".tmp-*")
-	if err != nil {
-		return fmt.Errorf("create temp failed: %w", err)
-	}
-	tmpPath := tmp.Name()
-
-	if _, err := tmp.Write(data); err != nil {
-		_ = tmp.Close()
-		_ = os.Remove(tmpPath)
-		return fmt.Errorf("write failed: %w", err)
-	}
-	if err := tmp.Sync(); err != nil {
-		_ = tmp.Close()
-		_ = os.Remove(tmpPath)
-		return fmt.Errorf("sync failed: %w", err)
-	}
-	if err := tmp.Close(); err != nil {
-		_ = os.Remove(tmpPath)
-		return fmt.Errorf("close failed: %w", err)
-	}
-
-	if err := os.Rename(tmpPath, path); err != nil {
-		_ = os.Remove(tmpPath)
-		if backupPath != "" {
-			_ = copyFile(backupPath, path)
-		}
-		return fmt.Errorf("rename failed: %w", err)
-	}
-
-	return nil
-}
--- a/cmd/config/files_test.go
+++ b/cmd/config/files_test.go
@@ -1,502 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"runtime"
-	"testing"
-)
-
-func mustMarshal(t *testing.T, v any) []byte {
-	t.Helper()
-	data, err := json.MarshalIndent(v, "", "  ")
-	if err != nil {
-		t.Fatal(err)
-	}
-	return data
-}
-
-func TestWriteWithBackup(t *testing.T) {
-	tmpDir := t.TempDir()
-
-	t.Run("creates file", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "new.json")
-		data := mustMarshal(t, map[string]string{"key": "value"})
-
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		content, err := os.ReadFile(path)
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		var result map[string]string
-		if err := json.Unmarshal(content, &result); err != nil {
-			t.Fatal(err)
-		}
-		if result["key"] != "value" {
-			t.Errorf("expected value, got %s", result["key"])
-		}
-	})
-
-	t.Run("creates backup in /tmp/ollama-backups", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "backup.json")
-
-		os.WriteFile(path, []byte(`{"original": true}`), 0o644)
-
-		data := mustMarshal(t, map[string]bool{"updated": true})
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries, err := os.ReadDir(backupDir())
-		if err != nil {
-			t.Fatal("backup directory not created")
-		}
-
-		var foundBackup bool
-		for _, entry := range entries {
-			if filepath.Ext(entry.Name()) != ".json" {
-				name := entry.Name()
-				if len(name) > len("backup.json.") && name[:len("backup.json.")] == "backup.json." {
-					backupPath := filepath.Join(backupDir(), name)
-					backup, err := os.ReadFile(backupPath)
-					if err == nil {
-						var backupData map[string]bool
-						json.Unmarshal(backup, &backupData)
-						if backupData["original"] {
-							foundBackup = true
-							os.Remove(backupPath)
-							break
-						}
-					}
-				}
-			}
-		}
-
-		if !foundBackup {
-			t.Error("backup file not created in /tmp/ollama-backups")
-		}
-
-		current, _ := os.ReadFile(path)
-		var currentData map[string]bool
-		json.Unmarshal(current, &currentData)
-		if !currentData["updated"] {
-			t.Error("file doesn't contain updated data")
-		}
-	})
-
-	t.Run("no backup for new file", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "nobak.json")
-
-		data := mustMarshal(t, map[string]string{"new": "file"})
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries, _ := os.ReadDir(backupDir())
-		for _, entry := range entries {
-			if len(entry.Name()) > len("nobak.json.") && entry.Name()[:len("nobak.json.")] == "nobak.json." {
-				t.Error("backup should not exist for new file")
-			}
-		}
-	})
-
-	t.Run("no backup when content unchanged", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "unchanged.json")
-
-		data := mustMarshal(t, map[string]string{"key": "value"})
-
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries1, _ := os.ReadDir(backupDir())
-		countBefore := 0
-		for _, e := range entries1 {
-			if len(e.Name()) > len("unchanged.json.") && e.Name()[:len("unchanged.json.")] == "unchanged.json." {
-				countBefore++
-			}
-		}
-
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries2, _ := os.ReadDir(backupDir())
-		countAfter := 0
-		for _, e := range entries2 {
-			if len(e.Name()) > len("unchanged.json.") && e.Name()[:len("unchanged.json.")] == "unchanged.json." {
-				countAfter++
-			}
-		}
-
-		if countAfter != countBefore {
-			t.Errorf("backup was created when content unchanged (before=%d, after=%d)", countBefore, countAfter)
-		}
-	})
-
-	t.Run("backup filename contains unix timestamp", func(t *testing.T) {
-		path := filepath.Join(tmpDir, "timestamped.json")
-
-		os.WriteFile(path, []byte(`{"v": 1}`), 0o644)
-		data := mustMarshal(t, map[string]int{"v": 2})
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatal(err)
-		}
-
-		entries, _ := os.ReadDir(backupDir())
-		var found bool
-		for _, entry := range entries {
-			name := entry.Name()
-			if len(name) > len("timestamped.json.") && name[:len("timestamped.json.")] == "timestamped.json." {
-				timestamp := name[len("timestamped.json."):]
-				for _, c := range timestamp {
-					if c < '0' || c > '9' {
-						t.Errorf("backup filename timestamp contains non-numeric character: %s", name)
-					}
-				}
-				found = true
-				os.Remove(filepath.Join(backupDir(), name))
-				break
-			}
-		}
-		if !found {
-			t.Error("backup file with timestamp not found")
-		}
-	})
-}
-
-// Edge case tests for files.go
-
-// TestWriteWithBackup_FailsIfBackupFails documents critical behavior: if backup fails, we must not proceed.
-// User could lose their config with no way to recover.
-func TestWriteWithBackup_FailsIfBackupFails(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "config.json")
-
-	// Create original file
-	originalContent := []byte(`{"original": true}`)
-	os.WriteFile(path, originalContent, 0o644)
-
-	// Make backup directory read-only to force backup failure
-	backupDir := backupDir()
-	os.MkdirAll(backupDir, 0o755)
-	os.Chmod(backupDir, 0o444) // Read-only
-	defer os.Chmod(backupDir, 0o755)
-
-	newContent := []byte(`{"updated": true}`)
-	err := writeWithBackup(path, newContent)
-
-	// Should fail because backup couldn't be created
-	if err == nil {
-		t.Error("expected error when backup fails, got nil")
-	}
-
-	// Original file should be preserved
-	current, _ := os.ReadFile(path)
-	if string(current) != string(originalContent) {
-		t.Errorf("original file was modified despite backup failure: got %s", string(current))
-	}
-}
-
-// TestWriteWithBackup_PermissionDenied verifies clear error when target file has wrong permissions.
-// Common issue when config owned by root or wrong perms.
-func TestWriteWithBackup_PermissionDenied(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-
-	// Create a read-only directory
-	readOnlyDir := filepath.Join(tmpDir, "readonly")
-	os.MkdirAll(readOnlyDir, 0o755)
-	os.Chmod(readOnlyDir, 0o444)
-	defer os.Chmod(readOnlyDir, 0o755)
-
-	path := filepath.Join(readOnlyDir, "config.json")
-	err := writeWithBackup(path, []byte(`{"test": true}`))
-
-	if err == nil {
-		t.Error("expected permission error, got nil")
-	}
-}
-
-// TestWriteWithBackup_DirectoryDoesNotExist verifies behavior when target directory doesn't exist.
-// writeWithBackup doesn't create directories - caller is responsible.
-func TestWriteWithBackup_DirectoryDoesNotExist(t *testing.T) {
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "nonexistent", "subdir", "config.json")
-
-	err := writeWithBackup(path, []byte(`{"test": true}`))
-
-	// Should fail because directory doesn't exist
-	if err == nil {
-		t.Error("expected error for nonexistent directory, got nil")
-	}
-}
-
-// TestWriteWithBackup_SymlinkTarget documents behavior when target is a symlink.
-// Documents what happens if user symlinks their config file.
-func TestWriteWithBackup_SymlinkTarget(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("symlink tests may require admin on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	realFile := filepath.Join(tmpDir, "real.json")
-	symlink := filepath.Join(tmpDir, "link.json")
-
-	// Create real file and symlink
-	os.WriteFile(realFile, []byte(`{"v": 1}`), 0o644)
-	os.Symlink(realFile, symlink)
-
-	// Write through symlink
-	err := writeWithBackup(symlink, []byte(`{"v": 2}`))
-	if err != nil {
-		t.Fatalf("writeWithBackup through symlink failed: %v", err)
-	}
-
-	// The real file should be updated (symlink followed for temp file creation)
-	content, _ := os.ReadFile(symlink)
-	if string(content) != `{"v": 2}` {
-		t.Errorf("symlink target not updated correctly: got %s", string(content))
-	}
-}
-
-// TestBackupToTmp_SpecialCharsInFilename verifies backup works with special characters.
-// User may have config files with unusual names.
-func TestBackupToTmp_SpecialCharsInFilename(t *testing.T) {
-	tmpDir := t.TempDir()
-
-	// File with spaces and special chars
-	path := filepath.Join(tmpDir, "my config (backup).json")
-	os.WriteFile(path, []byte(`{"test": true}`), 0o644)
-
-	backupPath, err := backupToTmp(path)
-	if err != nil {
-		t.Fatalf("backupToTmp with special chars failed: %v", err)
-	}
-
-	// Verify backup exists and has correct content
-	content, err := os.ReadFile(backupPath)
-	if err != nil {
-		t.Fatalf("could not read backup: %v", err)
-	}
-	if string(content) != `{"test": true}` {
-		t.Errorf("backup content mismatch: got %s", string(content))
-	}
-
-	os.Remove(backupPath)
-}
-
-// TestCopyFile_PreservesPermissions verifies that copyFile preserves file permissions.
-func TestCopyFile_PreservesPermissions(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission preservation tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	src := filepath.Join(tmpDir, "src.json")
-	dst := filepath.Join(tmpDir, "dst.json")
-
-	// Create source with specific permissions
-	os.WriteFile(src, []byte(`{"test": true}`), 0o600)
-
-	err := copyFile(src, dst)
-	if err != nil {
-		t.Fatalf("copyFile failed: %v", err)
-	}
-
-	srcInfo, _ := os.Stat(src)
-	dstInfo, _ := os.Stat(dst)
-
-	if srcInfo.Mode().Perm() != dstInfo.Mode().Perm() {
-		t.Errorf("permissions not preserved: src=%v, dst=%v", srcInfo.Mode().Perm(), dstInfo.Mode().Perm())
-	}
-}
-
-// TestCopyFile_SourceNotFound verifies clear error when source doesn't exist.
-func TestCopyFile_SourceNotFound(t *testing.T) {
-	tmpDir := t.TempDir()
-	src := filepath.Join(tmpDir, "nonexistent.json")
-	dst := filepath.Join(tmpDir, "dst.json")
-
-	err := copyFile(src, dst)
-	if err == nil {
-		t.Error("expected error for nonexistent source, got nil")
-	}
-}
-
-// TestWriteWithBackup_TargetIsDirectory verifies error when path points to a directory.
-func TestWriteWithBackup_TargetIsDirectory(t *testing.T) {
-	tmpDir := t.TempDir()
-	dirPath := filepath.Join(tmpDir, "actualdir")
-	os.MkdirAll(dirPath, 0o755)
-
-	err := writeWithBackup(dirPath, []byte(`{"test": true}`))
-	if err == nil {
-		t.Error("expected error when target is a directory, got nil")
-	}
-}
-
-// TestWriteWithBackup_EmptyData verifies writing zero bytes works correctly.
-func TestWriteWithBackup_EmptyData(t *testing.T) {
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "empty.json")
-
-	err := writeWithBackup(path, []byte{})
-	if err != nil {
-		t.Fatalf("writeWithBackup with empty data failed: %v", err)
-	}
-
-	content, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatalf("could not read file: %v", err)
-	}
-	if len(content) != 0 {
-		t.Errorf("expected empty file, got %d bytes", len(content))
-	}
-}
-
-// TestWriteWithBackup_FileUnreadableButDirWritable verifies behavior when existing file
-// cannot be read (for backup comparison) but directory is writable.
-func TestWriteWithBackup_FileUnreadableButDirWritable(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "unreadable.json")
-
-	// Create file and make it unreadable
-	os.WriteFile(path, []byte(`{"original": true}`), 0o644)
-	os.Chmod(path, 0o000)
-	defer os.Chmod(path, 0o644)
-
-	// Should fail because we can't read the file to compare/backup
-	err := writeWithBackup(path, []byte(`{"updated": true}`))
-	if err == nil {
-		t.Error("expected error when file is unreadable, got nil")
-	}
-}
-
-// TestWriteWithBackup_RapidSuccessiveWrites verifies backup works with multiple writes
-// within the same second (timestamp collision scenario).
-func TestWriteWithBackup_RapidSuccessiveWrites(t *testing.T) {
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "rapid.json")
-
-	// Create initial file
-	os.WriteFile(path, []byte(`{"v": 0}`), 0o644)
-
-	// Rapid successive writes
-	for i := 1; i <= 3; i++ {
-		data := []byte(fmt.Sprintf(`{"v": %d}`, i))
-		if err := writeWithBackup(path, data); err != nil {
-			t.Fatalf("write %d failed: %v", i, err)
-		}
-	}
-
-	// Verify final content
-	content, _ := os.ReadFile(path)
-	if string(content) != `{"v": 3}` {
-		t.Errorf("expected final content {\"v\": 3}, got %s", string(content))
-	}
-
-	// Verify at least one backup exists
-	entries, _ := os.ReadDir(backupDir())
-	var backupCount int
-	for _, e := range entries {
-		if len(e.Name()) > len("rapid.json.") && e.Name()[:len("rapid.json.")] == "rapid.json." {
-			backupCount++
-		}
-	}
-	if backupCount == 0 {
-		t.Error("expected at least one backup file from rapid writes")
-	}
-}
-
-// TestWriteWithBackup_BackupDirIsFile verifies error when backup directory path is a file.
-func TestWriteWithBackup_BackupDirIsFile(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("test modifies system temp directory")
-	}
-
-	// Create a file at the backup directory path
-	backupPath := backupDir()
-	// Clean up any existing directory first
-	os.RemoveAll(backupPath)
-	// Create a file instead of directory
-	os.WriteFile(backupPath, []byte("not a directory"), 0o644)
-	defer func() {
-		os.Remove(backupPath)
-		os.MkdirAll(backupPath, 0o755)
-	}()
-
-	tmpDir := t.TempDir()
-	path := filepath.Join(tmpDir, "test.json")
-	os.WriteFile(path, []byte(`{"original": true}`), 0o644)
-
-	err := writeWithBackup(path, []byte(`{"updated": true}`))
-	if err == nil {
-		t.Error("expected error when backup dir is a file, got nil")
-	}
-}
-
-// TestWriteWithBackup_NoOrphanTempFiles verifies temp files are cleaned up on failure.
-func TestWriteWithBackup_NoOrphanTempFiles(t *testing.T) {
-	if runtime.GOOS == "windows" {
-		t.Skip("permission tests unreliable on Windows")
-	}
-
-	tmpDir := t.TempDir()
-
-	// Count existing temp files
-	countTempFiles := func() int {
-		entries, _ := os.ReadDir(tmpDir)
-		count := 0
-		for _, e := range entries {
-			if len(e.Name()) > 4 && e.Name()[:4] == ".tmp" {
-				count++
-			}
-		}
-		return count
-	}
-
-	before := countTempFiles()
-
-	// Create a file, then make directory read-only to cause rename failure
-	path := filepath.Join(tmpDir, "orphan.json")
-	os.WriteFile(path, []byte(`{"v": 1}`), 0o644)
-
-	// Make a subdirectory and try to write there after making parent read-only
-	subDir := filepath.Join(tmpDir, "subdir")
-	os.MkdirAll(subDir, 0o755)
-	subPath := filepath.Join(subDir, "config.json")
-	os.WriteFile(subPath, []byte(`{"v": 1}`), 0o644)
-
-	// Make subdir read-only after creating temp file would succeed but rename would fail
-	// This is tricky to test - the temp file is created in the same dir, so if we can't
-	// rename, we also couldn't create. Let's just verify normal failure cleanup works.
-
-	// Force a failure by making the target a directory
-	badPath := filepath.Join(tmpDir, "isdir")
-	os.MkdirAll(badPath, 0o755)
-
-	_ = writeWithBackup(badPath, []byte(`{"test": true}`))
-
-	after := countTempFiles()
-	if after > before {
-		t.Errorf("orphan temp files left behind: before=%d, after=%d", before, after)
-	}
-}
--- a/cmd/config/integrations.go
+++ b/cmd/config/integrations.go
@@ -1,531 +0,0 @@
-package config
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"maps"
-	"os"
-	"os/exec"
-	"runtime"
-	"slices"
-	"strings"
-	"time"
-
-	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/progress"
-	"github.com/spf13/cobra"
-)
-
-// Runners execute the launching of a model with the integration - claude, codex
-// Editors can edit config files (supports multi-model selection) - opencode, droid
-// They are composable interfaces where in some cases an editor is also a runner - opencode, droid
-// Runner can run an integration with a model.
-
-type Runner interface {
-	Run(model string, args []string) error
-	// String returns the human-readable name of the integration
-	String() string
-}
-
-// Editor can edit config files (supports multi-model selection)
-type Editor interface {
-	// Paths returns the paths to the config files for the integration
-	Paths() []string
-	// Edit updates the config files for the integration with the given models
-	Edit(models []string) error
-	// Models returns the models currently configured for the integration
-	// TODO(parthsareen): add error return to Models()
-	Models() []string
-}
-
-// integrations is the registry of available integrations.
-var integrations = map[string]Runner{
-	"claude":   &Claude{},
-	"clawdbot": &Openclaw{},
-	"codex":    &Codex{},
-	"moltbot":  &Openclaw{},
-	"droid":    &Droid{},
-	"opencode": &OpenCode{},
-	"openclaw": &Openclaw{},
-}
-
-// recommendedModels are shown when the user has no models or as suggestions.
-// Order matters: local models first, then cloud models.
-var recommendedModels = []selectItem{
-	{Name: "glm-4.7-flash", Description: "Recommended (requires ~25GB VRAM)"},
-	{Name: "qwen3:8b", Description: "Recommended (requires ~11GB VRAM)"},
-	{Name: "glm-4.7:cloud", Description: "Recommended"},
-	{Name: "kimi-k2.5:cloud", Description: "Recommended"},
-}
-
-// integrationAliases are hidden from the interactive selector but work as CLI arguments.
-var integrationAliases = map[string]bool{
-	"clawdbot": true,
-	"moltbot":  true,
-}
-
-func selectIntegration() (string, error) {
-	if len(integrations) == 0 {
-		return "", fmt.Errorf("no integrations available")
-	}
-
-	names := slices.Sorted(maps.Keys(integrations))
-	var items []selectItem
-	for _, name := range names {
-		if integrationAliases[name] {
-			continue
-		}
-		r := integrations[name]
-		description := r.String()
-		if conn, err := loadIntegration(name); err == nil && len(conn.Models) > 0 {
-			description = fmt.Sprintf("%s (%s)", r.String(), conn.Models[0])
-		}
-		items = append(items, selectItem{Name: name, Description: description})
-	}
-
-	return selectPrompt("Select integration:", items)
-}
-
-// selectModels lets the user select models for an integration
-func selectModels(ctx context.Context, name, current string) ([]string, error) {
-	r, ok := integrations[name]
-	if !ok {
-		return nil, fmt.Errorf("unknown integration: %s", name)
-	}
-
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return nil, err
-	}
-
-	models, err := client.List(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	var existing []modelInfo
-	for _, m := range models.Models {
-		existing = append(existing, modelInfo{Name: m.Name, Remote: m.RemoteModel != ""})
-	}
-
-	var preChecked []string
-	if saved, err := loadIntegration(name); err == nil {
-		preChecked = saved.Models
-	} else if editor, ok := r.(Editor); ok {
-		preChecked = editor.Models()
-	}
-
-	items, preChecked, existingModels, cloudModels := buildModelList(existing, preChecked, current)
-
-	if len(items) == 0 {
-		return nil, fmt.Errorf("no models available")
-	}
-
-	var selected []string
-	if _, ok := r.(Editor); ok {
-		selected, err = multiSelectPrompt(fmt.Sprintf("Select models for %s:", r), items, preChecked)
-		if err != nil {
-			return nil, err
-		}
-	} else {
-		model, err := selectPrompt(fmt.Sprintf("Select model for %s:", r), items)
-		if err != nil {
-			return nil, err
-		}
-		selected = []string{model}
-	}
-
-	var toPull []string
-	for _, m := range selected {
-		if !existingModels[m] {
-			toPull = append(toPull, m)
-		}
-	}
-	if len(toPull) > 0 {
-		msg := fmt.Sprintf("Download %s?", strings.Join(toPull, ", "))
-		if ok, err := confirmPrompt(msg); err != nil {
-			return nil, err
-		} else if !ok {
-			return nil, errCancelled
-		}
-		for _, m := range toPull {
-			fmt.Fprintf(os.Stderr, "\n")
-			if err := pullModel(ctx, client, m); err != nil {
-				return nil, fmt.Errorf("failed to pull %s: %w", m, err)
-			}
-		}
-	}
-
-	var selectedCloudModels []string
-	for _, m := range selected {
-		if cloudModels[m] {
-			selectedCloudModels = append(selectedCloudModels, m)
-		}
-	}
-	if len(selectedCloudModels) > 0 {
-		// ensure user is signed in
-		user, err := client.Whoami(ctx)
-		if err == nil && user != nil && user.Name != "" {
-			return selected, nil
-		}
-
-		var aErr api.AuthorizationError
-		if !errors.As(err, &aErr) || aErr.SigninURL == "" {
-			return nil, err
-		}
-
-		modelList := strings.Join(selectedCloudModels, ", ")
-		yes, err := confirmPrompt(fmt.Sprintf("sign in to use %s?", modelList))
-		if err != nil || !yes {
-			return nil, fmt.Errorf("%s requires sign in", modelList)
-		}
-
-		fmt.Fprintf(os.Stderr, "\nTo sign in, navigate to:\n    %s\n\n", aErr.SigninURL)
-
-		// TODO(parthsareen): extract into auth package for cmd
-		// Auto-open browser (best effort, fail silently)
-		switch runtime.GOOS {
-		case "darwin":
-			_ = exec.Command("open", aErr.SigninURL).Start()
-		case "linux":
-			_ = exec.Command("xdg-open", aErr.SigninURL).Start()
-		case "windows":
-			_ = exec.Command("rundll32", "url.dll,FileProtocolHandler", aErr.SigninURL).Start()
-		}
-
-		spinnerFrames := []string{"|", "/", "-", "\\"}
-		frame := 0
-
-		fmt.Fprintf(os.Stderr, "\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[0])
-
-		ticker := time.NewTicker(200 * time.Millisecond)
-		defer ticker.Stop()
-
-		for {
-			select {
-			case <-ctx.Done():
-				fmt.Fprintf(os.Stderr, "\r\033[K")
-				return nil, ctx.Err()
-			case <-ticker.C:
-				frame++
-				fmt.Fprintf(os.Stderr, "\r\033[90mwaiting for sign in to complete... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
-
-				// poll every 10th frame (~2 seconds)
-				if frame%10 == 0 {
-					u, err := client.Whoami(ctx)
-					if err == nil && u != nil && u.Name != "" {
-						fmt.Fprintf(os.Stderr, "\r\033[K\033[A\r\033[K\033[1msigned in:\033[0m %s\n", u.Name)
-						return selected, nil
-					}
-				}
-			}
-		}
-	}
-
-	return selected, nil
-}
-
-func runIntegration(name, modelName string, args []string) error {
-	r, ok := integrations[name]
-	if !ok {
-		return fmt.Errorf("unknown integration: %s", name)
-	}
-	fmt.Fprintf(os.Stderr, "\nLaunching %s with %s...\n", r, modelName)
-	return r.Run(modelName, args)
-}
-
-// LaunchCmd returns the cobra command for launching integrations.
-func LaunchCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) error) *cobra.Command {
-	var modelFlag string
-	var configFlag bool
-
-	cmd := &cobra.Command{
-		Use:   "launch [INTEGRATION] [-- [EXTRA_ARGS...]]",
-		Short: "Launch an integration with Ollama",
-		Long: `Launch an integration configured with Ollama models.
-
-Supported integrations:
-  claude    Claude Code
-  codex     Codex
-  droid     Droid
-  opencode  OpenCode
-  openclaw  OpenClaw (aliases: clawdbot, moltbot)
-
-Examples:
-  ollama launch
-  ollama launch claude
-  ollama launch claude --model <model>
-  ollama launch droid --config (does not auto-launch)
-  ollama launch codex -- -p myprofile (pass extra args to integration)
-  ollama launch codex -- --sandbox workspace-write`,
-		Args:    cobra.ArbitraryArgs,
-		PreRunE: checkServerHeartbeat,
-		RunE: func(cmd *cobra.Command, args []string) error {
-			// Extract integration name and args to pass through using -- separator
-			var name string
-			var passArgs []string
-			dashIdx := cmd.ArgsLenAtDash()
-
-			if dashIdx == -1 {
-				// No "--" separator: only allow 0 or 1 args (integration name)
-				if len(args) > 1 {
-					return fmt.Errorf("unexpected arguments: %v\nUse '--' to pass extra arguments to the integration", args[1:])
-				}
-				if len(args) == 1 {
-					name = args[0]
-				}
-			} else {
-				// "--" was used: args before it = integration name, args after = passthrough
-				if dashIdx > 1 {
-					return fmt.Errorf("expected at most 1 integration name before '--', got %d", dashIdx)
-				}
-				if dashIdx == 1 {
-					name = args[0]
-				}
-				passArgs = args[dashIdx:]
-			}
-
-			if name == "" {
-				var err error
-				name, err = selectIntegration()
-				if errors.Is(err, errCancelled) {
-					return nil
-				}
-				if err != nil {
-					return err
-				}
-			}
-
-			r, ok := integrations[strings.ToLower(name)]
-			if !ok {
-				return fmt.Errorf("unknown integration: %s", name)
-			}
-
-			if !configFlag && modelFlag == "" {
-				if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
-					return runIntegration(name, config.Models[0], passArgs)
-				}
-			}
-
-			var models []string
-			if modelFlag != "" {
-				models = []string{modelFlag}
-				if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
-					for _, m := range existing.Models {
-						if m != modelFlag {
-							models = append(models, m)
-						}
-					}
-				}
-			} else {
-				var err error
-				models, err = selectModels(cmd.Context(), name, "")
-				if errors.Is(err, errCancelled) {
-					return nil
-				}
-				if err != nil {
-					return err
-				}
-			}
-
-			if editor, isEditor := r.(Editor); isEditor {
-				paths := editor.Paths()
-				if len(paths) > 0 {
-					fmt.Fprintf(os.Stderr, "This will modify your %s configuration:\n", r)
-					for _, p := range paths {
-						fmt.Fprintf(os.Stderr, "  %s\n", p)
-					}
-					fmt.Fprintf(os.Stderr, "Backups will be saved to %s/\n\n", backupDir())
-
-					if ok, _ := confirmPrompt("Proceed?"); !ok {
-						return nil
-					}
-				}
-			}
-
-			if err := saveIntegration(name, models); err != nil {
-				return fmt.Errorf("failed to save: %w", err)
-			}
-
-			if editor, isEditor := r.(Editor); isEditor {
-				if err := editor.Edit(models); err != nil {
-					return fmt.Errorf("setup failed: %w", err)
-				}
-			}
-
-			if _, isEditor := r.(Editor); isEditor {
-				if len(models) == 1 {
-					fmt.Fprintf(os.Stderr, "Added %s to %s\n", models[0], r)
-				} else {
-					fmt.Fprintf(os.Stderr, "Added %d models to %s (default: %s)\n", len(models), r, models[0])
-				}
-			}
-
-			if configFlag {
-				if launch, _ := confirmPrompt(fmt.Sprintf("\nLaunch %s now?", r)); launch {
-					return runIntegration(name, models[0], passArgs)
-				}
-				fmt.Fprintf(os.Stderr, "Run 'ollama launch %s' to start with %s\n", strings.ToLower(name), models[0])
-				return nil
-			}
-
-			return runIntegration(name, models[0], passArgs)
-		},
-	}
-
-	cmd.Flags().StringVar(&modelFlag, "model", "", "Model to use")
-	cmd.Flags().BoolVar(&configFlag, "config", false, "Configure without launching")
-	return cmd
-}
-
-type modelInfo struct {
-	Name   string
-	Remote bool
-}
-
-// buildModelList merges existing models with recommendations, sorts them, and returns
-// the ordered items along with maps of existing and cloud model names.
-func buildModelList(existing []modelInfo, preChecked []string, current string) (items []selectItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
-	existingModels = make(map[string]bool)
-	cloudModels = make(map[string]bool)
-	recommended := make(map[string]bool)
-	var hasLocalModel, hasCloudModel bool
-
-	for _, rec := range recommendedModels {
-		recommended[rec.Name] = true
-	}
-
-	for _, m := range existing {
-		existingModels[m.Name] = true
-		if m.Remote {
-			cloudModels[m.Name] = true
-			hasCloudModel = true
-		} else {
-			hasLocalModel = true
-		}
-		displayName := strings.TrimSuffix(m.Name, ":latest")
-		existingModels[displayName] = true
-		item := selectItem{Name: displayName}
-		if recommended[displayName] {
-			item.Description = "recommended"
-		}
-		items = append(items, item)
-	}
-
-	for _, rec := range recommendedModels {
-		if existingModels[rec.Name] || existingModels[rec.Name+":latest"] {
-			continue
-		}
-		items = append(items, rec)
-		if isCloudModel(rec.Name) {
-			cloudModels[rec.Name] = true
-		}
-	}
-
-	checked := make(map[string]bool, len(preChecked))
-	for _, n := range preChecked {
-		checked[n] = true
-	}
-
-	// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
-	for _, item := range items {
-		if item.Name == current || strings.HasPrefix(item.Name, current+":") {
-			current = item.Name
-			break
-		}
-	}
-
-	if checked[current] {
-		preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
-	}
-
-	// Non-existing models get "install?" suffix and are pushed to the bottom.
-	// When user has no models, preserve recommended order.
-	notInstalled := make(map[string]bool)
-	for i := range items {
-		if !existingModels[items[i].Name] {
-			notInstalled[items[i].Name] = true
-			if items[i].Description != "" {
-				items[i].Description += ", install?"
-			} else {
-				items[i].Description = "install?"
-			}
-		}
-	}
-
-	if hasLocalModel || hasCloudModel {
-		slices.SortStableFunc(items, func(a, b selectItem) int {
-			ac, bc := checked[a.Name], checked[b.Name]
-			aNew, bNew := notInstalled[a.Name], notInstalled[b.Name]
-
-			if ac != bc {
-				if ac {
-					return -1
-				}
-				return 1
-			}
-			if !ac && !bc && aNew != bNew {
-				if aNew {
-					return 1
-				}
-				return -1
-			}
-			return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
-		})
-	}
-
-	return items, preChecked, existingModels, cloudModels
-}
-
-func isCloudModel(name string) bool {
-	return strings.HasSuffix(name, ":cloud")
-}
-
-func pullModel(ctx context.Context, client *api.Client, model string) error {
-	p := progress.NewProgress(os.Stderr)
-	defer p.Stop()
-
-	bars := make(map[string]*progress.Bar)
-	var status string
-	var spinner *progress.Spinner
-
-	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != "" {
-			if resp.Completed == 0 {
-				return nil
-			}
-
-			if spinner != nil {
-				spinner.Stop()
-			}
-
-			bar, ok := bars[resp.Digest]
-			if !ok {
-				name, isDigest := strings.CutPrefix(resp.Digest, "sha256:")
-				name = strings.TrimSpace(name)
-				if isDigest {
-					name = name[:min(12, len(name))]
-				}
-				bar = progress.NewBar(fmt.Sprintf("pulling %s:", name), resp.Total, resp.Completed)
-				bars[resp.Digest] = bar
-				p.Add(resp.Digest, bar)
-			}
-
-			bar.Set(resp.Completed)
-		} else if status != resp.Status {
-			if spinner != nil {
-				spinner.Stop()
-			}
-
-			status = resp.Status
-			spinner = progress.NewSpinner(status)
-			p.Add(status, spinner)
-		}
-
-		return nil
-	}
-
-	request := api.PullRequest{Name: model}
-	return client.Pull(ctx, &request, fn)
-}
--- a/cmd/config/integrations_test.go
+++ b/cmd/config/integrations_test.go
@@ -1,511 +0,0 @@
-package config
-
-import (
-	"fmt"
-	"slices"
-	"strings"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/spf13/cobra"
-)
-
-func TestIntegrationLookup(t *testing.T) {
-	tests := []struct {
-		name      string
-		input     string
-		wantFound bool
-		wantName  string
-	}{
-		{"claude lowercase", "claude", true, "Claude Code"},
-		{"claude uppercase", "CLAUDE", true, "Claude Code"},
-		{"claude mixed case", "Claude", true, "Claude Code"},
-		{"codex", "codex", true, "Codex"},
-		{"droid", "droid", true, "Droid"},
-		{"opencode", "opencode", true, "OpenCode"},
-		{"unknown integration", "unknown", false, ""},
-		{"empty string", "", false, ""},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			r, found := integrations[strings.ToLower(tt.input)]
-			if found != tt.wantFound {
-				t.Errorf("integrations[%q] found = %v, want %v", tt.input, found, tt.wantFound)
-			}
-			if found && r.String() != tt.wantName {
-				t.Errorf("integrations[%q].String() = %q, want %q", tt.input, r.String(), tt.wantName)
-			}
-		})
-	}
-}
-
-func TestIntegrationRegistry(t *testing.T) {
-	expectedIntegrations := []string{"claude", "codex", "droid", "opencode"}
-
-	for _, name := range expectedIntegrations {
-		t.Run(name, func(t *testing.T) {
-			r, ok := integrations[name]
-			if !ok {
-				t.Fatalf("integration %q not found in registry", name)
-			}
-			if r.String() == "" {
-				t.Error("integration.String() should not be empty")
-			}
-		})
-	}
-}
-
-func TestHasLocalModel(t *testing.T) {
-	tests := []struct {
-		name   string
-		models []string
-		want   bool
-	}{
-		{"empty list", []string{}, false},
-		{"single local model", []string{"llama3.2"}, true},
-		{"single cloud model", []string{"cloud-model"}, false},
-		{"mixed models", []string{"cloud-model", "llama3.2"}, true},
-		{"multiple local models", []string{"llama3.2", "qwen2.5"}, true},
-		{"multiple cloud models", []string{"cloud-a", "cloud-b"}, false},
-		{"local model first", []string{"llama3.2", "cloud-model"}, true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := slices.ContainsFunc(tt.models, func(m string) bool {
-				return !strings.Contains(m, "cloud")
-			})
-			if got != tt.want {
-				t.Errorf("hasLocalModel(%v) = %v, want %v", tt.models, got, tt.want)
-			}
-		})
-	}
-}
-
-func TestLaunchCmd(t *testing.T) {
-	// Mock checkServerHeartbeat that always succeeds
-	mockCheck := func(cmd *cobra.Command, args []string) error {
-		return nil
-	}
-
-	cmd := LaunchCmd(mockCheck)
-
-	t.Run("command structure", func(t *testing.T) {
-		if cmd.Use != "launch [INTEGRATION] [-- [EXTRA_ARGS...]]" {
-			t.Errorf("Use = %q, want %q", cmd.Use, "launch [INTEGRATION] [-- [EXTRA_ARGS...]]")
-		}
-		if cmd.Short == "" {
-			t.Error("Short description should not be empty")
-		}
-		if cmd.Long == "" {
-			t.Error("Long description should not be empty")
-		}
-	})
-
-	t.Run("flags exist", func(t *testing.T) {
-		modelFlag := cmd.Flags().Lookup("model")
-		if modelFlag == nil {
-			t.Error("--model flag should exist")
-		}
-
-		configFlag := cmd.Flags().Lookup("config")
-		if configFlag == nil {
-			t.Error("--config flag should exist")
-		}
-	})
-
-	t.Run("PreRunE is set", func(t *testing.T) {
-		if cmd.PreRunE == nil {
-			t.Error("PreRunE should be set to checkServerHeartbeat")
-		}
-	})
-}
-
-func TestRunIntegration_UnknownIntegration(t *testing.T) {
-	err := runIntegration("unknown-integration", "model", nil)
-	if err == nil {
-		t.Error("expected error for unknown integration, got nil")
-	}
-	if !strings.Contains(err.Error(), "unknown integration") {
-		t.Errorf("error should mention 'unknown integration', got: %v", err)
-	}
-}
-
-func TestHasLocalModel_DocumentsHeuristic(t *testing.T) {
-	tests := []struct {
-		name   string
-		models []string
-		want   bool
-		reason string
-	}{
-		{"empty list", []string{}, false, "empty list has no local models"},
-		{"contains-cloud-substring", []string{"deepseek-r1:cloud"}, false, "model with 'cloud' substring is considered cloud"},
-		{"cloud-in-name", []string{"my-cloud-model"}, false, "'cloud' anywhere in name = cloud model"},
-		{"cloudless", []string{"cloudless-model"}, false, "'cloudless' still contains 'cloud'"},
-		{"local-model", []string{"llama3.2"}, true, "no 'cloud' = local"},
-		{"mixed", []string{"cloud-model", "llama3.2"}, true, "one local model = hasLocalModel true"},
-		{"all-cloud", []string{"cloud-a", "cloud-b"}, false, "all contain 'cloud'"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := slices.ContainsFunc(tt.models, func(m string) bool {
-				return !strings.Contains(m, "cloud")
-			})
-			if got != tt.want {
-				t.Errorf("hasLocalModel(%v) = %v, want %v (%s)", tt.models, got, tt.want, tt.reason)
-			}
-		})
-	}
-}
-
-func TestLaunchCmd_NilHeartbeat(t *testing.T) {
-	// This should not panic - cmd creation should work even with nil
-	cmd := LaunchCmd(nil)
-	if cmd == nil {
-		t.Fatal("LaunchCmd returned nil")
-	}
-
-	// PreRunE should be nil when passed nil
-	if cmd.PreRunE != nil {
-		t.Log("Note: PreRunE is set even when nil is passed (acceptable)")
-	}
-}
-
-func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
-	for name, r := range integrations {
-		t.Run(name, func(t *testing.T) {
-			displayName := r.String()
-			if displayName == "" {
-				t.Error("String() should not return empty")
-			}
-			var _ func(string, []string) error = r.Run
-		})
-	}
-}
-
-func TestParseArgs(t *testing.T) {
-	// Tests reflect cobra's ArgsLenAtDash() semantics:
-	// - cobra strips "--" from args
-	// - ArgsLenAtDash() returns the index where "--" was, or -1
-	tests := []struct {
-		name     string
-		args     []string // args as cobra delivers them (no "--")
-		dashIdx  int      // what ArgsLenAtDash() returns
-		wantName string
-		wantArgs []string
-		wantErr  bool
-	}{
-		{
-			name:     "no extra args, no dash",
-			args:     []string{"claude"},
-			dashIdx:  -1,
-			wantName: "claude",
-		},
-		{
-			name:     "with extra args after --",
-			args:     []string{"codex", "-p", "myprofile"},
-			dashIdx:  1,
-			wantName: "codex",
-			wantArgs: []string{"-p", "myprofile"},
-		},
-		{
-			name:     "extra args only after --",
-			args:     []string{"codex", "--sandbox", "workspace-write"},
-			dashIdx:  1,
-			wantName: "codex",
-			wantArgs: []string{"--sandbox", "workspace-write"},
-		},
-		{
-			name:     "-- at end with no args after",
-			args:     []string{"claude"},
-			dashIdx:  1,
-			wantName: "claude",
-		},
-		{
-			name:     "-- with no integration name",
-			args:     []string{"--verbose"},
-			dashIdx:  0,
-			wantName: "",
-			wantArgs: []string{"--verbose"},
-		},
-		{
-			name:    "multiple args before -- is error",
-			args:    []string{"claude", "codex", "--verbose"},
-			dashIdx: 2,
-			wantErr: true,
-		},
-		{
-			name:    "multiple args without -- is error",
-			args:    []string{"claude", "codex"},
-			dashIdx: -1,
-			wantErr: true,
-		},
-		{
-			name:     "no args, no dash",
-			args:     []string{},
-			dashIdx:  -1,
-			wantName: "",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			// Simulate the parsing logic from LaunchCmd using dashIdx
-			var name string
-			var parsedArgs []string
-			var err error
-
-			dashIdx := tt.dashIdx
-			args := tt.args
-
-			if dashIdx == -1 {
-				if len(args) > 1 {
-					err = fmt.Errorf("unexpected arguments: %v", args[1:])
-				} else if len(args) == 1 {
-					name = args[0]
-				}
-			} else {
-				if dashIdx > 1 {
-					err = fmt.Errorf("expected at most 1 integration name before '--', got %d", dashIdx)
-				} else {
-					if dashIdx == 1 {
-						name = args[0]
-					}
-					parsedArgs = args[dashIdx:]
-				}
-			}
-
-			if tt.wantErr {
-				if err == nil {
-					t.Fatal("expected error, got nil")
-				}
-				return
-			}
-			if err != nil {
-				t.Fatalf("unexpected error: %v", err)
-			}
-			if name != tt.wantName {
-				t.Errorf("name = %q, want %q", name, tt.wantName)
-			}
-			if !slices.Equal(parsedArgs, tt.wantArgs) {
-				t.Errorf("args = %v, want %v", parsedArgs, tt.wantArgs)
-			}
-		})
-	}
-}
-
-func TestIsCloudModel(t *testing.T) {
-	tests := []struct {
-		name string
-		want bool
-	}{
-		{"glm-4.7:cloud", true},
-		{"kimi-k2.5:cloud", true},
-		{"glm-4.7-flash", false},
-		{"glm-4.7-flash:latest", false},
-		{"cloud-model", false},
-		{"model:cloudish", false},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := isCloudModel(tt.name); got != tt.want {
-				t.Errorf("isCloudModel(%q) = %v, want %v", tt.name, got, tt.want)
-			}
-		})
-	}
-}
-
-func names(items []selectItem) []string {
-	var out []string
-	for _, item := range items {
-		out = append(out, item.Name)
-	}
-	return out
-}
-
-func TestBuildModelList_NoExistingModels(t *testing.T) {
-	items, _, _, _ := buildModelList(nil, nil, "")
-
-	want := []string{"glm-4.7-flash", "qwen3:8b", "glm-4.7:cloud", "kimi-k2.5:cloud"}
-	if diff := cmp.Diff(want, names(items)); diff != "" {
-		t.Errorf("with no existing models, items should be recommended in order (-want +got):\n%s", diff)
-	}
-
-	for _, item := range items {
-		if !strings.HasSuffix(item.Description, "install?") {
-			t.Errorf("item %q should have description ending with 'install?', got %q", item.Name, item.Description)
-		}
-	}
-}
-
-func TestBuildModelList_OnlyLocalModels_CloudRecsAtBottom(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "qwen2.5:latest", Remote: false},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	want := []string{"llama3.2", "qwen2.5", "glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud", "qwen3:8b"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("cloud recs should be at bottom (-want +got):\n%s", diff)
-	}
-}
-
-func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	want := []string{"glm-4.7:cloud", "llama3.2", "glm-4.7-flash", "kimi-k2.5:cloud", "qwen3:8b"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("mixed models should be alphabetical (-want +got):\n%s", diff)
-	}
-}
-
-func TestBuildModelList_PreCheckedFirst(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
-	got := names(items)
-
-	if got[0] != "llama3.2" {
-		t.Errorf("pre-checked model should be first, got %v", got)
-	}
-}
-
-func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "glm-4.7-flash", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-
-	for _, item := range items {
-		switch item.Name {
-		case "glm-4.7-flash", "glm-4.7:cloud":
-			if strings.HasSuffix(item.Description, "install?") {
-				t.Errorf("installed recommended %q should not have 'install?' suffix, got %q", item.Name, item.Description)
-			}
-		case "kimi-k2.5:cloud", "qwen3:8b":
-			if !strings.HasSuffix(item.Description, "install?") {
-				t.Errorf("non-installed recommended %q should have 'install?' suffix, got %q", item.Name, item.Description)
-			}
-		}
-	}
-}
-
-func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "glm-4.7-flash", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	// glm-4.7-flash and glm-4.7:cloud are installed so they sort normally;
-	// kimi-k2.5:cloud and qwen3:8b are not installed so they go to the bottom
-	want := []string{"glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud", "qwen3:8b"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("existing cloud models should sort normally (-want +got):\n%s", diff)
-	}
-}
-
-func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "kimi-k2.5:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	// kimi-k2.5:cloud is installed so it sorts normally;
-	// the rest of the recommendations are not installed so they go to the bottom
-	want := []string{"kimi-k2.5:cloud", "llama3.2", "glm-4.7-flash", "glm-4.7:cloud", "qwen3:8b"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("only non-installed models should be at bottom (-want +got):\n%s", diff)
-	}
-
-	for _, item := range items {
-		if !slices.Contains([]string{"kimi-k2.5:cloud", "llama3.2"}, item.Name) {
-			if !strings.HasSuffix(item.Description, "install?") {
-				t.Errorf("non-installed %q should have 'install?' suffix, got %q", item.Name, item.Description)
-			}
-		}
-	}
-}
-
-func TestBuildModelList_LatestTagStripped(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "glm-4.7-flash:latest", Remote: false},
-		{Name: "llama3.2:latest", Remote: false},
-	}
-
-	items, _, existingModels, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	// :latest should be stripped from display names
-	for _, name := range got {
-		if strings.HasSuffix(name, ":latest") {
-			t.Errorf("name %q should not have :latest suffix", name)
-		}
-	}
-
-	// glm-4.7-flash should not be duplicated (existing :latest matches the recommendation)
-	count := 0
-	for _, name := range got {
-		if name == "glm-4.7-flash" {
-			count++
-		}
-	}
-	if count != 1 {
-		t.Errorf("glm-4.7-flash should appear exactly once, got %d in %v", count, got)
-	}
-
-	// Stripped name should be in existingModels so it won't be pulled
-	if !existingModels["glm-4.7-flash"] {
-		t.Error("glm-4.7-flash should be in existingModels")
-	}
-}
-
-func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	_, _, existingModels, cloudModels := buildModelList(existing, nil, "")
-
-	if !existingModels["llama3.2"] {
-		t.Error("llama3.2 should be in existingModels")
-	}
-	if !existingModels["glm-4.7:cloud"] {
-		t.Error("glm-4.7:cloud should be in existingModels")
-	}
-	if existingModels["glm-4.7-flash"] {
-		t.Error("glm-4.7-flash should not be in existingModels (it's a recommendation)")
-	}
-
-	if !cloudModels["glm-4.7:cloud"] {
-		t.Error("glm-4.7:cloud should be in cloudModels")
-	}
-	if !cloudModels["kimi-k2.5:cloud"] {
-		t.Error("kimi-k2.5:cloud should be in cloudModels (recommended cloud)")
-	}
-	if cloudModels["llama3.2"] {
-		t.Error("llama3.2 should not be in cloudModels")
-	}
-}
--- a/cmd/config/openclaw.go
+++ b/cmd/config/openclaw.go
@@ -1,254 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-type Openclaw struct{}
-
-func (c *Openclaw) String() string { return "OpenClaw" }
-
-const ansiGreen = "\033[32m"
-
-func (c *Openclaw) Run(model string, args []string) error {
-	bin := "openclaw"
-	if _, err := exec.LookPath(bin); err != nil {
-		bin = "clawdbot"
-		if _, err := exec.LookPath(bin); err != nil {
-			return fmt.Errorf("openclaw is not installed, install from https://docs.openclaw.ai")
-		}
-	}
-
-	models := []string{model}
-	if config, err := loadIntegration("openclaw"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	} else if config, err := loadIntegration("clawdbot"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := c.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	if !c.onboarded() {
-		// Onboarding not completed: run it (model already set via Edit)
-		// Use "ollama" as gateway token for simple local access
-		cmd := exec.Command(bin, "onboard",
-			"--auth-choice", "skip",
-			"--gateway-token", "ollama",
-		)
-		cmd.Stdin = os.Stdin
-		cmd.Stdout = os.Stdout
-		cmd.Stderr = os.Stderr
-		return cmd.Run()
-	}
-
-	// Onboarding completed: run gateway
-	cmd := exec.Command(bin, append([]string{"gateway"}, args...)...)
-	cmd.Stdin = os.Stdin
-
-	// Capture output to detect "already running" message
-	var outputBuf bytes.Buffer
-	cmd.Stdout = io.MultiWriter(os.Stdout, &outputBuf)
-	cmd.Stderr = io.MultiWriter(os.Stderr, &outputBuf)
-
-	err := cmd.Run()
-	if err != nil && strings.Contains(outputBuf.String(), "Gateway already running") {
-		fmt.Fprintf(os.Stderr, "%sOpenClaw has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
-		return nil
-	}
-	return err
-}
-
-// onboarded checks if OpenClaw onboarding wizard was completed
-// by looking for the wizard.lastRunAt marker in the config
-func (c *Openclaw) onboarded() bool {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return false
-	}
-
-	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
-	legacyPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
-
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	} else if data, err := os.ReadFile(legacyPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	} else {
-		return false
-	}
-
-	// Check for wizard.lastRunAt marker (set when onboarding completes)
-	wizard, _ := config["wizard"].(map[string]any)
-	if wizard == nil {
-		return false
-	}
-	lastRunAt, _ := wizard["lastRunAt"].(string)
-	return lastRunAt != ""
-}
-
-func (c *Openclaw) Paths() []string {
-	home, _ := os.UserHomeDir()
-	p := filepath.Join(home, ".openclaw", "openclaw.json")
-	if _, err := os.Stat(p); err == nil {
-		return []string{p}
-	}
-	legacy := filepath.Join(home, ".clawdbot", "clawdbot.json")
-	if _, err := os.Stat(legacy); err == nil {
-		return []string{legacy}
-	}
-	return nil
-}
-
-func (c *Openclaw) Edit(models []string) error {
-	if len(models) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
-	legacyPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
-	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
-		return err
-	}
-
-	// Read into map[string]any to preserve unknown fields
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	} else if data, err := os.ReadFile(legacyPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	}
-
-	// Navigate/create: models.providers.ollama (preserving other providers)
-	modelsSection, _ := config["models"].(map[string]any)
-	if modelsSection == nil {
-		modelsSection = make(map[string]any)
-	}
-	providers, _ := modelsSection["providers"].(map[string]any)
-	if providers == nil {
-		providers = make(map[string]any)
-	}
-	ollama, _ := providers["ollama"].(map[string]any)
-	if ollama == nil {
-		ollama = make(map[string]any)
-	}
-
-	ollama["baseUrl"] = envconfig.Host().String() + "/v1"
-	// needed to register provider
-	ollama["apiKey"] = "ollama-local"
-	// TODO(parthsareen): potentially move to responses
-	ollama["api"] = "openai-completions"
-
-	// Build map of existing models to preserve user customizations
-	existingModels, _ := ollama["models"].([]any)
-	existingByID := make(map[string]map[string]any)
-	for _, m := range existingModels {
-		if entry, ok := m.(map[string]any); ok {
-			if id, ok := entry["id"].(string); ok {
-				existingByID[id] = entry
-			}
-		}
-	}
-
-	var newModels []any
-	for _, model := range models {
-		entry := map[string]any{
-			"id":        model,
-			"name":      model,
-			"reasoning": false,
-			"input":     []any{"text"},
-			"cost": map[string]any{
-				"input":      0,
-				"output":     0,
-				"cacheRead":  0,
-				"cacheWrite": 0,
-			},
-			// TODO(parthsareen): get these values from API
-			"contextWindow": 131072,
-			"maxTokens":     16384,
-		}
-		// Merge existing fields (user customizations)
-		if existing, ok := existingByID[model]; ok {
-			for k, v := range existing {
-				if _, isNew := entry[k]; !isNew {
-					entry[k] = v
-				}
-			}
-		}
-		newModels = append(newModels, entry)
-	}
-	ollama["models"] = newModels
-
-	providers["ollama"] = ollama
-	modelsSection["providers"] = providers
-	config["models"] = modelsSection
-
-	// Update agents.defaults.model.primary (preserving other agent settings)
-	agents, _ := config["agents"].(map[string]any)
-	if agents == nil {
-		agents = make(map[string]any)
-	}
-	defaults, _ := agents["defaults"].(map[string]any)
-	if defaults == nil {
-		defaults = make(map[string]any)
-	}
-	modelConfig, _ := defaults["model"].(map[string]any)
-	if modelConfig == nil {
-		modelConfig = make(map[string]any)
-	}
-	modelConfig["primary"] = "ollama/" + models[0]
-	defaults["model"] = modelConfig
-	agents["defaults"] = defaults
-	config["agents"] = agents
-
-	data, err := json.MarshalIndent(config, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(configPath, data)
-}
-
-func (c *Openclaw) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-
-	config, err := readJSONFile(filepath.Join(home, ".openclaw", "openclaw.json"))
-	if err != nil {
-		config, err = readJSONFile(filepath.Join(home, ".clawdbot", "clawdbot.json"))
-		if err != nil {
-			return nil
-		}
-	}
-
-	modelsSection, _ := config["models"].(map[string]any)
-	providers, _ := modelsSection["providers"].(map[string]any)
-	ollama, _ := providers["ollama"].(map[string]any)
-	modelList, _ := ollama["models"].([]any)
-
-	var result []string
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if id, ok := entry["id"].(string); ok {
-				result = append(result, id)
-			}
-		}
-	}
-	return result
-}
--- a/cmd/config/openclaw_test.go
+++ b/cmd/config/openclaw_test.go
@@ -1,878 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-func TestOpenclawIntegration(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := c.String(); got != "OpenClaw" {
-			t.Errorf("String() = %q, want %q", got, "OpenClaw")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = c
-	})
-
-	t.Run("implements Editor", func(t *testing.T) {
-		var _ Editor = c
-	})
-}
-
-func TestOpenclawEdit(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("fresh install", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2")
-	})
-
-	t.Run("multiple models - first is primary", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-		assertOpenclawModelExists(t, configPath, "mistral")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2")
-	})
-
-	t.Run("preserve other providers", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"anthropic":{"apiKey":"xxx"}}}}`), 0o644)
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		models := cfg["models"].(map[string]any)
-		providers := models["providers"].(map[string]any)
-		if providers["anthropic"] == nil {
-			t.Error("anthropic provider was removed")
-		}
-	})
-
-	t.Run("preserve top-level keys", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"theme":"dark","mcp":{"servers":{}}}`), 0o644)
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("theme was removed")
-		}
-		if cfg["mcp"] == nil {
-			t.Error("mcp was removed")
-		}
-	})
-
-	t.Run("preserve user customizations on models", func(t *testing.T) {
-		cleanup()
-		c.Edit([]string{"llama3.2"})
-
-		// User adds custom field
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		models := cfg["models"].(map[string]any)
-		providers := models["providers"].(map[string]any)
-		ollama := providers["ollama"].(map[string]any)
-		modelList := ollama["models"].([]any)
-		entry := modelList[0].(map[string]any)
-		entry["customField"] = "user-value"
-		configData, _ := json.MarshalIndent(cfg, "", "  ")
-		os.WriteFile(configPath, configData, 0o644)
-
-		// Re-run Edit
-		c.Edit([]string{"llama3.2"})
-
-		data, _ = os.ReadFile(configPath)
-		json.Unmarshal(data, &cfg)
-		models = cfg["models"].(map[string]any)
-		providers = models["providers"].(map[string]any)
-		ollama = providers["ollama"].(map[string]any)
-		modelList = ollama["models"].([]any)
-		entry = modelList[0].(map[string]any)
-		if entry["customField"] != "user-value" {
-			t.Error("custom field was lost")
-		}
-	})
-
-	t.Run("edit replaces models list", func(t *testing.T) {
-		cleanup()
-		c.Edit([]string{"llama3.2", "mistral"})
-		c.Edit([]string{"llama3.2"})
-
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-		assertOpenclawModelNotExists(t, configPath, "mistral")
-	})
-
-	t.Run("empty models is no-op", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		original := `{"existing":"data"}`
-		os.WriteFile(configPath, []byte(original), 0o644)
-
-		c.Edit([]string{})
-
-		data, _ := os.ReadFile(configPath)
-		if string(data) != original {
-			t.Error("empty models should not modify file")
-		}
-	})
-
-	t.Run("corrupted JSON treated as empty", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{corrupted`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		if err := json.Unmarshal(data, &cfg); err != nil {
-			t.Error("result should be valid JSON")
-		}
-	})
-
-	t.Run("wrong type models section", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":"not a map"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-	})
-}
-
-func TestOpenclawModels(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("no config returns nil", func(t *testing.T) {
-		if models := c.Models(); len(models) > 0 {
-			t.Errorf("expected nil/empty, got %v", models)
-		}
-	})
-
-	t.Run("returns all ollama models", func(t *testing.T) {
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[
-				{"id":"llama3.2"},
-				{"id":"mistral"}
-			]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 2 {
-			t.Errorf("expected 2 models, got %v", models)
-		}
-	})
-}
-
-// Helper functions
-func assertOpenclawModelExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	ollama := providers["ollama"].(map[string]any)
-	modelList := ollama["models"].([]any)
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if entry["id"] == model {
-				return
-			}
-		}
-	}
-	t.Errorf("model %s not found", model)
-}
-
-func assertOpenclawModelNotExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models, _ := cfg["models"].(map[string]any)
-	providers, _ := models["providers"].(map[string]any)
-	ollama, _ := providers["ollama"].(map[string]any)
-	modelList, _ := ollama["models"].([]any)
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if entry["id"] == model {
-				t.Errorf("model %s should not exist", model)
-			}
-		}
-	}
-}
-
-func assertOpenclawPrimaryModel(t *testing.T, path, expected string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	agents := cfg["agents"].(map[string]any)
-	defaults := agents["defaults"].(map[string]any)
-	model := defaults["model"].(map[string]any)
-	if model["primary"] != expected {
-		t.Errorf("primary model = %v, want %v", model["primary"], expected)
-	}
-}
-
-func TestOpenclawPaths(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("returns path when config exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Errorf("expected 1 path, got %d", len(paths))
-		}
-	})
-
-	t.Run("returns nil when config missing", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		if paths := c.Paths(); paths != nil {
-			t.Errorf("expected nil, got %v", paths)
-		}
-	})
-}
-
-func TestOpenclawModelsEdgeCases(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("corrupted JSON returns nil", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{corrupted`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at models level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":"string"}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at providers level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":"string"}}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at ollama level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":"string"}}}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("model entry missing id", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":{"models":[{"name":"test"}]}}}}`), 0o644)
-		if len(c.Models()) != 0 {
-			t.Error("expected empty for missing id")
-		}
-	})
-
-	t.Run("model id is not string", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":{"models":[{"id":123}]}}}}`), 0o644)
-		if len(c.Models()) != 0 {
-			t.Error("expected empty for non-string id")
-		}
-	})
-}
-
-func TestOpenclawEditSchemaFields(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
-
-	if err := c.Edit([]string{"llama3.2"}); err != nil {
-		t.Fatal(err)
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	ollama := providers["ollama"].(map[string]any)
-	modelList := ollama["models"].([]any)
-	entry := modelList[0].(map[string]any)
-
-	// Verify required schema fields
-	if entry["reasoning"] != false {
-		t.Error("reasoning should be false")
-	}
-	if entry["input"] == nil {
-		t.Error("input should be set")
-	}
-	if entry["contextWindow"] == nil {
-		t.Error("contextWindow should be set")
-	}
-	if entry["maxTokens"] == nil {
-		t.Error("maxTokens should be set")
-	}
-	cost := entry["cost"].(map[string]any)
-	if cost["cacheRead"] == nil {
-		t.Error("cost.cacheRead should be set")
-	}
-	if cost["cacheWrite"] == nil {
-		t.Error("cost.cacheWrite should be set")
-	}
-}
-
-func TestOpenclawEditModelNames(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
-	cleanup := func() { os.RemoveAll(filepath.Join(tmpDir, ".openclaw")) }
-
-	t.Run("model with colon tag", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2:70b"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2:70b")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2:70b")
-	})
-
-	t.Run("model with slash", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"library/model:tag"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "library/model:tag")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/library/model:tag")
-	})
-
-	t.Run("model with hyphen", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"test-model"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "test-model")
-	})
-}
-
-func TestOpenclawEditAgentsPreservation(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("preserve other agent defaults", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"agents":{"defaults":{"model":{"primary":"old"},"temperature":0.7}}}`), 0o644)
-
-		c.Edit([]string{"llama3.2"})
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		agents := cfg["agents"].(map[string]any)
-		defaults := agents["defaults"].(map[string]any)
-		if defaults["temperature"] != 0.7 {
-			t.Error("temperature setting was lost")
-		}
-	})
-
-	t.Run("preserve other agents besides defaults", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"agents":{"defaults":{},"custom-agent":{"foo":"bar"}}}`), 0o644)
-
-		c.Edit([]string{"llama3.2"})
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		agents := cfg["agents"].(map[string]any)
-		if agents["custom-agent"] == nil {
-			t.Error("custom-agent was lost")
-		}
-	})
-}
-
-const testOpenclawFixture = `{
-  "theme": "dark",
-  "mcp": {"servers": {"custom": {"enabled": true}}},
-  "models": {
-    "providers": {
-      "anthropic": {"apiKey": "xxx"},
-      "ollama": {
-        "baseUrl": "http://127.0.0.1:11434/v1",
-        "models": [{"id": "old-model", "customField": "preserved"}]
-      }
-    }
-  },
-  "agents": {
-    "defaults": {"model": {"primary": "old"}, "temperature": 0.7},
-    "custom-agent": {"foo": "bar"}
-  }
-}`
-
-func TestOpenclawEdit_RoundTrip(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
-
-	if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
-		t.Fatal(err)
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-
-	// Verify top-level preserved
-	if cfg["theme"] != "dark" {
-		t.Error("theme not preserved")
-	}
-	mcp := cfg["mcp"].(map[string]any)
-	servers := mcp["servers"].(map[string]any)
-	if servers["custom"] == nil {
-		t.Error("mcp.servers.custom not preserved")
-	}
-
-	// Verify other providers preserved
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	if providers["anthropic"] == nil {
-		t.Error("anthropic provider not preserved")
-	}
-
-	// Verify agents preserved
-	agents := cfg["agents"].(map[string]any)
-	if agents["custom-agent"] == nil {
-		t.Error("custom-agent not preserved")
-	}
-	defaults := agents["defaults"].(map[string]any)
-	if defaults["temperature"] != 0.7 {
-		t.Error("temperature not preserved")
-	}
-}
-
-func TestOpenclawEdit_Idempotent(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
-
-	c.Edit([]string{"llama3.2", "mistral"})
-	firstData, _ := os.ReadFile(configPath)
-
-	c.Edit([]string{"llama3.2", "mistral"})
-	secondData, _ := os.ReadFile(configPath)
-
-	if string(firstData) != string(secondData) {
-		t.Error("repeated edits with same models produced different results")
-	}
-}
-
-func TestOpenclawEdit_MultipleConsecutiveEdits(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
-
-	for i := range 10 {
-		models := []string{"model-a", "model-b"}
-		if i%2 == 0 {
-			models = []string{"model-x", "model-y", "model-z"}
-		}
-		if err := c.Edit(models); err != nil {
-			t.Fatalf("edit %d failed: %v", i, err)
-		}
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatalf("file is not valid JSON after multiple edits: %v", err)
-	}
-
-	if cfg["theme"] != "dark" {
-		t.Error("theme lost after multiple edits")
-	}
-}
-
-func TestOpenclawEdit_BackupCreated(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-	backupDir := filepath.Join(os.TempDir(), "ollama-backups")
-
-	os.MkdirAll(configDir, 0o755)
-	uniqueMarker := fmt.Sprintf("test-marker-%d", os.Getpid())
-	original := fmt.Sprintf(`{"theme": "%s"}`, uniqueMarker)
-	os.WriteFile(configPath, []byte(original), 0o644)
-
-	if err := c.Edit([]string{"model-a"}); err != nil {
-		t.Fatal(err)
-	}
-
-	backups, _ := filepath.Glob(filepath.Join(backupDir, "openclaw.json.*"))
-	foundBackup := false
-	for _, backup := range backups {
-		data, _ := os.ReadFile(backup)
-		if string(data) == original {
-			foundBackup = true
-			break
-		}
-	}
-
-	if !foundBackup {
-		t.Error("backup with original content not found")
-	}
-}
-
-func TestOpenclawClawdbotAlias(t *testing.T) {
-	for _, alias := range []string{"clawdbot", "moltbot"} {
-		t.Run(alias+" alias resolves to Openclaw runner", func(t *testing.T) {
-			r, ok := integrations[alias]
-			if !ok {
-				t.Fatalf("%s not found in integrations", alias)
-			}
-			if _, ok := r.(*Openclaw); !ok {
-				t.Errorf("%s integration is %T, want *Openclaw", alias, r)
-			}
-		})
-
-		t.Run(alias+" is hidden from selector", func(t *testing.T) {
-			if !integrationAliases[alias] {
-				t.Errorf("%s should be in integrationAliases", alias)
-			}
-		})
-	}
-}
-
-func TestOpenclawLegacyPaths(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("falls back to legacy clawdbot path", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Fatalf("expected 1 path, got %d", len(paths))
-		}
-		if paths[0] != filepath.Join(legacyDir, "clawdbot.json") {
-			t.Errorf("expected legacy path, got %s", paths[0])
-		}
-	})
-
-	t.Run("prefers new path over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Fatalf("expected 1 path, got %d", len(paths))
-		}
-		if paths[0] != filepath.Join(newDir, "openclaw.json") {
-			t.Errorf("expected new path, got %s", paths[0])
-		}
-	})
-
-	t.Run("Models reads from legacy path", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[{"id":"llama3.2"}]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 1 || models[0] != "llama3.2" {
-			t.Errorf("expected [llama3.2], got %v", models)
-		}
-	})
-
-	t.Run("Models prefers new path over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[{"id":"new-model"}]}}}
-		}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[{"id":"legacy-model"}]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 1 || models[0] != "new-model" {
-			t.Errorf("expected [new-model], got %v", models)
-		}
-	})
-
-	t.Run("Edit reads new path over legacy when both exist", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{"theme":"new"}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"theme":"legacy"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ := os.ReadFile(filepath.Join(newDir, "openclaw.json"))
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "new" {
-			t.Errorf("expected theme from new config, got %v", cfg["theme"])
-		}
-	})
-
-	t.Run("Edit migrates from legacy config", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"theme":"dark"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		// Should write to new path
-		newPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
-		data, err := os.ReadFile(newPath)
-		if err != nil {
-			t.Fatal("expected new config file to be created")
-		}
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("legacy theme setting was not migrated")
-		}
-	})
-}
-
-func TestOpenclawEdit_CreatesDirectoryIfMissing(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-
-	if _, err := os.Stat(configDir); !os.IsNotExist(err) {
-		t.Fatal("directory should not exist before test")
-	}
-
-	if err := c.Edit([]string{"model-a"}); err != nil {
-		t.Fatal(err)
-	}
-
-	if _, err := os.Stat(configDir); os.IsNotExist(err) {
-		t.Fatal("directory was not created")
-	}
-}
-
-func TestOpenclawOnboarded(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("returns false when no config exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		if c.onboarded() {
-			t.Error("expected false when no config exists")
-		}
-	})
-
-	t.Run("returns false when config exists but no wizard section", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"theme":"dark"}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when no wizard section")
-		}
-	})
-
-	t.Run("returns false when wizard section exists but no lastRunAt", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{}}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when wizard.lastRunAt is missing")
-		}
-	})
-
-	t.Run("returns false when wizard.lastRunAt is empty string", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{"lastRunAt":""}}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when wizard.lastRunAt is empty")
-		}
-	})
-
-	t.Run("returns true when wizard.lastRunAt is set", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)
-
-		if !c.onboarded() {
-			t.Error("expected true when wizard.lastRunAt is set")
-		}
-	})
-
-	t.Run("checks legacy clawdbot path", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)
-
-		if !c.onboarded() {
-			t.Error("expected true when legacy config has wizard.lastRunAt")
-		}
-	})
-
-	t.Run("prefers new path over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		// New path has no wizard marker
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{}`), 0o644)
-		// Legacy has wizard marker
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false - should prefer new path which has no wizard marker")
-		}
-	})
-
-	t.Run("handles corrupted JSON gracefully", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{corrupted`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false for corrupted JSON")
-		}
-	})
-
-	t.Run("handles wrong type for wizard section", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":"not a map"}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when wizard is wrong type")
-		}
-	})
-}
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -1,226 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"maps"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"slices"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-// OpenCode implements Runner and Editor for OpenCode integration
-type OpenCode struct{}
-
-func (o *OpenCode) String() string { return "OpenCode" }
-
-func (o *OpenCode) Run(model string, args []string) error {
-	if _, err := exec.LookPath("opencode"); err != nil {
-		return fmt.Errorf("opencode is not installed, install from https://opencode.ai")
-	}
-
-	// Call Edit() to ensure config is up-to-date before launch
-	models := []string{model}
-	if config, err := loadIntegration("opencode"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := o.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	cmd := exec.Command("opencode", args...)
-	cmd.Stdin = os.Stdin
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	return cmd.Run()
-}
-
-func (o *OpenCode) Paths() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-
-	var paths []string
-	p := filepath.Join(home, ".config", "opencode", "opencode.json")
-	if _, err := os.Stat(p); err == nil {
-		paths = append(paths, p)
-	}
-	sp := filepath.Join(home, ".local", "state", "opencode", "model.json")
-	if _, err := os.Stat(sp); err == nil {
-		paths = append(paths, sp)
-	}
-	return paths
-}
-
-func (o *OpenCode) Edit(modelList []string) error {
-	if len(modelList) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	configPath := filepath.Join(home, ".config", "opencode", "opencode.json")
-	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
-		return err
-	}
-
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config) // Ignore parse errors; treat missing/corrupt files as empty
-	}
-
-	config["$schema"] = "https://opencode.ai/config.json"
-
-	provider, ok := config["provider"].(map[string]any)
-	if !ok {
-		provider = make(map[string]any)
-	}
-
-	ollama, ok := provider["ollama"].(map[string]any)
-	if !ok {
-		ollama = map[string]any{
-			"npm":  "@ai-sdk/openai-compatible",
-			"name": "Ollama (local)",
-			"options": map[string]any{
-				"baseURL": envconfig.Host().String() + "/v1",
-			},
-		}
-	}
-
-	models, ok := ollama["models"].(map[string]any)
-	if !ok {
-		models = make(map[string]any)
-	}
-
-	selectedSet := make(map[string]bool)
-	for _, m := range modelList {
-		selectedSet[m] = true
-	}
-
-	for name, cfg := range models {
-		if cfgMap, ok := cfg.(map[string]any); ok {
-			if isOllamaModel(cfgMap) && !selectedSet[name] {
-				delete(models, name)
-			}
-		}
-	}
-
-	for _, model := range modelList {
-		if existing, ok := models[model].(map[string]any); ok {
-			// migrate existing models without _launch marker
-			if isOllamaModel(existing) {
-				existing["_launch"] = true
-				if name, ok := existing["name"].(string); ok {
-					existing["name"] = strings.TrimSuffix(name, " [Ollama]")
-				}
-			}
-			continue
-		}
-		models[model] = map[string]any{
-			"name":    model,
-			"_launch": true,
-		}
-	}
-
-	ollama["models"] = models
-	provider["ollama"] = ollama
-	config["provider"] = provider
-
-	configData, err := json.MarshalIndent(config, "", "  ")
-	if err != nil {
-		return err
-	}
-	if err := writeWithBackup(configPath, configData); err != nil {
-		return err
-	}
-
-	statePath := filepath.Join(home, ".local", "state", "opencode", "model.json")
-	if err := os.MkdirAll(filepath.Dir(statePath), 0o755); err != nil {
-		return err
-	}
-
-	state := map[string]any{
-		"recent":   []any{},
-		"favorite": []any{},
-		"variant":  map[string]any{},
-	}
-	if data, err := os.ReadFile(statePath); err == nil {
-		_ = json.Unmarshal(data, &state) // Ignore parse errors; use defaults
-	}
-
-	recent, _ := state["recent"].([]any)
-
-	modelSet := make(map[string]bool)
-	for _, m := range modelList {
-		modelSet[m] = true
-	}
-
-	// Filter out existing Ollama models we're about to re-add
-	newRecent := slices.DeleteFunc(slices.Clone(recent), func(entry any) bool {
-		e, ok := entry.(map[string]any)
-		if !ok || e["providerID"] != "ollama" {
-			return false
-		}
-		modelID, _ := e["modelID"].(string)
-		return modelSet[modelID]
-	})
-
-	// Prepend models in reverse order so first model ends up first
-	for _, model := range slices.Backward(modelList) {
-		newRecent = slices.Insert(newRecent, 0, any(map[string]any{
-			"providerID": "ollama",
-			"modelID":    model,
-		}))
-	}
-
-	const maxRecentModels = 10
-	newRecent = newRecent[:min(len(newRecent), maxRecentModels)]
-
-	state["recent"] = newRecent
-
-	stateData, err := json.MarshalIndent(state, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(statePath, stateData)
-}
-
-func (o *OpenCode) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-	config, err := readJSONFile(filepath.Join(home, ".config", "opencode", "opencode.json"))
-	if err != nil {
-		return nil
-	}
-	provider, _ := config["provider"].(map[string]any)
-	ollama, _ := provider["ollama"].(map[string]any)
-	models, _ := ollama["models"].(map[string]any)
-	if len(models) == 0 {
-		return nil
-	}
-	keys := slices.Collect(maps.Keys(models))
-	slices.Sort(keys)
-	return keys
-}
-
-// isOllamaModel reports whether a model config entry is managed by us
-func isOllamaModel(cfg map[string]any) bool {
-	if v, ok := cfg["_launch"].(bool); ok && v {
-		return true
-	}
-	// previously used [Ollama] as a suffix for the model managed by ollama launch
-	if name, ok := cfg["name"].(string); ok {
-		return strings.HasSuffix(name, "[Ollama]")
-	}
-	return false
-}
--- a/cmd/config/opencode_test.go
+++ b/cmd/config/opencode_test.go
@@ -1,507 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-func TestOpenCodeIntegration(t *testing.T) {
-	o := &OpenCode{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := o.String(); got != "OpenCode" {
-			t.Errorf("String() = %q, want %q", got, "OpenCode")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = o
-	})
-
-	t.Run("implements Editor", func(t *testing.T) {
-		var _ Editor = o
-	})
-}
-
-func TestOpenCodeEdit(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
-	statePath := filepath.Join(stateDir, "model.json")
-
-	cleanup := func() {
-		os.RemoveAll(configDir)
-		os.RemoveAll(stateDir)
-	}
-
-	t.Run("fresh install", func(t *testing.T) {
-		cleanup()
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
-	})
-
-	t.Run("preserve other providers", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"provider":{"anthropic":{"apiKey":"xxx"}}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		provider := cfg["provider"].(map[string]any)
-		if provider["anthropic"] == nil {
-			t.Error("anthropic provider was removed")
-		}
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-	})
-
-	t.Run("preserve other models", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"mistral":{"name":"Mistral"}}}}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenCodeModelExists(t, configPath, "mistral")
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-	})
-
-	t.Run("update existing model", func(t *testing.T) {
-		cleanup()
-		o.Edit([]string{"llama3.2"})
-		o.Edit([]string{"llama3.2"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-	})
-
-	t.Run("preserve top-level keys", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"theme":"dark","keybindings":{}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("theme was removed")
-		}
-		if cfg["keybindings"] == nil {
-			t.Error("keybindings was removed")
-		}
-	})
-
-	t.Run("model state - insert at index 0", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(statePath, []byte(`{"recent":[{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
-		assertOpenCodeRecentModel(t, statePath, 1, "anthropic", "claude")
-	})
-
-	t.Run("model state - preserve favorites and variants", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(statePath, []byte(`{"recent":[],"favorite":[{"providerID":"x","modelID":"y"}],"variant":{"a":"b"}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(statePath)
-		var state map[string]any
-		json.Unmarshal(data, &state)
-		if len(state["favorite"].([]any)) != 1 {
-			t.Error("favorite was modified")
-		}
-		if state["variant"].(map[string]any)["a"] != "b" {
-			t.Error("variant was modified")
-		}
-	})
-
-	t.Run("model state - deduplicate on re-add", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(stateDir, 0o755)
-		os.WriteFile(statePath, []byte(`{"recent":[{"providerID":"ollama","modelID":"llama3.2"},{"providerID":"anthropic","modelID":"claude"}],"favorite":[],"variant":{}}`), 0o644)
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(statePath)
-		var state map[string]any
-		json.Unmarshal(data, &state)
-		recent := state["recent"].([]any)
-		if len(recent) != 2 {
-			t.Errorf("expected 2 recent entries, got %d", len(recent))
-		}
-		assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
-	})
-
-	t.Run("remove model", func(t *testing.T) {
-		cleanup()
-		// First add two models
-		o.Edit([]string{"llama3.2", "mistral"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeModelExists(t, configPath, "mistral")
-
-		// Then remove one by only selecting the other
-		o.Edit([]string{"llama3.2"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeModelNotExists(t, configPath, "mistral")
-	})
-
-	t.Run("preserve user customizations on managed models", func(t *testing.T) {
-		cleanup()
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		// Add custom fields to the model entry (simulating user edits)
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		provider := cfg["provider"].(map[string]any)
-		ollama := provider["ollama"].(map[string]any)
-		models := ollama["models"].(map[string]any)
-		entry := models["llama3.2"].(map[string]any)
-		entry["_myPref"] = "custom-value"
-		entry["_myNum"] = 42
-		configData, _ := json.MarshalIndent(cfg, "", "  ")
-		os.WriteFile(configPath, configData, 0o644)
-
-		// Re-run Edit — should preserve custom fields
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ = os.ReadFile(configPath)
-		json.Unmarshal(data, &cfg)
-		provider = cfg["provider"].(map[string]any)
-		ollama = provider["ollama"].(map[string]any)
-		models = ollama["models"].(map[string]any)
-		entry = models["llama3.2"].(map[string]any)
-
-		if entry["_myPref"] != "custom-value" {
-			t.Errorf("_myPref was lost: got %v", entry["_myPref"])
-		}
-		if entry["_myNum"] != float64(42) {
-			t.Errorf("_myNum was lost: got %v", entry["_myNum"])
-		}
-		if v, ok := entry["_launch"].(bool); !ok || !v {
-			t.Errorf("_launch marker missing or false: got %v", entry["_launch"])
-		}
-	})
-
-	t.Run("migrate legacy [Ollama] suffix entries", func(t *testing.T) {
-		cleanup()
-		// Write a config with a legacy entry (has [Ollama] suffix but no _launch marker)
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"llama3.2":{"name":"llama3.2 [Ollama]"}}}}}`), 0o644)
-
-		if err := o.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		provider := cfg["provider"].(map[string]any)
-		ollama := provider["ollama"].(map[string]any)
-		models := ollama["models"].(map[string]any)
-		entry := models["llama3.2"].(map[string]any)
-
-		// _launch marker should be added
-		if v, ok := entry["_launch"].(bool); !ok || !v {
-			t.Errorf("_launch marker not added during migration: got %v", entry["_launch"])
-		}
-		// [Ollama] suffix should be stripped
-		if name, ok := entry["name"].(string); !ok || name != "llama3.2" {
-			t.Errorf("name suffix not stripped: got %q", entry["name"])
-		}
-	})
-
-	t.Run("remove model preserves non-ollama models", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		// Add a non-Ollama model manually
-		os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"external":{"name":"External Model"}}}}}`), 0o644)
-
-		o.Edit([]string{"llama3.2"})
-		assertOpenCodeModelExists(t, configPath, "llama3.2")
-		assertOpenCodeModelExists(t, configPath, "external") // Should be preserved
-	})
-}
-
-func assertOpenCodeModelExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatal(err)
-	}
-	provider, ok := cfg["provider"].(map[string]any)
-	if !ok {
-		t.Fatal("provider not found")
-	}
-	ollama, ok := provider["ollama"].(map[string]any)
-	if !ok {
-		t.Fatal("ollama provider not found")
-	}
-	models, ok := ollama["models"].(map[string]any)
-	if !ok {
-		t.Fatal("models not found")
-	}
-	if models[model] == nil {
-		t.Errorf("model %s not found", model)
-	}
-}
-
-func assertOpenCodeModelNotExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatal(err)
-	}
-	provider, ok := cfg["provider"].(map[string]any)
-	if !ok {
-		return // No provider means no model
-	}
-	ollama, ok := provider["ollama"].(map[string]any)
-	if !ok {
-		return // No ollama means no model
-	}
-	models, ok := ollama["models"].(map[string]any)
-	if !ok {
-		return // No models means no model
-	}
-	if models[model] != nil {
-		t.Errorf("model %s should not exist but was found", model)
-	}
-}
-
-func assertOpenCodeRecentModel(t *testing.T, path string, index int, providerID, modelID string) {
-	t.Helper()
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	var state map[string]any
-	if err := json.Unmarshal(data, &state); err != nil {
-		t.Fatal(err)
-	}
-	recent, ok := state["recent"].([]any)
-	if !ok {
-		t.Fatal("recent not found")
-	}
-	if index >= len(recent) {
-		t.Fatalf("index %d out of range (len=%d)", index, len(recent))
-	}
-	entry, ok := recent[index].(map[string]any)
-	if !ok {
-		t.Fatal("entry is not a map")
-	}
-	if entry["providerID"] != providerID {
-		t.Errorf("expected providerID %s, got %s", providerID, entry["providerID"])
-	}
-	if entry["modelID"] != modelID {
-		t.Errorf("expected modelID %s, got %s", modelID, entry["modelID"])
-	}
-}
-
-// Edge case tests for opencode.go
-
-func TestOpenCodeEdit_CorruptedConfigJSON(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(`{corrupted json content`), 0o644)
-
-	// Should not panic - corrupted JSON should be treated as empty
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit failed with corrupted config: %v", err)
-	}
-
-	// Verify valid JSON was created
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Errorf("resulting config is not valid JSON: %v", err)
-	}
-}
-
-func TestOpenCodeEdit_CorruptedStateJSON(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
-	statePath := filepath.Join(stateDir, "model.json")
-
-	os.MkdirAll(stateDir, 0o755)
-	os.WriteFile(statePath, []byte(`{corrupted state`), 0o644)
-
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit failed with corrupted state: %v", err)
-	}
-
-	// Verify valid state was created
-	data, _ := os.ReadFile(statePath)
-	var state map[string]any
-	if err := json.Unmarshal(data, &state); err != nil {
-		t.Errorf("resulting state is not valid JSON: %v", err)
-	}
-}
-
-func TestOpenCodeEdit_WrongTypeProvider(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(`{"provider": "not a map"}`), 0o644)
-
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit with wrong type provider failed: %v", err)
-	}
-
-	// Verify provider is now correct type
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-
-	provider, ok := cfg["provider"].(map[string]any)
-	if !ok {
-		t.Fatalf("provider should be map after setup, got %T", cfg["provider"])
-	}
-	if provider["ollama"] == nil {
-		t.Error("ollama provider should be created")
-	}
-}
-
-func TestOpenCodeEdit_WrongTypeRecent(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	stateDir := filepath.Join(tmpDir, ".local", "state", "opencode")
-	statePath := filepath.Join(stateDir, "model.json")
-
-	os.MkdirAll(stateDir, 0o755)
-	os.WriteFile(statePath, []byte(`{"recent": "not an array", "favorite": [], "variant": {}}`), 0o644)
-
-	err := o.Edit([]string{"llama3.2"})
-	if err != nil {
-		t.Fatalf("Edit with wrong type recent failed: %v", err)
-	}
-
-	// The function should handle this gracefully
-	data, _ := os.ReadFile(statePath)
-	var state map[string]any
-	json.Unmarshal(data, &state)
-
-	// recent should be properly set after setup
-	recent, ok := state["recent"].([]any)
-	if !ok {
-		t.Logf("Note: recent type after setup is %T (documenting behavior)", state["recent"])
-	} else if len(recent) == 0 {
-		t.Logf("Note: recent is empty (documenting behavior)")
-	}
-}
-
-func TestOpenCodeEdit_EmptyModels(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-
-	os.MkdirAll(configDir, 0o755)
-	originalContent := `{"provider":{"ollama":{"models":{"existing":{}}}}}`
-	os.WriteFile(configPath, []byte(originalContent), 0o644)
-
-	// Empty models should be no-op
-	err := o.Edit([]string{})
-	if err != nil {
-		t.Fatalf("Edit with empty models failed: %v", err)
-	}
-
-	// Original content should be preserved (file not modified)
-	data, _ := os.ReadFile(configPath)
-	if string(data) != originalContent {
-		t.Errorf("empty models should not modify file, but content changed")
-	}
-}
-
-func TestOpenCodeEdit_SpecialCharsInModelName(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	// Model name with special characters (though unusual)
-	specialModel := `model-with-"quotes"`
-
-	err := o.Edit([]string{specialModel})
-	if err != nil {
-		t.Fatalf("Edit with special chars failed: %v", err)
-	}
-
-	// Verify it was stored correctly
-	configDir := filepath.Join(tmpDir, ".config", "opencode")
-	configPath := filepath.Join(configDir, "opencode.json")
-	data, _ := os.ReadFile(configPath)
-
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatalf("resulting config is invalid JSON: %v", err)
-	}
-
-	// Model should be accessible
-	provider, _ := cfg["provider"].(map[string]any)
-	ollama, _ := provider["ollama"].(map[string]any)
-	models, _ := ollama["models"].(map[string]any)
-
-	if models[specialModel] == nil {
-		t.Errorf("model with special chars not found in config")
-	}
-}
-
-func TestOpenCodeModels_NoConfig(t *testing.T) {
-	o := &OpenCode{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	models := o.Models()
-	if len(models) > 0 {
-		t.Errorf("expected nil/empty for missing config, got %v", models)
-	}
-}
--- a/cmd/config/selector.go
+++ b/cmd/config/selector.go
@@ -1,512 +0,0 @@
-package config
-
-import (
-	"errors"
-	"fmt"
-	"io"
-	"os"
-	"strings"
-
-	"golang.org/x/term"
-)
-
-// ANSI escape sequences for terminal formatting.
-const (
-	ansiHideCursor = "\033[?25l"
-	ansiShowCursor = "\033[?25h"
-	ansiBold       = "\033[1m"
-	ansiReset      = "\033[0m"
-	ansiGray       = "\033[37m"
-	ansiClearDown  = "\033[J"
-)
-
-const maxDisplayedItems = 10
-
-var errCancelled = errors.New("cancelled")
-
-type selectItem struct {
-	Name        string
-	Description string
-}
-
-type inputEvent int
-
-const (
-	eventNone inputEvent = iota
-	eventEnter
-	eventEscape
-	eventUp
-	eventDown
-	eventTab
-	eventBackspace
-	eventChar
-)
-
-type selectState struct {
-	items        []selectItem
-	filter       string
-	selected     int
-	scrollOffset int
-}
-
-func newSelectState(items []selectItem) *selectState {
-	return &selectState{items: items}
-}
-
-func (s *selectState) filtered() []selectItem {
-	return filterItems(s.items, s.filter)
-}
-
-func (s *selectState) handleInput(event inputEvent, char byte) (done bool, result string, err error) {
-	filtered := s.filtered()
-
-	switch event {
-	case eventEnter:
-		if len(filtered) > 0 && s.selected < len(filtered) {
-			return true, filtered[s.selected].Name, nil
-		}
-	case eventEscape:
-		return true, "", errCancelled
-	case eventBackspace:
-		if len(s.filter) > 0 {
-			s.filter = s.filter[:len(s.filter)-1]
-			s.selected = 0
-			s.scrollOffset = 0
-		}
-	case eventUp:
-		if s.selected > 0 {
-			s.selected--
-			if s.selected < s.scrollOffset {
-				s.scrollOffset = s.selected
-			}
-		}
-	case eventDown:
-		if s.selected < len(filtered)-1 {
-			s.selected++
-			if s.selected >= s.scrollOffset+maxDisplayedItems {
-				s.scrollOffset = s.selected - maxDisplayedItems + 1
-			}
-		}
-	case eventChar:
-		s.filter += string(char)
-		s.selected = 0
-		s.scrollOffset = 0
-	}
-
-	return false, "", nil
-}
-
-type multiSelectState struct {
-	items         []selectItem
-	itemIndex     map[string]int
-	filter        string
-	highlighted   int
-	scrollOffset  int
-	checked       map[int]bool
-	checkOrder    []int
-	focusOnButton bool
-}
-
-func newMultiSelectState(items []selectItem, preChecked []string) *multiSelectState {
-	s := &multiSelectState{
-		items:     items,
-		itemIndex: make(map[string]int, len(items)),
-		checked:   make(map[int]bool),
-	}
-
-	for i, item := range items {
-		s.itemIndex[item.Name] = i
-	}
-
-	for _, name := range preChecked {
-		if idx, ok := s.itemIndex[name]; ok {
-			s.checked[idx] = true
-			s.checkOrder = append(s.checkOrder, idx)
-		}
-	}
-
-	return s
-}
-
-func (s *multiSelectState) filtered() []selectItem {
-	return filterItems(s.items, s.filter)
-}
-
-func (s *multiSelectState) toggleItem() {
-	filtered := s.filtered()
-	if len(filtered) == 0 || s.highlighted >= len(filtered) {
-		return
-	}
-
-	item := filtered[s.highlighted]
-	origIdx := s.itemIndex[item.Name]
-
-	if s.checked[origIdx] {
-		delete(s.checked, origIdx)
-		for i, idx := range s.checkOrder {
-			if idx == origIdx {
-				s.checkOrder = append(s.checkOrder[:i], s.checkOrder[i+1:]...)
-				break
-			}
-		}
-	} else {
-		s.checked[origIdx] = true
-		s.checkOrder = append(s.checkOrder, origIdx)
-	}
-}
-
-func (s *multiSelectState) handleInput(event inputEvent, char byte) (done bool, result []string, err error) {
-	filtered := s.filtered()
-
-	switch event {
-	case eventEnter:
-		if s.focusOnButton && len(s.checkOrder) > 0 {
-			var res []string
-			for _, idx := range s.checkOrder {
-				res = append(res, s.items[idx].Name)
-			}
-			return true, res, nil
-		} else if !s.focusOnButton {
-			s.toggleItem()
-		}
-	case eventTab:
-		if len(s.checkOrder) > 0 {
-			s.focusOnButton = !s.focusOnButton
-		}
-	case eventEscape:
-		return true, nil, errCancelled
-	case eventBackspace:
-		if len(s.filter) > 0 {
-			s.filter = s.filter[:len(s.filter)-1]
-			s.highlighted = 0
-			s.scrollOffset = 0
-			s.focusOnButton = false
-		}
-	case eventUp:
-		if s.focusOnButton {
-			s.focusOnButton = false
-		} else if s.highlighted > 0 {
-			s.highlighted--
-			if s.highlighted < s.scrollOffset {
-				s.scrollOffset = s.highlighted
-			}
-		}
-	case eventDown:
-		if s.focusOnButton {
-			s.focusOnButton = false
-		} else if s.highlighted < len(filtered)-1 {
-			s.highlighted++
-			if s.highlighted >= s.scrollOffset+maxDisplayedItems {
-				s.scrollOffset = s.highlighted - maxDisplayedItems + 1
-			}
-		}
-	case eventChar:
-		s.filter += string(char)
-		s.highlighted = 0
-		s.scrollOffset = 0
-		s.focusOnButton = false
-	}
-
-	return false, nil, nil
-}
-
-func (s *multiSelectState) selectedCount() int {
-	return len(s.checkOrder)
-}
-
-// Terminal I/O handling
-
-type terminalState struct {
-	fd       int
-	oldState *term.State
-}
-
-func enterRawMode() (*terminalState, error) {
-	fd := int(os.Stdin.Fd())
-	oldState, err := term.MakeRaw(fd)
-	if err != nil {
-		return nil, err
-	}
-	fmt.Fprint(os.Stderr, ansiHideCursor)
-	return &terminalState{fd: fd, oldState: oldState}, nil
-}
-
-func (t *terminalState) restore() {
-	fmt.Fprint(os.Stderr, ansiShowCursor)
-	term.Restore(t.fd, t.oldState)
-}
-
-func clearLines(n int) {
-	if n > 0 {
-		fmt.Fprintf(os.Stderr, "\033[%dA", n)
-		fmt.Fprint(os.Stderr, ansiClearDown)
-	}
-}
-
-func parseInput(r io.Reader) (inputEvent, byte, error) {
-	buf := make([]byte, 3)
-	n, err := r.Read(buf)
-	if err != nil {
-		return 0, 0, err
-	}
-
-	switch {
-	case n == 1 && buf[0] == 13:
-		return eventEnter, 0, nil
-	case n == 1 && (buf[0] == 3 || buf[0] == 27):
-		return eventEscape, 0, nil
-	case n == 1 && buf[0] == 9:
-		return eventTab, 0, nil
-	case n == 1 && buf[0] == 127:
-		return eventBackspace, 0, nil
-	case n == 3 && buf[0] == 27 && buf[1] == 91 && buf[2] == 65:
-		return eventUp, 0, nil
-	case n == 3 && buf[0] == 27 && buf[1] == 91 && buf[2] == 66:
-		return eventDown, 0, nil
-	case n == 1 && buf[0] >= 32 && buf[0] < 127:
-		return eventChar, buf[0], nil
-	}
-
-	return eventNone, 0, nil
-}
-
-// Rendering
-
-func renderSelect(w io.Writer, prompt string, s *selectState) int {
-	filtered := s.filtered()
-
-	if s.filter == "" {
-		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
-	} else {
-		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
-	}
-	lineCount := 1
-
-	if len(filtered) == 0 {
-		fmt.Fprintf(w, "  %s(no matches)%s\r\n", ansiGray, ansiReset)
-		lineCount++
-	} else {
-		displayCount := min(len(filtered), maxDisplayedItems)
-
-		for i := range displayCount {
-			idx := s.scrollOffset + i
-			if idx >= len(filtered) {
-				break
-			}
-			item := filtered[idx]
-			prefix := "    "
-			if idx == s.selected {
-				prefix = "  " + ansiBold + "> "
-			}
-			if item.Description != "" {
-				fmt.Fprintf(w, "%s%s%s %s- %s%s\r\n", prefix, item.Name, ansiReset, ansiGray, item.Description, ansiReset)
-			} else {
-				fmt.Fprintf(w, "%s%s%s\r\n", prefix, item.Name, ansiReset)
-			}
-			lineCount++
-		}
-
-		if remaining := len(filtered) - s.scrollOffset - displayCount; remaining > 0 {
-			fmt.Fprintf(w, "  %s... and %d more%s\r\n", ansiGray, remaining, ansiReset)
-			lineCount++
-		}
-	}
-
-	return lineCount
-}
-
-func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
-	filtered := s.filtered()
-
-	if s.filter == "" {
-		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
-	} else {
-		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
-	}
-	lineCount := 1
-
-	if len(filtered) == 0 {
-		fmt.Fprintf(w, "  %s(no matches)%s\r\n", ansiGray, ansiReset)
-		lineCount++
-	} else {
-		displayCount := min(len(filtered), maxDisplayedItems)
-
-		for i := range displayCount {
-			idx := s.scrollOffset + i
-			if idx >= len(filtered) {
-				break
-			}
-			item := filtered[idx]
-			origIdx := s.itemIndex[item.Name]
-
-			checkbox := "[ ]"
-			if s.checked[origIdx] {
-				checkbox = "[x]"
-			}
-
-			prefix := "  "
-			suffix := ""
-			if idx == s.highlighted && !s.focusOnButton {
-				prefix = "> "
-			}
-			if len(s.checkOrder) > 0 && s.checkOrder[0] == origIdx {
-				suffix = " " + ansiGray + "(default)" + ansiReset
-			}
-
-			desc := ""
-			if item.Description != "" {
-				desc = " " + ansiGray + "- " + item.Description + ansiReset
-			}
-
-			if idx == s.highlighted && !s.focusOnButton {
-				fmt.Fprintf(w, "  %s%s %s %s%s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, desc, suffix)
-			} else {
-				fmt.Fprintf(w, "  %s %s %s%s%s\r\n", prefix, checkbox, item.Name, desc, suffix)
-			}
-			lineCount++
-		}
-
-		if remaining := len(filtered) - s.scrollOffset - displayCount; remaining > 0 {
-			fmt.Fprintf(w, "  %s... and %d more%s\r\n", ansiGray, remaining, ansiReset)
-			lineCount++
-		}
-	}
-
-	fmt.Fprintf(w, "\r\n")
-	lineCount++
-	count := s.selectedCount()
-	switch {
-	case count == 0:
-		fmt.Fprintf(w, "  %sSelect at least one model.%s\r\n", ansiGray, ansiReset)
-	case s.focusOnButton:
-		fmt.Fprintf(w, "  %s> [ Continue ]%s %s(%d selected)%s\r\n", ansiBold, ansiReset, ansiGray, count, ansiReset)
-	default:
-		fmt.Fprintf(w, "    %s[ Continue ] (%d selected) - press Tab%s\r\n", ansiGray, count, ansiReset)
-	}
-	lineCount++
-
-	return lineCount
-}
-
-// selectPrompt prompts the user to select a single item from a list.
-func selectPrompt(prompt string, items []selectItem) (string, error) {
-	if len(items) == 0 {
-		return "", fmt.Errorf("no items to select from")
-	}
-
-	ts, err := enterRawMode()
-	if err != nil {
-		return "", err
-	}
-	defer ts.restore()
-
-	state := newSelectState(items)
-	var lastLineCount int
-
-	render := func() {
-		clearLines(lastLineCount)
-		lastLineCount = renderSelect(os.Stderr, prompt, state)
-	}
-
-	render()
-
-	for {
-		event, char, err := parseInput(os.Stdin)
-		if err != nil {
-			return "", err
-		}
-
-		done, result, err := state.handleInput(event, char)
-		if done {
-			clearLines(lastLineCount)
-			if err != nil {
-				return "", err
-			}
-			return result, nil
-		}
-
-		render()
-	}
-}
-
-// multiSelectPrompt prompts the user to select multiple items from a list.
-func multiSelectPrompt(prompt string, items []selectItem, preChecked []string) ([]string, error) {
-	if len(items) == 0 {
-		return nil, fmt.Errorf("no items to select from")
-	}
-
-	ts, err := enterRawMode()
-	if err != nil {
-		return nil, err
-	}
-	defer ts.restore()
-
-	state := newMultiSelectState(items, preChecked)
-	var lastLineCount int
-
-	render := func() {
-		clearLines(lastLineCount)
-		lastLineCount = renderMultiSelect(os.Stderr, prompt, state)
-	}
-
-	render()
-
-	for {
-		event, char, err := parseInput(os.Stdin)
-		if err != nil {
-			return nil, err
-		}
-
-		done, result, err := state.handleInput(event, char)
-		if done {
-			clearLines(lastLineCount)
-			if err != nil {
-				return nil, err
-			}
-			return result, nil
-		}
-
-		render()
-	}
-}
-
-func confirmPrompt(prompt string) (bool, error) {
-	fd := int(os.Stdin.Fd())
-	oldState, err := term.MakeRaw(fd)
-	if err != nil {
-		return false, err
-	}
-	defer term.Restore(fd, oldState)
-
-	fmt.Fprintf(os.Stderr, "%s (\033[1my\033[0m/n) ", prompt)
-
-	buf := make([]byte, 1)
-	for {
-		if _, err := os.Stdin.Read(buf); err != nil {
-			return false, err
-		}
-
-		switch buf[0] {
-		case 'Y', 'y', 13:
-			fmt.Fprintf(os.Stderr, "yes\r\n")
-			return true, nil
-		case 'N', 'n', 27, 3:
-			fmt.Fprintf(os.Stderr, "no\r\n")
-			return false, nil
-		}
-	}
-}
-
-func filterItems(items []selectItem, filter string) []selectItem {
-	if filter == "" {
-		return items
-	}
-	var result []selectItem
-	filterLower := strings.ToLower(filter)
-	for _, item := range items {
-		if strings.Contains(strings.ToLower(item.Name), filterLower) {
-			result = append(result, item)
-		}
-	}
-	return result
-}
--- a/cmd/config/selector_test.go
+++ b/cmd/config/selector_test.go
@@ -1,913 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"strings"
-	"testing"
-)
-
-func TestFilterItems(t *testing.T) {
-	items := []selectItem{
-		{Name: "llama3.2:latest"},
-		{Name: "qwen2.5:7b"},
-		{Name: "deepseek-v3:cloud"},
-		{Name: "GPT-OSS:20b"},
-	}
-
-	t.Run("EmptyFilter_ReturnsAllItems", func(t *testing.T) {
-		result := filterItems(items, "")
-		if len(result) != len(items) {
-			t.Errorf("expected %d items, got %d", len(items), len(result))
-		}
-	})
-
-	t.Run("CaseInsensitive_UppercaseFilterMatchesLowercase", func(t *testing.T) {
-		result := filterItems(items, "LLAMA")
-		if len(result) != 1 || result[0].Name != "llama3.2:latest" {
-			t.Errorf("expected llama3.2:latest, got %v", result)
-		}
-	})
-
-	t.Run("CaseInsensitive_LowercaseFilterMatchesUppercase", func(t *testing.T) {
-		result := filterItems(items, "gpt")
-		if len(result) != 1 || result[0].Name != "GPT-OSS:20b" {
-			t.Errorf("expected GPT-OSS:20b, got %v", result)
-		}
-	})
-
-	t.Run("PartialMatch", func(t *testing.T) {
-		result := filterItems(items, "deep")
-		if len(result) != 1 || result[0].Name != "deepseek-v3:cloud" {
-			t.Errorf("expected deepseek-v3:cloud, got %v", result)
-		}
-	})
-
-	t.Run("NoMatch_ReturnsEmpty", func(t *testing.T) {
-		result := filterItems(items, "nonexistent")
-		if len(result) != 0 {
-			t.Errorf("expected 0 items, got %d", len(result))
-		}
-	})
-}
-
-func TestSelectState(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1"},
-		{Name: "item2"},
-		{Name: "item3"},
-	}
-
-	t.Run("InitialState", func(t *testing.T) {
-		s := newSelectState(items)
-		if s.selected != 0 {
-			t.Errorf("expected selected=0, got %d", s.selected)
-		}
-		if s.filter != "" {
-			t.Errorf("expected empty filter, got %q", s.filter)
-		}
-		if s.scrollOffset != 0 {
-			t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
-		}
-	})
-
-	t.Run("Enter_SelectsCurrentItem", func(t *testing.T) {
-		s := newSelectState(items)
-		done, result, err := s.handleInput(eventEnter, 0)
-		if !done || result != "item1" || err != nil {
-			t.Errorf("expected (true, item1, nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Enter_WithFilter_SelectsFilteredItem", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "item3"
-		done, result, err := s.handleInput(eventEnter, 0)
-		if !done || result != "item3" || err != nil {
-			t.Errorf("expected (true, item3, nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Enter_EmptyFilteredList_DoesNothing", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "nonexistent"
-		done, result, err := s.handleInput(eventEnter, 0)
-		if done || result != "" || err != nil {
-			t.Errorf("expected (false, '', nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
-		s := newSelectState(items)
-		done, result, err := s.handleInput(eventEscape, 0)
-		if !done || result != "" || err != errCancelled {
-			t.Errorf("expected (true, '', errCancelled), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Down_MovesSelection", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventDown, 0)
-		if s.selected != 1 {
-			t.Errorf("expected selected=1, got %d", s.selected)
-		}
-	})
-
-	t.Run("Down_AtBottom_StaysAtBottom", func(t *testing.T) {
-		s := newSelectState(items)
-		s.selected = 2
-		s.handleInput(eventDown, 0)
-		if s.selected != 2 {
-			t.Errorf("expected selected=2 (stayed at bottom), got %d", s.selected)
-		}
-	})
-
-	t.Run("Up_MovesSelection", func(t *testing.T) {
-		s := newSelectState(items)
-		s.selected = 2
-		s.handleInput(eventUp, 0)
-		if s.selected != 1 {
-			t.Errorf("expected selected=1, got %d", s.selected)
-		}
-	})
-
-	t.Run("Up_AtTop_StaysAtTop", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventUp, 0)
-		if s.selected != 0 {
-			t.Errorf("expected selected=0 (stayed at top), got %d", s.selected)
-		}
-	})
-
-	t.Run("Char_AppendsToFilter", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventChar, 'i')
-		s.handleInput(eventChar, 't')
-		s.handleInput(eventChar, 'e')
-		s.handleInput(eventChar, 'm')
-		s.handleInput(eventChar, '2')
-		if s.filter != "item2" {
-			t.Errorf("expected filter='item2', got %q", s.filter)
-		}
-		filtered := s.filtered()
-		if len(filtered) != 1 || filtered[0].Name != "item2" {
-			t.Errorf("expected [item2], got %v", filtered)
-		}
-	})
-
-	t.Run("Char_ResetsSelectionToZero", func(t *testing.T) {
-		s := newSelectState(items)
-		s.selected = 2
-		s.handleInput(eventChar, 'x')
-		if s.selected != 0 {
-			t.Errorf("expected selected=0 after typing, got %d", s.selected)
-		}
-	})
-
-	t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "test"
-		s.handleInput(eventBackspace, 0)
-		if s.filter != "tes" {
-			t.Errorf("expected filter='tes', got %q", s.filter)
-		}
-	})
-
-	t.Run("Backspace_EmptyFilter_DoesNothing", func(t *testing.T) {
-		s := newSelectState(items)
-		s.handleInput(eventBackspace, 0)
-		if s.filter != "" {
-			t.Errorf("expected filter='', got %q", s.filter)
-		}
-	})
-
-	t.Run("Backspace_ResetsSelectionToZero", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "test"
-		s.selected = 2
-		s.handleInput(eventBackspace, 0)
-		if s.selected != 0 {
-			t.Errorf("expected selected=0 after backspace, got %d", s.selected)
-		}
-	})
-
-	t.Run("Scroll_DownPastVisibleItems_ScrollsViewport", func(t *testing.T) {
-		// maxDisplayedItems is 10, so with 15 items we need to scroll
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newSelectState(manyItems)
-
-		// move down 12 times (past the 10-item viewport)
-		for range 12 {
-			s.handleInput(eventDown, 0)
-		}
-
-		if s.selected != 12 {
-			t.Errorf("expected selected=12, got %d", s.selected)
-		}
-		if s.scrollOffset != 3 {
-			t.Errorf("expected scrollOffset=3 (12-10+1), got %d", s.scrollOffset)
-		}
-	})
-
-	t.Run("Scroll_UpPastScrollOffset_ScrollsViewport", func(t *testing.T) {
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newSelectState(manyItems)
-		s.selected = 5
-		s.scrollOffset = 5
-
-		s.handleInput(eventUp, 0)
-
-		if s.selected != 4 {
-			t.Errorf("expected selected=4, got %d", s.selected)
-		}
-		if s.scrollOffset != 4 {
-			t.Errorf("expected scrollOffset=4, got %d", s.scrollOffset)
-		}
-	})
-}
-
-func TestMultiSelectState(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1"},
-		{Name: "item2"},
-		{Name: "item3"},
-	}
-
-	t.Run("InitialState_NoPrechecked", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		if s.highlighted != 0 {
-			t.Errorf("expected highlighted=0, got %d", s.highlighted)
-		}
-		if s.selectedCount() != 0 {
-			t.Errorf("expected 0 selected, got %d", s.selectedCount())
-		}
-		if s.focusOnButton {
-			t.Error("expected focusOnButton=false initially")
-		}
-	})
-
-	t.Run("InitialState_WithPrechecked", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item2", "item3"})
-		if s.selectedCount() != 2 {
-			t.Errorf("expected 2 selected, got %d", s.selectedCount())
-		}
-		if !s.checked[1] || !s.checked[2] {
-			t.Error("expected item2 and item3 to be checked")
-		}
-	})
-
-	t.Run("Prechecked_PreservesSelectionOrder", func(t *testing.T) {
-		// order matters: first checked = default model
-		s := newMultiSelectState(items, []string{"item3", "item1"})
-		if len(s.checkOrder) != 2 {
-			t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
-		}
-		if s.checkOrder[0] != 2 || s.checkOrder[1] != 0 {
-			t.Errorf("expected checkOrder=[2,0] (item3 first), got %v", s.checkOrder)
-		}
-	})
-
-	t.Run("Prechecked_IgnoresInvalidNames", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1", "nonexistent"})
-		if s.selectedCount() != 1 {
-			t.Errorf("expected 1 selected (nonexistent ignored), got %d", s.selectedCount())
-		}
-	})
-
-	t.Run("Toggle_ChecksUncheckedItem", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.toggleItem()
-		if !s.checked[0] {
-			t.Error("expected item1 to be checked after toggle")
-		}
-	})
-
-	t.Run("Toggle_UnchecksCheckedItem", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.toggleItem()
-		if s.checked[0] {
-			t.Error("expected item1 to be unchecked after toggle")
-		}
-	})
-
-	t.Run("Toggle_RemovesFromCheckOrder", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1", "item2", "item3"})
-		s.highlighted = 1 // toggle item2
-		s.toggleItem()
-
-		if len(s.checkOrder) != 2 {
-			t.Fatalf("expected 2 in checkOrder, got %d", len(s.checkOrder))
-		}
-		// should be [0, 2] (item1, item3) with item2 removed
-		if s.checkOrder[0] != 0 || s.checkOrder[1] != 2 {
-			t.Errorf("expected checkOrder=[0,2], got %v", s.checkOrder)
-		}
-	})
-
-	t.Run("Enter_TogglesWhenNotOnButton", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventEnter, 0)
-		if !s.checked[0] {
-			t.Error("expected item1 to be checked after enter")
-		}
-	})
-
-	t.Run("Enter_OnButton_ReturnsSelection", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item2", "item1"})
-		s.focusOnButton = true
-
-		done, result, err := s.handleInput(eventEnter, 0)
-
-		if !done || err != nil {
-			t.Errorf("expected done=true, err=nil, got done=%v, err=%v", done, err)
-		}
-		// result should preserve selection order
-		if len(result) != 2 || result[0] != "item2" || result[1] != "item1" {
-			t.Errorf("expected [item2, item1], got %v", result)
-		}
-	})
-
-	t.Run("Enter_OnButton_EmptySelection_DoesNothing", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.focusOnButton = true
-		done, result, err := s.handleInput(eventEnter, 0)
-		if done || result != nil || err != nil {
-			t.Errorf("expected (false, nil, nil), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("Tab_SwitchesToButton_WhenHasSelection", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.handleInput(eventTab, 0)
-		if !s.focusOnButton {
-			t.Error("expected focus on button after tab")
-		}
-	})
-
-	t.Run("Tab_DoesNothing_WhenNoSelection", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventTab, 0)
-		if s.focusOnButton {
-			t.Error("tab should not focus button when nothing selected")
-		}
-	})
-
-	t.Run("Tab_TogglesButtonFocus", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.handleInput(eventTab, 0)
-		if !s.focusOnButton {
-			t.Error("expected focus on button after first tab")
-		}
-		s.handleInput(eventTab, 0)
-		if s.focusOnButton {
-			t.Error("expected focus back on list after second tab")
-		}
-	})
-
-	t.Run("Escape_ReturnsCancelledError", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		done, result, err := s.handleInput(eventEscape, 0)
-		if !done || result != nil || err != errCancelled {
-			t.Errorf("expected (true, nil, errCancelled), got (%v, %v, %v)", done, result, err)
-		}
-	})
-
-	t.Run("IsDefault_TrueForFirstChecked", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item2", "item1"})
-		if !(len(s.checkOrder) > 0 && s.checkOrder[0] == 1) {
-			t.Error("expected item2 (idx 1) to be default (first checked)")
-		}
-		if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
-			t.Error("expected item1 (idx 0) to NOT be default")
-		}
-	})
-
-	t.Run("IsDefault_FalseWhenNothingChecked", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		if len(s.checkOrder) > 0 && s.checkOrder[0] == 0 {
-			t.Error("expected isDefault=false when nothing checked")
-		}
-	})
-
-	t.Run("Down_MovesHighlight", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventDown, 0)
-		if s.highlighted != 1 {
-			t.Errorf("expected highlighted=1, got %d", s.highlighted)
-		}
-	})
-
-	t.Run("Up_MovesHighlight", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.highlighted = 1
-		s.handleInput(eventUp, 0)
-		if s.highlighted != 0 {
-			t.Errorf("expected highlighted=0, got %d", s.highlighted)
-		}
-	})
-
-	t.Run("Arrow_ReturnsFocusFromButton", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.focusOnButton = true
-		s.handleInput(eventDown, 0)
-		if s.focusOnButton {
-			t.Error("expected focus to return to list on arrow key")
-		}
-	})
-
-	t.Run("Char_AppendsToFilter", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.handleInput(eventChar, 'x')
-		if s.filter != "x" {
-			t.Errorf("expected filter='x', got %q", s.filter)
-		}
-	})
-
-	t.Run("Char_ResetsHighlightAndScroll", func(t *testing.T) {
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newMultiSelectState(manyItems, nil)
-		s.highlighted = 10
-		s.scrollOffset = 5
-
-		s.handleInput(eventChar, 'x')
-
-		if s.highlighted != 0 {
-			t.Errorf("expected highlighted=0, got %d", s.highlighted)
-		}
-		if s.scrollOffset != 0 {
-			t.Errorf("expected scrollOffset=0, got %d", s.scrollOffset)
-		}
-	})
-
-	t.Run("Backspace_RemovesLastFilterChar", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		s.filter = "test"
-		s.handleInput(eventBackspace, 0)
-		if s.filter != "tes" {
-			t.Errorf("expected filter='tes', got %q", s.filter)
-		}
-	})
-
-	t.Run("Backspace_RemovesFocusFromButton", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		s.filter = "x"
-		s.focusOnButton = true
-		s.handleInput(eventBackspace, 0)
-		if s.focusOnButton {
-			t.Error("expected focusOnButton=false after backspace")
-		}
-	})
-}
-
-func TestParseInput(t *testing.T) {
-	t.Run("Enter", func(t *testing.T) {
-		event, char, err := parseInput(bytes.NewReader([]byte{13}))
-		if err != nil || event != eventEnter || char != 0 {
-			t.Errorf("expected (eventEnter, 0, nil), got (%v, %v, %v)", event, char, err)
-		}
-	})
-
-	t.Run("Escape", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{27}))
-		if err != nil || event != eventEscape {
-			t.Errorf("expected eventEscape, got %v", event)
-		}
-	})
-
-	t.Run("CtrlC_TreatedAsEscape", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{3}))
-		if err != nil || event != eventEscape {
-			t.Errorf("expected eventEscape for Ctrl+C, got %v", event)
-		}
-	})
-
-	t.Run("Tab", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{9}))
-		if err != nil || event != eventTab {
-			t.Errorf("expected eventTab, got %v", event)
-		}
-	})
-
-	t.Run("Backspace", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{127}))
-		if err != nil || event != eventBackspace {
-			t.Errorf("expected eventBackspace, got %v", event)
-		}
-	})
-
-	t.Run("UpArrow", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{27, 91, 65}))
-		if err != nil || event != eventUp {
-			t.Errorf("expected eventUp, got %v", event)
-		}
-	})
-
-	t.Run("DownArrow", func(t *testing.T) {
-		event, _, err := parseInput(bytes.NewReader([]byte{27, 91, 66}))
-		if err != nil || event != eventDown {
-			t.Errorf("expected eventDown, got %v", event)
-		}
-	})
-
-	t.Run("PrintableChars", func(t *testing.T) {
-		tests := []struct {
-			name string
-			char byte
-		}{
-			{"lowercase", 'a'},
-			{"uppercase", 'Z'},
-			{"digit", '5'},
-			{"space", ' '},
-			{"tilde", '~'},
-		}
-		for _, tt := range tests {
-			t.Run(tt.name, func(t *testing.T) {
-				event, char, err := parseInput(bytes.NewReader([]byte{tt.char}))
-				if err != nil || event != eventChar || char != tt.char {
-					t.Errorf("expected (eventChar, %q), got (%v, %q)", tt.char, event, char)
-				}
-			})
-		}
-	})
-}
-
-func TestRenderSelect(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1", Description: "first item"},
-		{Name: "item2"},
-	}
-
-	t.Run("ShowsPromptAndItems", func(t *testing.T) {
-		s := newSelectState(items)
-		var buf bytes.Buffer
-		lineCount := renderSelect(&buf, "Select:", s)
-
-		output := buf.String()
-		if !strings.Contains(output, "Select:") {
-			t.Error("expected prompt in output")
-		}
-		if !strings.Contains(output, "item1") {
-			t.Error("expected item1 in output")
-		}
-		if !strings.Contains(output, "first item") {
-			t.Error("expected description in output")
-		}
-		if !strings.Contains(output, "item2") {
-			t.Error("expected item2 in output")
-		}
-		if lineCount != 3 { // 1 prompt + 2 items
-			t.Errorf("expected 3 lines, got %d", lineCount)
-		}
-	})
-
-	t.Run("EmptyFilteredList_ShowsNoMatches", func(t *testing.T) {
-		s := newSelectState(items)
-		s.filter = "xyz"
-		var buf bytes.Buffer
-		renderSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "no matches") {
-			t.Error("expected 'no matches' message")
-		}
-	})
-
-	t.Run("LongList_ShowsRemainingCount", func(t *testing.T) {
-		manyItems := make([]selectItem, 15)
-		for i := range manyItems {
-			manyItems[i] = selectItem{Name: string(rune('a' + i))}
-		}
-		s := newSelectState(manyItems)
-		var buf bytes.Buffer
-		renderSelect(&buf, "Select:", s)
-
-		// 15 items - 10 displayed = 5 more
-		if !strings.Contains(buf.String(), "5 more") {
-			t.Error("expected '5 more' indicator")
-		}
-	})
-}
-
-func TestRenderMultiSelect(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1"},
-		{Name: "item2"},
-	}
-
-	t.Run("ShowsCheckboxes", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		output := buf.String()
-		if !strings.Contains(output, "[x]") {
-			t.Error("expected checked checkbox [x]")
-		}
-		if !strings.Contains(output, "[ ]") {
-			t.Error("expected unchecked checkbox [ ]")
-		}
-	})
-
-	t.Run("ShowsDefaultMarker", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1"})
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "(default)") {
-			t.Error("expected (default) marker for first checked item")
-		}
-	})
-
-	t.Run("ShowsSelectedCount", func(t *testing.T) {
-		s := newMultiSelectState(items, []string{"item1", "item2"})
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "2 selected") {
-			t.Error("expected '2 selected' in output")
-		}
-	})
-
-	t.Run("NoSelection_ShowsHelperText", func(t *testing.T) {
-		s := newMultiSelectState(items, nil)
-		var buf bytes.Buffer
-		renderMultiSelect(&buf, "Select:", s)
-
-		if !strings.Contains(buf.String(), "Select at least one") {
-			t.Error("expected 'Select at least one' helper text")
-		}
-	})
-}
-
-func TestErrCancelled(t *testing.T) {
-	t.Run("NotNil", func(t *testing.T) {
-		if errCancelled == nil {
-			t.Error("errCancelled should not be nil")
-		}
-	})
-
-	t.Run("Message", func(t *testing.T) {
-		if errCancelled.Error() != "cancelled" {
-			t.Errorf("expected 'cancelled', got %q", errCancelled.Error())
-		}
-	})
-}
-
-// Edge case tests for selector.go
-
-// TestSelectState_SingleItem verifies that single item list works without crash.
-// List with only one item should still work.
-func TestSelectState_SingleItem(t *testing.T) {
-	items := []selectItem{{Name: "only-one"}}
-
-	s := newSelectState(items)
-
-	// Down should do nothing (already at bottom)
-	s.handleInput(eventDown, 0)
-	if s.selected != 0 {
-		t.Errorf("down on single item: expected selected=0, got %d", s.selected)
-	}
-
-	// Up should do nothing (already at top)
-	s.handleInput(eventUp, 0)
-	if s.selected != 0 {
-		t.Errorf("up on single item: expected selected=0, got %d", s.selected)
-	}
-
-	// Enter should select the only item
-	done, result, err := s.handleInput(eventEnter, 0)
-	if !done || result != "only-one" || err != nil {
-		t.Errorf("enter on single item: expected (true, 'only-one', nil), got (%v, %q, %v)", done, result, err)
-	}
-}
-
-// TestSelectState_ExactlyMaxItems verifies boundary condition at maxDisplayedItems.
-// List with exactly maxDisplayedItems items should not scroll.
-func TestSelectState_ExactlyMaxItems(t *testing.T) {
-	items := make([]selectItem, maxDisplayedItems)
-	for i := range items {
-		items[i] = selectItem{Name: string(rune('a' + i))}
-	}
-
-	s := newSelectState(items)
-
-	// Move to last item
-	for range maxDisplayedItems - 1 {
-		s.handleInput(eventDown, 0)
-	}
-
-	if s.selected != maxDisplayedItems-1 {
-		t.Errorf("expected selected=%d, got %d", maxDisplayedItems-1, s.selected)
-	}
-
-	// Should not scroll when exactly at max
-	if s.scrollOffset != 0 {
-		t.Errorf("expected scrollOffset=0 for exactly maxDisplayedItems, got %d", s.scrollOffset)
-	}
-
-	// One more down should do nothing
-	s.handleInput(eventDown, 0)
-	if s.selected != maxDisplayedItems-1 {
-		t.Errorf("down at max: expected selected=%d, got %d", maxDisplayedItems-1, s.selected)
-	}
-}
-
-// TestFilterItems_RegexSpecialChars verifies that filter is literal, not regex.
-// User typing "model.v1" shouldn't match "modelsv1".
-func TestFilterItems_RegexSpecialChars(t *testing.T) {
-	items := []selectItem{
-		{Name: "model.v1"},
-		{Name: "modelsv1"},
-		{Name: "model-v1"},
-	}
-
-	// Filter with dot should only match literal dot
-	result := filterItems(items, "model.v1")
-	if len(result) != 1 {
-		t.Errorf("expected 1 exact match, got %d", len(result))
-	}
-	if len(result) > 0 && result[0].Name != "model.v1" {
-		t.Errorf("expected 'model.v1', got %s", result[0].Name)
-	}
-
-	// Other regex special chars should be literal too
-	items2 := []selectItem{
-		{Name: "test[0]"},
-		{Name: "test0"},
-		{Name: "test(1)"},
-	}
-
-	result2 := filterItems(items2, "test[0]")
-	if len(result2) != 1 || result2[0].Name != "test[0]" {
-		t.Errorf("expected only 'test[0]', got %v", result2)
-	}
-}
-
-// TestMultiSelectState_DuplicateNames documents handling of duplicate item names.
-// itemIndex uses name as key - duplicates cause collision. This documents
-// the current behavior: the last index for a duplicate name is stored
-func TestMultiSelectState_DuplicateNames(t *testing.T) {
-	// Duplicate names - this is an edge case that shouldn't happen in practice
-	items := []selectItem{
-		{Name: "duplicate"},
-		{Name: "duplicate"},
-		{Name: "unique"},
-	}
-
-	s := newMultiSelectState(items, nil)
-
-	// DOCUMENTED BEHAVIOR: itemIndex maps name to LAST index
-	// When there are duplicates, only the last occurrence's index is stored
-	if s.itemIndex["duplicate"] != 1 {
-		t.Errorf("itemIndex should map 'duplicate' to last index (1), got %d", s.itemIndex["duplicate"])
-	}
-
-	// Toggle item at highlighted=0 (first "duplicate")
-	// Due to name collision, toggleItem uses itemIndex["duplicate"] = 1
-	// So it actually toggles the SECOND duplicate item, not the first
-	s.toggleItem()
-
-	// This documents the potentially surprising behavior:
-	// We toggled at highlighted=0, but itemIndex lookup returned 1
-	if !s.checked[1] {
-		t.Error("toggle should check index 1 (due to name collision in itemIndex)")
-	}
-	if s.checked[0] {
-		t.Log("Note: index 0 is NOT checked, even though highlighted=0 (name collision behavior)")
-	}
-}
-
-// TestSelectState_FilterReducesBelowSelection verifies selection resets when filter reduces list.
-// Prevents index-out-of-bounds on next keystroke
-func TestSelectState_FilterReducesBelowSelection(t *testing.T) {
-	items := []selectItem{
-		{Name: "apple"},
-		{Name: "banana"},
-		{Name: "cherry"},
-	}
-
-	s := newSelectState(items)
-	s.selected = 2 // Select "cherry"
-
-	// Type a filter that removes cherry from results
-	s.handleInput(eventChar, 'a') // Filter to "a" - matches "apple" and "banana"
-
-	// Selection should reset to 0
-	if s.selected != 0 {
-		t.Errorf("expected selected=0 after filter, got %d", s.selected)
-	}
-
-	filtered := s.filtered()
-	if len(filtered) != 2 {
-		t.Errorf("expected 2 filtered items, got %d", len(filtered))
-	}
-}
-
-// TestFilterItems_UnicodeCharacters verifies filtering works with UTF-8.
-// Model names might contain unicode characters
-func TestFilterItems_UnicodeCharacters(t *testing.T) {
-	items := []selectItem{
-		{Name: "llama-日本語"},
-		{Name: "模型-chinese"},
-		{Name: "émoji-🦙"},
-		{Name: "regular-model"},
-	}
-
-	t.Run("filter japanese", func(t *testing.T) {
-		result := filterItems(items, "日本")
-		if len(result) != 1 || result[0].Name != "llama-日本語" {
-			t.Errorf("expected llama-日本語, got %v", result)
-		}
-	})
-
-	t.Run("filter chinese", func(t *testing.T) {
-		result := filterItems(items, "模型")
-		if len(result) != 1 || result[0].Name != "模型-chinese" {
-			t.Errorf("expected 模型-chinese, got %v", result)
-		}
-	})
-
-	t.Run("filter emoji", func(t *testing.T) {
-		result := filterItems(items, "🦙")
-		if len(result) != 1 || result[0].Name != "émoji-🦙" {
-			t.Errorf("expected émoji-🦙, got %v", result)
-		}
-	})
-
-	t.Run("filter accented char", func(t *testing.T) {
-		result := filterItems(items, "émoji")
-		if len(result) != 1 || result[0].Name != "émoji-🦙" {
-			t.Errorf("expected émoji-🦙, got %v", result)
-		}
-	})
-}
-
-// TestMultiSelectState_FilterReducesBelowHighlight verifies highlight resets when filter reduces list.
-func TestMultiSelectState_FilterReducesBelowHighlight(t *testing.T) {
-	items := []selectItem{
-		{Name: "apple"},
-		{Name: "banana"},
-		{Name: "cherry"},
-	}
-
-	s := newMultiSelectState(items, nil)
-	s.highlighted = 2 // Highlight "cherry"
-
-	// Type a filter that removes cherry
-	s.handleInput(eventChar, 'a')
-
-	if s.highlighted != 0 {
-		t.Errorf("expected highlighted=0 after filter, got %d", s.highlighted)
-	}
-}
-
-// TestMultiSelectState_EmptyItems verifies handling of empty item list.
-// Empty list should be handled gracefully.
-func TestMultiSelectState_EmptyItems(t *testing.T) {
-	s := newMultiSelectState([]selectItem{}, nil)
-
-	// Toggle should not panic on empty list
-	s.toggleItem()
-
-	if s.selectedCount() != 0 {
-		t.Errorf("expected 0 selected for empty list, got %d", s.selectedCount())
-	}
-
-	// Render should handle empty list
-	var buf bytes.Buffer
-	lineCount := renderMultiSelect(&buf, "Select:", s)
-	if lineCount == 0 {
-		t.Error("renderMultiSelect should produce output even for empty list")
-	}
-	if !strings.Contains(buf.String(), "no matches") {
-		t.Error("expected 'no matches' for empty list")
-	}
-}
-
-// TestSelectState_RenderWithDescriptions verifies rendering items with descriptions.
-func TestSelectState_RenderWithDescriptions(t *testing.T) {
-	items := []selectItem{
-		{Name: "item1", Description: "First item description"},
-		{Name: "item2", Description: ""},
-		{Name: "item3", Description: "Third item"},
-	}
-
-	s := newSelectState(items)
-	var buf bytes.Buffer
-	renderSelect(&buf, "Select:", s)
-
-	output := buf.String()
-	if !strings.Contains(output, "First item description") {
-		t.Error("expected description to be rendered")
-	}
-	if !strings.Contains(output, "item2") {
-		t.Error("expected item without description to be rendered")
-	}
-}
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@@ -116,7 +116,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		Prompt:         ">>> ",
 		AltPrompt:      "... ",
 		Placeholder:    "Send a message (/? for help)",
-		AltPlaceholder: "Press Enter to send",
+		AltPlaceholder: `Use """ to end multi-line input`,
 	})
 	if err != nil {
 		return err
@@ -159,7 +159,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			sb.WriteString(before)
 			if !ok {
 				fmt.Fprintln(&sb)
-				scanner.Prompt.UseAlt = true
 				continue
 			}

--- a/convert/convert.go
+++ b/convert/convert.go
@@ -311,12 +311,6 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 		conv = &deepseekocr{}
 	case "DeepseekV3ForCausalLM":
 		conv = &deepseek2Model{}
-	case "Glm4MoeLiteForCausalLM":
-		conv = &glm4MoeLiteModel{}
-	case "GlmOcrForConditionalGeneration":
-		conv = &glmOcrModel{}
-	case "Lfm2ForCausalLM":
-		conv = &lfm2Model{}
 	default:
 		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
 	}
--- a/convert/convert_glm4moelite.go
+++ b/convert/convert_glm4moelite.go
@@ -1,264 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"fmt"
-	"log/slog"
-	"regexp"
-	"strconv"
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/fs/ggml"
-)
-
-type glm4MoeLiteModel struct {
-	ModelParameters
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	HiddenLayers          uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-
-	RopeTheta     float32 `json:"rope_theta"`
-	QKNopeHeadDim uint32  `json:"qk_nope_head_dim"`
-	QKRopeHeadDim uint32  `json:"qk_rope_head_dim"`
-	KVLoraRank    uint32  `json:"kv_lora_rank"`
-	QLoraRank     uint32  `json:"q_lora_rank"`
-	VHeadDim      uint32  `json:"v_head_dim"`
-
-	ExpertCount            uint32  `json:"n_routed_experts"`
-	ExpertSharedCount      uint32  `json:"n_shared_experts"`
-	ExpertIntermediateSize uint32  `json:"moe_intermediate_size"`
-	ExpertUsedCount        uint32  `json:"num_experts_per_tok"`
-	ExpertWeightsNorm      bool    `json:"norm_topk_prob"`
-	ExpertWeightsScale     float32 `json:"routed_scaling_factor"`
-
-	LeadingDenseBlockCount uint32 `json:"first_k_dense_replace"`
-}
-
-func (p *glm4MoeLiteModel) KV(t *Tokenizer) KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "glm4moelite"
-	kv["general.type"] = "model"
-	kv["glm4moelite.block_count"] = p.HiddenLayers
-
-	numHeads := p.NumAttentionHeads
-	numKVHeads := p.NumKeyValueHeads
-
-	kv["glm4moelite.attention.head_count"] = numHeads
-	kv["glm4moelite.attention.head_count_kv"] = numKVHeads
-	kv["glm4moelite.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
-	kv["glm4moelite.attention.kv_lora_rank"] = p.KVLoraRank
-	kv["glm4moelite.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["glm4moelite.attention.q_lora_rank"] = p.QLoraRank
-	kv["glm4moelite.attention.value_length"] = p.VHeadDim
-	kv["glm4moelite.context_length"] = p.MaxPositionEmbeddings
-	kv["glm4moelite.embedding_length"] = p.HiddenSize
-	kv["glm4moelite.expert_count"] = p.ExpertCount
-	kv["glm4moelite.expert_feed_forward_length"] = p.ExpertIntermediateSize
-	kv["glm4moelite.expert_shared_count"] = p.ExpertSharedCount
-
-	kv["glm4moelite.expert_gating_func"] = uint32(2)
-	kv["glm4moelite.expert_used_count"] = p.ExpertUsedCount
-	kv["glm4moelite.expert_weights_norm"] = p.ExpertWeightsNorm
-	kv["glm4moelite.expert_weights_scale"] = p.ExpertWeightsScale
-	kv["glm4moelite.feed_forward_length"] = p.IntermediateSize
-	kv["glm4moelite.leading_dense_block_count"] = p.LeadingDenseBlockCount
-
-	kv["glm4moelite.rope.dimension_count"] = p.QKRopeHeadDim
-	kv["glm4moelite.rope.freq_base"] = cmp.Or(p.RopeTheta, float32(1000000.0))
-
-	kv["glm4moelite.attention.key_length_mla"] = p.KVLoraRank + p.QKRopeHeadDim
-	kv["glm4moelite.attention.value_length_mla"] = p.KVLoraRank
-
-	kv["tokenizer.ggml.pre"] = "glm4"
-
-	return kv
-}
-
-func (p *glm4MoeLiteModel) Replacements() []string {
-	return []string{
-		"lm_head", "output",
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.kv_a_proj_with_mqa", "attn_kv_a_mqa",
-		"self_attn.kv_a_layernorm", "attn_kv_a_norm",
-		"self_attn.kv_b_proj", "attn_kv_b",
-		"self_attn.q_a_proj", "attn_q_a",
-		"self_attn.q_a_layernorm", "attn_q_a_norm",
-		"self_attn.q_b_proj", "attn_q_b",
-		"self_attn.o_proj", "attn_output",
-		"post_attention_layernorm", "ffn_norm",
-		"mlp.shared_experts.down_proj", "ffn_down_shexp",
-		"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
-		"mlp.shared_experts.up_proj", "ffn_up_shexp",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"mlp.gate.e_score_correction_bias", "exp_probs_b.bias",
-		"mlp.gate", "ffn_gate_inp",
-	}
-}
-
-// repackKVB extracts K or V from the combined KV_B tensor for MLA absorption.
-// K output row-major: [n_head, kv_lora_rank, qk_nope] -> GGML ne[]={qk_nope, kv_lora_rank, n_head}
-// V output row-major: [n_head, v_head, kv_lora_rank] -> GGML ne[]={kv_lora_rank, v_head, n_head}
-func (p *glm4MoeLiteModel) repackKVB(extractK bool, kvFirst bool, numHeads int) Repacker {
-	qkNope := int(p.QKNopeHeadDim)
-	vHeadDim := int(p.VHeadDim)
-	kvLoraRank := int(p.KVLoraRank)
-	kvPerHead := qkNope + vHeadDim
-
-	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
-		dims := make([]int, len(shape))
-		for i := range shape {
-			dims[i] = int(shape[i])
-		}
-
-		var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-		var err error
-
-		// Normalize to [n_head * (qk_nope + v_head), kv_lora_rank] layout
-		if kvFirst {
-			tt, err = tensor.Transpose(tt, 1, 0)
-			if err != nil {
-				return nil, err
-			}
-			tt = tensor.Materialize(tt)
-		}
-
-		// Reshape to [n_head, qk_nope + v_head, kv_lora_rank]
-		if err := tt.Reshape(numHeads, kvPerHead, kvLoraRank); err != nil {
-			return nil, err
-		}
-
-		if extractK {
-			// Slice K: [n_head, qk_nope, kv_lora_rank]
-			tt, err = tt.Slice(nil, tensor.S(0, qkNope), nil)
-			if err != nil {
-				return nil, err
-			}
-			tt = tensor.Materialize(tt)
-			// Transpose to [n_head, kv_lora_rank, qk_nope]
-			tt, err = tensor.Transpose(tt, 0, 2, 1)
-			if err != nil {
-				return nil, err
-			}
-			tt = tensor.Materialize(tt)
-		} else {
-			// Slice V: [n_head, v_head, kv_lora_rank] - already correct layout
-			tt, err = tt.Slice(nil, tensor.S(qkNope, kvPerHead), nil)
-			if err != nil {
-				return nil, err
-			}
-			tt = tensor.Materialize(tt)
-		}
-
-		if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
-			return nil, err
-		}
-		return native.VectorF32(tt.(*tensor.Dense))
-	}
-}
-
-func (p *glm4MoeLiteModel) Tensors(s []Tensor) (out []*ggml.Tensor) {
-	merges := make([]merge, p.HiddenLayers*3)
-	for i := range p.HiddenLayers {
-		merges[i*3+0] = merge{
-			fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
-			fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
-		}
-		merges[i*3+1] = merge{
-			fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
-			fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
-		}
-		merges[i*3+2] = merge{
-			fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
-			fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
-		}
-	}
-
-	skipLayer := func(n string, minValue uint32) bool {
-		re := regexp.MustCompile(`^blk\.(\d+)`)
-		matches := re.FindStringSubmatch(n)
-		if matches == nil {
-			return false
-		}
-
-		blkNum, err := strconv.Atoi(matches[1])
-		if err != nil {
-			return false
-		}
-
-		return uint32(blkNum) >= minValue
-	}
-
-	out, s = mergeTensors(s, merges...)
-	for _, t := range s {
-		// skip any additional layers (such as the Multi-Token Prediction layer)
-		if skipLayer(t.Name(), p.HiddenLayers) {
-			slog.Debug("skipping layer", "name", t.Name())
-			continue
-		}
-
-		// Split attn_kv_b into separate attn_k_b and attn_v_b for MLA absorption
-		if strings.HasSuffix(t.Name(), ".attn_kv_b.weight") {
-			qkNope := int(p.QKNopeHeadDim)
-			vHeadDim := int(p.VHeadDim)
-			kvLoraRank := int(p.KVLoraRank)
-			kvPerHead := qkNope + vHeadDim
-			numHeads := int(p.NumAttentionHeads)
-			kvFirst := true
-			if len(t.Shape()) == 2 {
-				switch {
-				case int(t.Shape()[0]) == kvLoraRank:
-					if kvPerHead > 0 && int(t.Shape()[1])%kvPerHead == 0 {
-						numHeads = int(t.Shape()[1]) / kvPerHead
-					}
-					kvFirst = true
-				case int(t.Shape()[1]) == kvLoraRank:
-					if kvPerHead > 0 && int(t.Shape()[0])%kvPerHead == 0 {
-						numHeads = int(t.Shape()[0]) / kvPerHead
-					}
-					kvFirst = false
-				default:
-					slog.Warn("glm4moelite: unexpected attn_kv_b layout", "name", t.Name(), "shape", t.Shape())
-				}
-			}
-
-			kTensor := t.Clone()
-			kTensor.SetRepacker(p.repackKVB(true, kvFirst, numHeads))
-			out = append(out, &ggml.Tensor{
-				Name:     strings.Replace(t.Name(), "attn_kv_b", "attn_k_b", 1),
-				Kind:     t.Kind(),
-				Shape:    []uint64{uint64(numHeads), uint64(kvLoraRank), uint64(qkNope)},
-				WriterTo: kTensor,
-			})
-
-			vTensor := t.Clone()
-			vTensor.SetRepacker(p.repackKVB(false, kvFirst, numHeads))
-			out = append(out, &ggml.Tensor{
-				Name:     strings.Replace(t.Name(), "attn_kv_b", "attn_v_b", 1),
-				Kind:     t.Kind(),
-				Shape:    []uint64{uint64(numHeads), uint64(vHeadDim), uint64(kvLoraRank)},
-				WriterTo: vTensor,
-			})
-			continue
-		}
-
-		out = append(out, &ggml.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-	return out
-}
--- a/convert/convert_glmocr.go
+++ b/convert/convert_glmocr.go
@@ -1,455 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"encoding/json"
-	"io/fs"
-	"log/slog"
-	"regexp"
-	"strconv"
-	"strings"
-
-	"github.com/ollama/ollama/fs/ggml"
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-)
-
-// normalToNeoXRepacker creates a repacker that permutes Q/K weights from interleaved (LLaMA)
-// to NeoX ordering for compatibility with GGML's M-RoPE kernel.
-//
-// For weights: reshape [out, in] -> [n_heads, head_dim, in], permute rotary dims, reshape back
-// For biases: reshape [out] -> [n_heads, head_dim], permute rotary dims, reshape back
-func normalToNeoXRepacker(nHeads, headDim int, partialRotaryFactor float32) func(string, []float32, []uint64) ([]float32, error) {
-	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
-		rotaryDim := int(float32(headDim) * partialRotaryFactor)
-		if rotaryDim%2 != 0 {
-			rotaryDim = (rotaryDim / 2) * 2 // Round down to even
-		}
-
-		// Handle 1D (bias) or 2D (weight) tensors
-		is1D := len(shape) == 1
-		var inFeatures int
-		if is1D {
-			inFeatures = 1
-		} else {
-			inFeatures = int(shape[1])
-		}
-		outFeatures := int(shape[0])
-		nEffectiveHeads := outFeatures / headDim
-
-		if nEffectiveHeads != nHeads {
-			slog.Warn("normalToNeoX: unexpected head count", "effective", nEffectiveHeads, "expected", nHeads)
-		}
-
-		// Reshape to [n_heads, head_dim, in_features]
-		reshaped := make([]float32, len(data))
-		copy(reshaped, data)
-
-		// Permute the rotary dimensions: even indices first, then odd
-		// For each head, reorder [0,1,2,3,4,5...] to [0,2,4...,1,3,5...]
-		result := make([]float32, len(data))
-		halfRotary := rotaryDim / 2
-
-		for h := range nEffectiveHeads {
-			for f := range inFeatures {
-				for i := range halfRotary {
-					// Even dim (0, 2, 4, ...) -> position i
-					srcIdx := h*headDim*inFeatures + (2*i)*inFeatures + f
-					dstIdx := h*headDim*inFeatures + i*inFeatures + f
-					result[dstIdx] = reshaped[srcIdx]
-
-					// Odd dim (1, 3, 5, ...) -> position halfRotary + i
-					srcIdx = h*headDim*inFeatures + (2*i+1)*inFeatures + f
-					dstIdx = h*headDim*inFeatures + (halfRotary+i)*inFeatures + f
-					result[dstIdx] = reshaped[srcIdx]
-				}
-
-				// Non-rotary part: copy as-is
-				for i := rotaryDim; i < headDim; i++ {
-					srcIdx := h*headDim*inFeatures + i*inFeatures + f
-					result[srcIdx] = reshaped[srcIdx]
-				}
-			}
-		}
-
-		return result, nil
-	}
-}
-
-type glmOcrModel struct {
-	ModelParameters
-
-	TextConfig struct {
-		HiddenSize          uint32  `json:"hidden_size"`
-		IntermediateSize    uint32  `json:"intermediate_size"`
-		NumHiddenLayers     uint32  `json:"num_hidden_layers"`
-		NumAttentionHeads   uint32  `json:"num_attention_heads"`
-		NumKeyValueHeads    uint32  `json:"num_key_value_heads"`
-		HeadDim             uint32  `json:"head_dim"`
-		MaxPositionEmbed    uint32  `json:"max_position_embeddings"`
-		RMSNormEps          float32 `json:"rms_norm_eps"`
-		PartialRotaryFactor float32 `json:"partial_rotary_factor"`
-		RopeParameters      struct {
-			RopeType            string  `json:"rope_type"`
-			MRopeSection        []int32 `json:"mrope_section"`
-			RopeTheta           float32 `json:"rope_theta"`
-			PartialRotaryFactor float32 `json:"partial_rotary_factor"`
-		} `json:"rope_parameters"`
-	} `json:"text_config"`
-
-	VisionConfig struct {
-		HiddenSize        uint32  `json:"hidden_size"`
-		IntermediateSize  uint32  `json:"intermediate_size"`
-		Depth             uint32  `json:"depth"`
-		NumHeads          uint32  `json:"num_heads"`
-		ImageSize         uint32  `json:"image_size"`
-		PatchSize         uint32  `json:"patch_size"`
-		OutHiddenSize     uint32  `json:"out_hidden_size"`
-		RMSNormEps        float32 `json:"rms_norm_eps"`
-		SpatialMergeSize  uint32  `json:"spatial_merge_size"`
-		TemporalPatchSize uint32  `json:"temporal_patch_size"`
-	} `json:"vision_config"`
-
-	ImageStartTokenID uint32 `json:"image_start_token_id"`
-	ImageEndTokenID   uint32 `json:"image_end_token_id"`
-	VideoStartTokenID uint32 `json:"video_start_token_id"`
-	VideoEndTokenID   uint32 `json:"video_end_token_id"`
-	ImageTokenID      uint32 `json:"image_token_id"`
-	VideoTokenID      uint32 `json:"video_token_id"`
-
-	// Preprocessor config (preprocessor_config.json)
-	Preprocessor struct {
-		Size struct {
-			ShortestEdge uint32 `json:"shortest_edge"`
-			LongestEdge  uint32 `json:"longest_edge"`
-		} `json:"size"`
-		PatchSize         uint32    `json:"patch_size"`
-		TemporalPatchSize uint32    `json:"temporal_patch_size"`
-		MergeSize         uint32    `json:"merge_size"`
-		ImageMean         []float32 `json:"image_mean"`
-		ImageStd          []float32 `json:"image_std"`
-	} `json:"-"`
-}
-
-var _ ModelConverter = (*glmOcrModel)(nil)
-
-func (m *glmOcrModel) parseMore(fsys fs.FS) error {
-	bts, err := fs.ReadFile(fsys, "preprocessor_config.json")
-	if err != nil {
-		return err
-	}
-
-	return json.Unmarshal(bts, &m.Preprocessor)
-}
-
-func (m *glmOcrModel) KV(t *Tokenizer) KV {
-	kv := m.ModelParameters.KV(t)
-	kv["general.architecture"] = "glmocr"
-
-	// Text model parameters
-	kv["glmocr.block_count"] = cmp.Or(m.TextConfig.NumHiddenLayers, 16)
-	kv["glmocr.embedding_length"] = cmp.Or(m.TextConfig.HiddenSize, 1536)
-	kv["glmocr.attention.head_count"] = cmp.Or(m.TextConfig.NumAttentionHeads, 16)
-	kv["glmocr.attention.head_count_kv"] = cmp.Or(m.TextConfig.NumKeyValueHeads, 8)
-	headDim := cmp.Or(m.TextConfig.HeadDim, m.TextConfig.HiddenSize/m.TextConfig.NumAttentionHeads)
-	kv["glmocr.attention.key_length"] = headDim
-	kv["glmocr.attention.value_length"] = headDim
-	kv["glmocr.feed_forward_length"] = cmp.Or(m.TextConfig.IntermediateSize, 4608)
-	kv["glmocr.attention.layer_norm_rms_epsilon"] = cmp.Or(m.TextConfig.RMSNormEps, 1e-5)
-	kv["glmocr.context_length"] = cmp.Or(m.TextConfig.MaxPositionEmbed, 131072)
-	kv["glmocr.rope.freq_base"] = cmp.Or(m.TextConfig.RopeParameters.RopeTheta, float32(10000))
-	kv["glmocr.rope.partial_rotary_factor"] = cmp.Or(m.TextConfig.RopeParameters.PartialRotaryFactor, m.TextConfig.PartialRotaryFactor, float32(1.0))
-	if len(m.TextConfig.RopeParameters.MRopeSection) > 0 {
-		kv["glmocr.rope.mrope_section"] = m.TextConfig.RopeParameters.MRopeSection
-	}
-
-	// Vision model parameters
-	kv["glmocr.vision.block_count"] = cmp.Or(m.VisionConfig.Depth, 24)
-	kv["glmocr.vision.embedding_length"] = cmp.Or(m.VisionConfig.HiddenSize, 1024)
-	kv["glmocr.vision.attention.head_count"] = cmp.Or(m.VisionConfig.NumHeads, 16)
-	kv["glmocr.vision.image_size"] = cmp.Or(m.VisionConfig.ImageSize, 336)
-	kv["glmocr.vision.patch_size"] = cmp.Or(m.VisionConfig.PatchSize, m.Preprocessor.PatchSize, 14)
-	kv["glmocr.vision.spatial_merge_size"] = cmp.Or(m.VisionConfig.SpatialMergeSize, m.Preprocessor.MergeSize, 2)
-	kv["glmocr.vision.temporal_patch_size"] = cmp.Or(m.VisionConfig.TemporalPatchSize, m.Preprocessor.TemporalPatchSize, 2)
-	kv["glmocr.vision.out_hidden_size"] = cmp.Or(m.VisionConfig.OutHiddenSize, 1536)
-	kv["glmocr.vision.intermediate_size"] = cmp.Or(m.VisionConfig.IntermediateSize, 4096)
-	kv["glmocr.vision.attention.layer_norm_rms_epsilon"] = cmp.Or(m.VisionConfig.RMSNormEps, 1e-5)
-
-	// Preprocessor-derived image settings (min/max pixels and normalization)
-	// Note: fs.Config.keyValue() auto-prepends architecture prefix, so use full key
-	if m.Preprocessor.Size.ShortestEdge > 0 {
-		kv["glmocr.vision.min_pixels"] = m.Preprocessor.Size.ShortestEdge
-	}
-	if m.Preprocessor.Size.LongestEdge > 0 {
-		kv["glmocr.vision.max_pixels"] = m.Preprocessor.Size.LongestEdge
-	}
-	if len(m.Preprocessor.ImageMean) == 3 {
-		kv["glmocr.vision.image_mean"] = m.Preprocessor.ImageMean
-	}
-	if len(m.Preprocessor.ImageStd) == 3 {
-		kv["glmocr.vision.image_std"] = m.Preprocessor.ImageStd
-	}
-
-	// Special tokens
-	kv["glmocr.image_token_id"] = m.ImageTokenID
-	kv["glmocr.image_start_token_id"] = m.ImageStartTokenID
-	kv["glmocr.image_end_token_id"] = m.ImageEndTokenID
-	kv["glmocr.video_token_id"] = m.VideoTokenID
-	kv["glmocr.video_start_token_id"] = m.VideoStartTokenID
-	kv["glmocr.video_end_token_id"] = m.VideoEndTokenID
-
-	return kv
-}
-
-func (m *glmOcrModel) Tensors(ts []Tensor) []*ggml.Tensor {
-	var out []*ggml.Tensor
-
-	// Skip layers >= num_hidden_layers (Multi-Token Prediction layers not needed for basic inference)
-	numLayers := int(cmp.Or(m.TextConfig.NumHiddenLayers, 16))
-	skipLayer := func(name string) bool {
-		// Tensor names are already replaced to "blk.N.xxx" format
-		re := regexp.MustCompile(`^blk\.(\d+)`)
-		matches := re.FindStringSubmatch(name)
-		if matches == nil {
-			return false
-		}
-		blkNum, err := strconv.Atoi(matches[1])
-		if err != nil {
-			return false
-		}
-		return blkNum >= numLayers
-	}
-
-	for _, t := range ts {
-		name := t.Name()
-
-		// Skip next-n prediction layers (layers >= num_hidden_layers)
-		if skipLayer(name) {
-			continue
-		}
-
-		// Split ffn_gate_up into separate gate and up projections
-		if strings.Contains(name, "ffn_gate_up") {
-			for t := range splitDim(t, 0,
-				split{Replacer: strings.NewReplacer("ffn_gate_up", "ffn_gate")},
-				split{Replacer: strings.NewReplacer("ffn_gate_up", "ffn_up")},
-			) {
-				out = append(out, t)
-			}
-			continue
-		}
-
-		if strings.HasSuffix(name, "patch_embd.weight") {
-			shape := t.Shape()
-			if len(shape) == 5 && shape[2] == 2 {
-				newShape := []uint64{shape[0], shape[1], shape[3], shape[4]}
-
-				t0 := t.Clone()
-				t0.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
-					dims := make([]int, len(shape))
-					for i := range shape {
-						dims[i] = int(shape[i])
-					}
-					var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-					tt, err := tt.Slice(nil, nil, tensor.S(0, 1), nil, nil)
-					if err != nil {
-						return nil, err
-					}
-					tt = tensor.Materialize(tt)
-					newDims := []int{int(shape[0]), int(shape[1]), int(shape[3]), int(shape[4])}
-					if err := tt.Reshape(newDims...); err != nil {
-						return nil, err
-					}
-					if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
-						return nil, err
-					}
-					return native.VectorF32(tt.(*tensor.Dense))
-				})
-				out = append(out, &ggml.Tensor{
-					Name:     strings.Replace(name, "patch_embd.weight", "patch_embd_0.weight", 1),
-					Kind:     t.Kind(),
-					Shape:    newShape,
-					WriterTo: t0,
-				})
-
-				t1 := t.Clone()
-				t1.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
-					dims := make([]int, len(shape))
-					for i := range shape {
-						dims[i] = int(shape[i])
-					}
-					var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-					tt, err := tt.Slice(nil, nil, tensor.S(1, 2), nil, nil)
-					if err != nil {
-						return nil, err
-					}
-					tt = tensor.Materialize(tt)
-					newDims := []int{int(shape[0]), int(shape[1]), int(shape[3]), int(shape[4])}
-					if err := tt.Reshape(newDims...); err != nil {
-						return nil, err
-					}
-					if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
-						return nil, err
-					}
-					return native.VectorF32(tt.(*tensor.Dense))
-				})
-				out = append(out, &ggml.Tensor{
-					Name:     strings.Replace(name, "patch_embd.weight", "patch_embd_1.weight", 1),
-					Kind:     t.Kind(),
-					Shape:    newShape,
-					WriterTo: t1,
-				})
-
-				continue
-			}
-
-			if len(shape) == 4 {
-				out = append(out, &ggml.Tensor{
-					Name:     strings.Replace(name, "patch_embd.weight", "patch_embd_0.weight", 1),
-					Kind:     t.Kind(),
-					Shape:    t.Shape(),
-					WriterTo: t,
-				})
-				continue
-			}
-
-			slog.Warn("glmocr: patch_embed weight has unexpected shape - not splitting", "shape", shape)
-			// Fall through to default handling
-		}
-
-		// Handle pre-split patch embedding weights
-		// Pattern 1: v.patch_embd.0.weight, v.patch_embd.1.weight -> patch_embd_0.weight, patch_embd_1.weight
-		// Pattern 2: v.patch_embd.weight.0, v.patch_embd.weight.1 -> patch_embd_0.weight, patch_embd_1.weight
-		if strings.Contains(name, "patch_embd.0.") {
-			out = append(out, &ggml.Tensor{
-				Name:     strings.Replace(name, "patch_embd.0.", "patch_embd_0.", 1),
-				Kind:     t.Kind(),
-				Shape:    t.Shape(),
-				WriterTo: t,
-			})
-			continue
-		}
-		if strings.Contains(name, "patch_embd.1.") {
-			out = append(out, &ggml.Tensor{
-				Name:     strings.Replace(name, "patch_embd.1.", "patch_embd_1.", 1),
-				Kind:     t.Kind(),
-				Shape:    t.Shape(),
-				WriterTo: t,
-			})
-			continue
-		}
-		// Handle .weight.0 and .weight.1 suffix patterns
-		if strings.HasSuffix(name, "patch_embd.weight.0") {
-			out = append(out, &ggml.Tensor{
-				Name:     strings.Replace(name, "patch_embd.weight.0", "patch_embd_0.weight", 1),
-				Kind:     t.Kind(),
-				Shape:    t.Shape(),
-				WriterTo: t,
-			})
-			continue
-		}
-		if strings.HasSuffix(name, "patch_embd.weight.1") {
-			out = append(out, &ggml.Tensor{
-				Name:     strings.Replace(name, "patch_embd.weight.1", "patch_embd_1.weight", 1),
-				Kind:     t.Kind(),
-				Shape:    t.Shape(),
-				WriterTo: t,
-			})
-			continue
-		}
-
-		// Permute Q/K weights for M-RoPE compatibility (interleaved -> NeoX ordering)
-		// GGML's M-RoPE kernel uses NeoX-style rotation, but GLM-OCR uses interleaved (LLaMA-style)
-		// We permute at conversion time so the weights work correctly with GGML's kernel
-		// This aligns Q/K rotary dimensions with GGML's NeoX-style rotation
-		if len(m.TextConfig.RopeParameters.MRopeSection) > 0 &&
-			strings.Contains(name, "blk.") && (strings.Contains(name, "attn_q.") || strings.Contains(name, "attn_k.")) {
-			// Get config values for permutation
-			nHeads := int(cmp.Or(m.TextConfig.NumAttentionHeads, 16))
-			nKVHeads := int(cmp.Or(m.TextConfig.NumKeyValueHeads, 8))
-			hiddenSize := int(cmp.Or(m.TextConfig.HiddenSize, 1536))
-			headDim := int(cmp.Or(m.TextConfig.HeadDim, uint32(hiddenSize/nHeads)))
-			partialRotaryFactor := cmp.Or(m.TextConfig.PartialRotaryFactor, m.TextConfig.RopeParameters.PartialRotaryFactor, float32(1.0))
-
-			// Use appropriate head count: nHeads for Q, nKVHeads for K
-			effectiveHeads := nHeads
-			if strings.Contains(name, "attn_k.") {
-				effectiveHeads = nKVHeads
-			}
-
-			permutedT := t.Clone()
-			permutedT.SetRepacker(normalToNeoXRepacker(effectiveHeads, headDim, partialRotaryFactor))
-			out = append(out, &ggml.Tensor{
-				Name:     name,
-				Kind:     t.Kind(),
-				Shape:    t.Shape(),
-				WriterTo: permutedT,
-			})
-			continue
-		}
-
-		out = append(out, &ggml.Tensor{
-			Name:     name,
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (m *glmOcrModel) Replacements() []string {
-	return []string{
-		// Vision encoder
-		"model.visual.patch_embed.proj_1", "v.patch_embd_1", // Second temporal split
-		"model.visual.patch_embed.proj", "v.patch_embd",
-		"model.visual.blocks", "v.blk",
-		"model.visual.post_layernorm", "v.post_ln",
-		"model.visual.downsample", "mm.patch_merger",
-
-		// Vision attention
-		"attn.qkv", "attn_qkv",
-		"attn.proj", "attn_out",
-		"attn.q_norm", "attn_q_norm",
-		"attn.k_norm", "attn_k_norm",
-
-		// Vision norms
-		"norm1", "ln1",
-		"norm2", "ln2",
-
-		// Vision MLP
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.up_proj", "ffn_up",
-		"mlp.down_proj", "ffn_down",
-
-		// Merger (multimodal projector)
-		"model.visual.merger.proj", "mm.model.fc",
-		"model.visual.merger.post_projection_norm", "mm.post_norm",
-		"model.visual.merger.gate_proj", "mm.gate",
-		"model.visual.merger.up_proj", "mm.up",
-		"model.visual.merger.down_proj", "mm.down",
-
-		// Language model
-		"model.language_model.embed_tokens", "token_embd",
-		"model.language_model.layers", "blk",
-		"model.language_model.norm", "output_norm",
-		"lm_head", "output",
-
-		// Language model attention
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_out",
-
-		// Language model norms
-		"input_layernorm", "attn_norm",
-		"post_attention_layernorm", "ffn_norm",
-		"post_self_attn_layernorm", "post_attn_norm",
-		"post_mlp_layernorm", "post_ffn_norm",
-
-		// Language model MLP (remove mlp. prefix so ffn_* names work)
-		"mlp.gate_up_proj", "ffn_gate_up",
-		"mlp.down_proj", "ffn_down",
-	}
-}
--- a/convert/convert_lfm2.go
+++ b/convert/convert_lfm2.go
@@ -1,100 +0,0 @@
-package convert
-
-import (
-	"slices"
-	"strings"
-
-	"github.com/ollama/ollama/fs/ggml"
-)
-
-type lfm2Model struct {
-	ModelParameters
-	HiddenSize            uint32   `json:"hidden_size"`
-	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
-	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
-	IntermediateSize      uint32   `json:"intermediate_size"`
-	NumAttentionHeads     uint32   `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
-	RopeTheta             float32  `json:"rope_theta"`
-	NormEps               float32  `json:"norm_eps"`
-	ConvLCache            uint32   `json:"conv_L_cache"`
-	LayerTypes            []string `json:"layer_types"`
-	TieEmbedding          bool     `json:"tie_embedding"`
-}
-
-var _ ModelConverter = (*lfm2Model)(nil)
-
-func (p *lfm2Model) KV(t *Tokenizer) KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "lfm2"
-	kv["lfm2.vocab_size"] = p.VocabSize
-	kv["lfm2.block_count"] = p.NumHiddenLayers
-	kv["lfm2.embedding_length"] = p.HiddenSize
-	kv["lfm2.feed_forward_length"] = p.IntermediateSize
-	kv["lfm2.context_length"] = p.MaxPositionEmbeddings
-
-	// Build per-layer KV head count array based on layer_types
-	// (0 = shortconv layer, non-zero = attention layer with that many KV heads)
-	kvHeadCounts := make([]uint32, p.NumHiddenLayers)
-	for i := range p.NumHiddenLayers {
-		if int(i) < len(p.LayerTypes) && p.LayerTypes[i] == "full_attention" {
-			kvHeadCounts[i] = p.NumKeyValueHeads
-		}
-	}
-
-	kv["lfm2.attention.head_count"] = p.NumAttentionHeads
-	kv["lfm2.attention.head_count_kv"] = kvHeadCounts
-	kv["lfm2.attention.key_length"] = p.HiddenSize / p.NumAttentionHeads
-	kv["lfm2.attention.value_length"] = p.HiddenSize / p.NumAttentionHeads
-	kv["lfm2.attention.layer_norm_rms_epsilon"] = p.NormEps
-	kv["lfm2.rope.freq_base"] = p.RopeTheta
-	kv["lfm2.shortconv.l_cache"] = p.ConvLCache
-
-	return kv
-}
-
-func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
-	var out []*ggml.Tensor
-
-	for _, t := range ts {
-		shape := t.Shape()
-
-		// Squeeze conv weights: [D, 1, K] -> [D, K]
-		if strings.HasSuffix(t.Name(), "shortconv.conv.weight") {
-			if len(shape) == 3 && shape[1] == 1 {
-				shape = []uint64{shape[0], shape[2]}
-			}
-		}
-
-		out = append(out, &ggml.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    slices.Clone(shape),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *lfm2Model) Replacements() []string {
-	return []string{
-		"model.embed_tokens", "token_embd",
-		"model.embedding_norm", "output_norm",
-		"model.layers", "blk",
-		"operator_norm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.out_proj", "attn_output",
-		"self_attn.q_layernorm", "attn_q_norm",
-		"self_attn.k_layernorm", "attn_k_norm",
-		"conv.conv", "shortconv.conv",
-		"conv.in_proj", "shortconv.in_proj",
-		"conv.out_proj", "shortconv.out_proj",
-		"feed_forward.w1", "ffn_gate",
-		"feed_forward.w2", "ffn_down",
-		"feed_forward.w3", "ffn_up",
-		"ffn_norm", "ffn_norm",
-	}
-}
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -40,7 +40,6 @@ const (
 func (t tensorBase) Kind() uint32 {
 	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
 		strings.HasSuffix(t.name, ".bias") ||
-		strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
 		t.name == "token_types.weight" ||
 		t.name == "v.positional_embedding_vlm" ||
 		t.name == "v.tile_position_embd.weight" ||
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -99,7 +99,6 @@ func (st safetensor) Kind() uint32 {
 	if st.dtype == "BF16" &&
 		!strings.HasPrefix(st.name, "v.") &&
 		!strings.HasPrefix(st.name, "s.") &&
-		!strings.HasPrefix(st.name, "mm.") &&
 		kind != tensorKindFP32 {
 		kind = tensorKindBF16
 	}
--- a/docs/api.md
+++ b/docs/api.md
@@ -16,7 +16,6 @@
 - [Generate Embeddings](#generate-embeddings)
 - [List Running Models](#list-running-models)
 - [Version](#version)
- [Experimental: Image Generation](#image-generation-experimental)

 ## Conventions

@@ -59,15 +58,6 @@ Advanced parameters (optional):
 - `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
 - `context` (deprecated): the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory

-Experimental image generation parameters (for image generation models only):
-
-> [!WARNING]
-> These parameters are experimental and may change in future versions.
-
- `width`: width of the generated image in pixels
- `height`: height of the generated image in pixels
- `steps`: number of diffusion steps
-
 #### Structured outputs

 Structured outputs are supported by providing a JSON schema in the `format` parameter. The model will generate a response that matches the schema. See the [structured outputs](#request-structured-outputs) example below.
@@ -1877,55 +1867,3 @@ curl http://localhost:11434/api/version
  "version": "0.5.1"
 }
 ```
-
-## Experimental Features
-
-### Image Generation (Experimental)
-
-> [!WARNING]
-> Image generation is experimental and may change in future versions.
-
-Image generation is now supported through the standard `/api/generate` endpoint when using image generation models. The API automatically detects when an image generation model is being used.
-
-See the [Generate a completion](#generate-a-completion) section for the full API documentation. The experimental image generation parameters (`width`, `height`, `steps`) are documented there.
-
-#### Example
-
-##### Request
-
-```shell
-curl http://localhost:11434/api/generate -d '{
-  "model": "x/z-image-turbo",
-  "prompt": "a sunset over mountains",
-  "width": 1024,
-  "height": 768
-}'
-```
-
-##### Response (streaming)
-
-Progress updates during generation:
-
-```json
-{
-  "model": "x/z-image-turbo",
-  "created_at": "2024-01-15T10:30:00.000000Z",
-  "completed": 5,
-  "total": 20,
-  "done": false
-}
-```
-
-##### Final Response
-
-```json
-{
-  "model": "x/z-image-turbo",
-  "created_at": "2024-01-15T10:30:15.000000Z",
-  "image": "iVBORw0KGgoAAAANSUhEUg...",
-  "done": true,
-  "done_reason": "stop",
-  "total_duration": 15000000000,
-  "load_duration": 2000000000
-}
-```
--- a/docs/api/anthropic-compatibility.mdx
+++ b/docs/api/anthropic-compatibility.mdx
@@ -4,6 +4,16 @@ title: Anthropic compatibility

 Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code.

+## Recommended models
+
+For coding use cases, models like `glm-4.7:cloud`, `minimax-m2.1:cloud`, and `qwen3-coder` are recommended.
+
+Pull a model before use:
+```shell
+ollama pull qwen3-coder
+ollama pull glm-4.7:cloud
+```
+
 ## Usage

 ### Environment variables
@@ -11,9 +21,8 @@ Ollama provides compatibility with the [Anthropic Messages API](https://docs.ant
 To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:

 ```shell
-export ANTHROPIC_AUTH_TOKEN=ollama  # required but ignored
-export ANTHROPIC_API_KEY="" # required but ignored
 export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama  # required but ignored
 ```

 ### Simple `/v1/messages` example
@@ -235,55 +244,29 @@ curl -X POST http://localhost:11434/v1/messages \

 ## Using with Claude Code

-[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend. 
-
-### Recommended models
-
-For coding use cases, models like `glm-4.7`, `minimax-m2.1`, and `qwen3-coder` are recommended.
-
-Download a model before use:
+[Claude Code](https://code.claude.com/docs/en/overview) can be configured to use Ollama as its backend:

 ```shell
-ollama pull qwen3-coder
-```
-> Note: Qwen 3 coder is a 30B parameter model requiring at least 24GB of VRAM to run smoothly. More is required for longer context lengths. 
-
-```shell
-ollama pull glm-4.7:cloud
-```
-
-### Quick setup
-
-```shell
-ollama launch claude
-```
-
-This will prompt you to select a model, configure Claude Code automatically, and launch it. To configure without launching:
-
-```shell
-ollama launch claude --config
-```
-
-### Manual setup
-
-Set the environment variables and run Claude Code:
-
-```shell
-ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY="" claude --model qwen3-coder
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
 ```

 Or set the environment variables in your shell profile:

 ```shell
-export ANTHROPIC_AUTH_TOKEN=ollama
 export ANTHROPIC_BASE_URL=http://localhost:11434
-export ANTHROPIC_API_KEY=""
+export ANTHROPIC_API_KEY=ollama
 ```

 Then run Claude Code with any Ollama model:

 ```shell
+# Local models
 claude --model qwen3-coder
+claude --model gpt-oss:20b
+
+# Cloud models
+claude --model glm-4.7:cloud
+claude --model minimax-m2.1:cloud
 ```

 ## Endpoints
--- a/docs/api/openai-compatibility.mdx
+++ b/docs/api/openai-compatibility.mdx
@@ -275,73 +275,6 @@ curl -X POST http://localhost:11434/v1/chat/completions \
 - [x] `dimensions`
 - [ ] `user`

-### `/v1/images/generations` (experimental)
-
-> Note: This endpoint is experimental and may change or be removed in future versions.
-
-Generate images using image generation models.
-
-<CodeGroup dropdown>
-
-```python images.py
-from openai import OpenAI
-
-client = OpenAI(
-    base_url='http://localhost:11434/v1/',
-    api_key='ollama',  # required but ignored
-)
-
-response = client.images.generate(
-    model='x/z-image-turbo',
-    prompt='A cute robot learning to paint',
-    size='1024x1024',
-    response_format='b64_json',
-)
-print(response.data[0].b64_json[:50] + '...')
-```
-
-```javascript images.js
-import OpenAI from "openai";
-
-const openai = new OpenAI({
-  baseURL: "http://localhost:11434/v1/",
-  apiKey: "ollama", // required but ignored
-});
-
-const response = await openai.images.generate({
-  model: "x/z-image-turbo",
-  prompt: "A cute robot learning to paint",
-  size: "1024x1024",
-  response_format: "b64_json",
-});
-
-console.log(response.data[0].b64_json.slice(0, 50) + "...");
-```
-
-```shell images.sh
-curl -X POST http://localhost:11434/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
-  "model": "x/z-image-turbo",
-  "prompt": "A cute robot learning to paint",
-  "size": "1024x1024",
-  "response_format": "b64_json"
-}'
-```
-
-</CodeGroup>
-
-#### Supported request fields
-
- [x] `model`
- [x] `prompt`
- [x] `size` (e.g. "1024x1024")
- [x] `response_format` (only `b64_json` supported)
- [ ] `n`
- [ ] `quality`
- [ ] `style`
- [ ] `user`
-
 ### `/v1/responses`

 > Note: Added in Ollama v0.13.3
--- a/docs/capabilities/web-search.mdx
+++ b/docs/capabilities/web-search.mdx
@@ -110,7 +110,7 @@ More Ollama [Python example](https://github.com/ollama/ollama-python/blob/main/e
 import { Ollama } from "ollama";

 const client = new Ollama();
-const results = await client.webSearch("what is ollama?");
+const results = await client.webSearch({ query: "what is ollama?" });
 console.log(JSON.stringify(results, null, 2));
 ```

@@ -213,7 +213,7 @@ models](https://ollama.com/models)\n\nAvailable for macOS, Windows, and Linux',
 import { Ollama } from "ollama";

 const client = new Ollama();
-const fetchResult = await client.webFetch("https://ollama.com");
+const fetchResult = await client.webFetch({ url: "https://ollama.com" });
 console.log(JSON.stringify(fetchResult, null, 2));
 ```

--- a/docs/cli.mdx
+++ b/docs/cli.mdx
@@ -8,47 +8,6 @@ title: CLI Reference
 ollama run gemma3
 ```

-### Launch integrations
-
-```
-ollama launch
-```
-
-Configure and launch external applications to use Ollama models. This provides an interactive way to set up and start integrations with supported apps.
-
-#### Supported integrations
-
- **OpenCode** - Open-source coding assistant
- **Claude Code** - Anthropic's agentic coding tool
- **Codex** - OpenAI's coding assistant
- **Droid** - Factory's AI coding agent
-
-#### Examples
-
-Launch an integration interactively:
-
-```
-ollama launch
-```
-
-Launch a specific integration:
-
-```
-ollama launch claude
-```
-
-Launch with a specific model:
-
-```
-ollama launch claude --model qwen3-coder
-```
-
-Configure without launching:
-
-```
-ollama launch droid --config
-```
-
 #### Multiline input

 For multiline input, you can wrap text with `"""`:
--- a/docs/cloud.mdx
+++ b/docs/cloud.mdx
@@ -3,6 +3,8 @@ title: Cloud
 sidebarTitle: Cloud
 ---

+<Info>Ollama's cloud is currently in preview.</Info>
+
 ## Cloud Models

 Ollama's cloud models are a new kind of model in Ollama that can run without a powerful GPU. Instead, cloud models are automatically offloaded to Ollama's cloud service while offering the same capabilities as local models, making it possible to keep using your local tools while running larger models that wouldn't fit on a personal computer.
--- a/docs/context-length.mdx
+++ b/docs/context-length.mdx
@@ -8,7 +8,7 @@ Context length is the maximum number of tokens that the model has access to in m
  The default context length in Ollama is 4096 tokens.
 </Note>

-Tasks which require large context like web search, agents, and coding tools should be set to at least 64000 tokens.
+For tasks that require a large context, such as web search, agents, and coding tools, set the context length to at least 32000 tokens.

 ## Setting context length

@@ -24,7 +24,7 @@ Change the slider in the Ollama app under settings to your desired context lengt
 ### CLI
 If editing the context length for Ollama is not possible, the context length can also be updated when serving Ollama.  
 ```
-OLLAMA_CONTEXT_LENGTH=64000 ollama serve
+OLLAMA_CONTEXT_LENGTH=32000 ollama serve
 ```

 ### Check allocated context length and model offloading
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -71,10 +71,6 @@
    {
      "source": "/api",
      "destination": "/api/introduction"
-    },
-    {
-      "source": "/integrations/clawdbot",
-      "destination": "/integrations/openclaw"
    }
  ],
  "navigation": {
@@ -106,20 +102,16 @@
            "group": "Integrations",
            "pages": [
              "/integrations/claude-code",
-              "/integrations/cline",
-              "/integrations/openclaw",
+              "/integrations/vscode",
+              "/integrations/jetbrains",
              "/integrations/codex",
+              "/integrations/cline",
              "/integrations/droid",
              "/integrations/goose",
-              "/integrations/jetbrains",
-              "/integrations/marimo",
-              "/integrations/n8n",
-              "/integrations/onyx",
-              "/integrations/opencode",
+              "/integrations/zed",
              "/integrations/roo-code",
-              "/integrations/vscode",
-              "/integrations/xcode",
-              "/integrations/zed"
+              "/integrations/n8n",
+              "/integrations/xcode"
            ]
          },
          {
--- a/docs/faq.mdx
+++ b/docs/faq.mdx
@@ -22,7 +22,7 @@ Please refer to the [GPU docs](./gpu).

 ## How can I specify the context window size?

-By default, Ollama uses a context window size of 4096 tokens.
+By default, Ollama uses a context window size of 4096 tokens.

 This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:

--- a/docs/gpu.mdx
+++ b/docs/gpu.mdx
@@ -10,7 +10,6 @@ Check your compute compatibility to see if your card is supported:

 | Compute Capability | Family              | Cards                                                                                                                          |
 | ------------------ | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
-| 12.1               | NVIDIA              | `GB10 (DGX Spark)`                                                                                                             |
 | 12.0               | GeForce RTX 50xx    | `RTX 5060` `RTX 5060 Ti` `RTX 5070` `RTX 5070 Ti` `RTX 5080` `RTX 5090`                                                        |
 |                    | NVIDIA Professional | `RTX PRO 4000 Blackwell` `RTX PRO 4500 Blackwell` `RTX PRO 5000 Blackwell` `RTX PRO 6000 Blackwell`                            |
 | 9.0                | NVIDIA              | `H200` `H100`                                                                                                                  |
@@ -164,4 +163,4 @@ To select specific Vulkan GPU(s), you can set the environment variable
 `GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
 described in the [FAQ](faq#how-do-i-configure-ollama-server). If you
 encounter any problems with Vulkan based GPUs, you can disable all Vulkan GPUs
-by setting `GGML_VK_VISIBLE_DEVICES=-1` 
+by setting `GGML_VK_VISIBLE_DEVICES=-1` 
--- a/docs/images/marimo-add-model.png
+++ b/docs/images/marimo-add-model.png
--- a/docs/images/marimo-chat.png
+++ b/docs/images/marimo-chat.png
--- a/docs/images/marimo-code-completion.png
+++ b/docs/images/marimo-code-completion.png
--- a/docs/images/marimo-models.png
+++ b/docs/images/marimo-models.png
--- a/docs/images/marimo-settings.png
+++ b/docs/images/marimo-settings.png
--- a/docs/images/onyx-login.png
+++ b/docs/images/onyx-login.png
--- a/docs/images/onyx-ollama-form.png
+++ b/docs/images/onyx-ollama-form.png
--- a/docs/images/onyx-ollama-llm.png
+++ b/docs/images/onyx-ollama-llm.png
--- a/docs/images/onyx-query.png
+++ b/docs/images/onyx-query.png
--- a/docs/import.mdx
+++ b/docs/import.mdx
@@ -134,12 +134,22 @@ success

 ### Supported Quantizations

+- `q4_0`
+- `q4_1`
+- `q5_0`
+- `q5_1`
 - `q8_0`

 #### K-means Quantizations

+- `q3_K_S`
+- `q3_K_M`
+- `q3_K_L`
 - `q4_K_S`
 - `q4_K_M`
+- `q5_K_S`
+- `q5_K_M`
+- `q6_K`

 ## Sharing your model on ollama.com

--- a/docs/index.mdx
+++ b/docs/index.mdx
@@ -9,7 +9,7 @@ sidebarTitle: Welcome

 <CardGroup cols={2}>
  <Card title="Quickstart" icon="rocket" href="/quickstart">
-    Get up and running with your first model or integrate Ollama with your favorite tools
+    Get up and running with your first model
  </Card>
  <Card
    title="Download Ollama"
--- a/docs/integrations/claude-code.mdx
+++ b/docs/integrations/claude-code.mdx
@@ -2,12 +2,6 @@
 title: Claude Code
 ---

-Claude Code is Anthropic's agentic coding tool that can read, modify, and execute code in your working directory. 
-
-Open models can be used with Claude Code through Ollama's Anthropic-compatible API, enabling you to use models such as `glm-4.7`, `qwen3-coder`, `gpt-oss`. 
-
-![Claude Code with Ollama](https://files.ollama.com/claude-code.png)
-
 ## Install

 Install [Claude Code](https://code.claude.com/docs/en/overview):
@@ -26,50 +20,50 @@ irm https://claude.ai/install.ps1 | iex

 ## Usage with Ollama

-### Quick setup
-
-```shell
-ollama launch claude
-```
-
-To configure without launching:
-
-```shell
-ollama launch claude --config
-```
-
-### Manual setup
-
 Claude Code connects to Ollama using the Anthropic-compatible API.

 1. Set the environment variables:

 ```shell
-export ANTHROPIC_AUTH_TOKEN=ollama
-export ANTHROPIC_API_KEY=""
 export ANTHROPIC_BASE_URL=http://localhost:11434
+export ANTHROPIC_API_KEY=ollama
 ```

 2. Run Claude Code with an Ollama model:

 ```shell
-claude --model gpt-oss:20b
+claude --model qwen3-coder
 ```

 Or run with environment variables inline:

 ```shell
-ANTHROPIC_AUTH_TOKEN=ollama ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY="" claude --model qwen3-coder 
+ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model qwen3-coder
 ```

-**Note:** Claude Code requires a large context window. We recommend at least 64k tokens. See the [context length documentation](/context-length) for how to adjust context length in Ollama.
+## Connecting to ollama.com
+
+1. Create an [API key](https://ollama.com/settings/keys) on ollama.com
+2. Set the environment variables:
+
+```shell
+export ANTHROPIC_BASE_URL=https://ollama.com
+export ANTHROPIC_API_KEY=<your-api-key>
+```
+
+3. Run Claude Code with a cloud model:
+
+```shell
+claude --model glm-4.7:cloud
+```

 ## Recommended Models

- `qwen3-coder` 
- `glm-4.7`
- `gpt-oss:20b`
- `gpt-oss:120b`
-
-Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).
+### Cloud models
+- `glm-4.7:cloud` - High-performance cloud model
+- `minimax-m2.1:cloud` - Fast cloud model
+- `qwen3-coder:480b` - Large coding model

+### Local models
+- `qwen3-coder` - Excellent for coding tasks
+- `gpt-oss:20b` - Strong general-purpose model
--- a/docs/integrations/codex.mdx
+++ b/docs/integrations/codex.mdx
@@ -13,21 +13,7 @@ npm install -g @openai/codex

 ## Usage with Ollama

-<Note>Codex requires a larger context window. It is recommended to use a context window of at least 64k tokens.</Note>
-
-### Quick setup
-
-```
-ollama launch codex
-```
-
-To configure without launching:
-
-```shell
-ollama launch codex --config
-```
-
-### Manual setup
+<Note>Codex requires a larger context window. It is recommended to use a context window of at least 32K tokens.</Note>

 To use `codex` with Ollama, use the `--oss` flag:

--- a/docs/integrations/droid.mdx
+++ b/docs/integrations/droid.mdx
@@ -11,24 +11,10 @@ Install the [Droid CLI](https://factory.ai/):
 curl -fsSL https://app.factory.ai/cli | sh
 ```

-<Note>Droid requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
+<Note>Droid requires a larger context window. It is recommended to use a context window of at least 32K tokens. See [Context length](/context-length) for more information.</Note>

 ## Usage with Ollama

-### Quick setup
-
-```bash
-ollama launch droid
-```
-
-To configure without launching:
-
-```shell
-ollama launch droid --config
-```
-
-### Manual setup
-
 Add a local configuration block to `~/.factory/config.json`:

 ```json
@@ -87,4 +73,4 @@ Add the cloud configuration block to `~/.factory/config.json`:
   }
   ```

-Run `droid` in a new terminal to load the new settings.
+Run `droid` in a new terminal to load the new settings.
--- a/docs/integrations/marimo.mdx
+++ b/docs/integrations/marimo.mdx
@@ -1,73 +0,0 @@
---
-title: marimo
---
-
-## Install
-
-Install [marimo](https://marimo.io). You can use `pip` or `uv` for this. You 
-can also use `uv` to create a sandboxed environment for marimo by running:
-
-```
-uvx marimo edit --sandbox notebook.py
-```
-
-## Usage with Ollama
-
-1. In marimo, go to the user settings and go to the AI tab. From here
-you can find and configure Ollama as an AI provider. For local use you
-would typically point the base url to `http://localhost:11434/v1`.
-
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/marimo-settings.png" 
-    alt="Ollama settings in marimo"
-    width="50%"
-  />
-</div>
-
-2. Once the AI provider is set up, you can turn on/off specific AI models you'd like to access. 
-
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/marimo-models.png" 
-    alt="Selecting an Ollama model"
-    width="50%"
-  />
-</div>
-
-3. You can also add a model to the list of available models by scrolling to the bottom and using the UI there. 
-
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/marimo-add-model.png" 
-    alt="Adding a new Ollama model"
-    width="50%"
-  />
-</div>
-
-4. Once configured, you can now use Ollama for AI chats in marimo.
-
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/marimo-chat.png" 
-    alt="Configure code completion"
-    width="50%"
-  />
-</div>
-
-4. Alternatively, you can now use Ollama for **inline code completion** in marimo. This can be configured in the "AI Features" tab. 
-
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/marimo-code-completion.png" 
-    alt="Configure code completion"
-    width="50%"
-  />
-</div>
-
-
-## Connecting to ollama.com
-
-1. Sign in to ollama cloud via `ollama signin` 
-2. In the ollama model settings add a model that ollama hosts, like `gpt-oss:120b`.
-3. You can now refer to this model in marimo!
--- a/docs/integrations/onyx.mdx
+++ b/docs/integrations/onyx.mdx
@@ -1,63 +0,0 @@
---
-title: Onyx
---
-
-## Overview
-[Onyx](http://onyx.app/) is a self-hostable Chat UI that integrates with all Ollama models. Features include:
- Creating custom Agents
- Web search
- Deep Research
- RAG over uploaded documents and connected apps
- Connectors to applications like Google Drive, Email, Slack, etc.
- MCP and OpenAPI Actions support
- Image generation
- User/Groups management, RBAC, SSO, etc.
-
-Onyx can be deployed for single users or large organizations.
-
-## Install Onyx
-
-Deploy Onyx with the [quickstart guide](https://docs.onyx.app/deployment/getting_started/quickstart).
-
-<Info>
-Resourcing/scaling docs [here](https://docs.onyx.app/deployment/getting_started/resourcing).
-</Info>
-
-## Usage with Ollama 
-
-1. Login to your Onyx deployment (create an account first).
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/onyx-login.png" 
-    alt="Onyx Login Page"
-    width="75%"
-  />
-</div>
-2. In the set-up process select `Ollama` as the LLM provider.
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/onyx-ollama-llm.png" 
-    alt="Onyx Set Up Form"
-    width="75%"
-  />
-</div>
-3. Provide your **Ollama API URL** and select your models.
-<Note>If you're running Onyx in Docker, to access your computer's local network use `http://host.docker.internal` instead of `http://127.0.0.1`.</Note>
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/onyx-ollama-form.png" 
-    alt="Selecting Ollama Models"
-    width="75%"
-  />
-</div>
-
-You can also easily connect up Onyx Cloud with the `Ollama Cloud` tab of the setup.
-
-## Send your first query
-<div style={{ display: 'flex', justifyContent: 'center' }}>
-  <img 
-    src="/images/onyx-query.png" 
-    alt="Onyx Query Example"
-    width="75%"
-  />
-</div>
--- a/docs/integrations/openclaw.mdx
+++ b/docs/integrations/openclaw.mdx
@@ -1,50 +0,0 @@
---
-title: OpenClaw
---
-
-OpenClaw is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.
-
-## Install
-
-Install [OpenClaw](https://openclaw.ai/) 
-
-```bash
-npm install -g openclaw@latest
-```
-
-Then run the onboarding wizard:
-
-```bash
-openclaw onboard --install-daemon
-```
-
-<Note>OpenClaw requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
-
-## Usage with Ollama
-
-### Quick setup
-
-```bash
-ollama launch openclaw
-```
-
-<Note>Previously known as Clawdbot. `ollama launch clawdbot` still works as an alias.</Note>
-
-This configures OpenClaw to use Ollama and starts the gateway.
-If the gateway is already running, no changes need to be made as the gateway will auto-reload the changes. 
-
-
-To configure without launching:
-
-```shell
-ollama launch openclaw --config
-```
-
-## Recommended Models
-
- `qwen3-coder`
- `glm-4.7`
- `gpt-oss:20b`
- `gpt-oss:120b`
-
-Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).
--- a/docs/integrations/opencode.mdx
+++ b/docs/integrations/opencode.mdx
@@ -1,106 +0,0 @@
---
-title: OpenCode
---
-
-OpenCode is an open-source AI coding assistant that runs in your terminal.
-
-## Install
-
-Install the [OpenCode CLI](https://opencode.ai):
-
-```bash
-curl -fsSL https://opencode.ai/install | bash
-```
-
-<Note>OpenCode requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
-
-## Usage with Ollama
-
-### Quick setup
-
-```bash
-ollama launch opencode
-```
-
-To configure without launching:
-
-```shell
-ollama launch opencode --config
-```
-
-### Manual setup
-
-Add a configuration block to `~/.config/opencode/opencode.json`:
-
-```json
-{
-  "$schema": "https://opencode.ai/config.json",
-  "provider": {
-    "ollama": {
-      "npm": "@ai-sdk/openai-compatible",
-      "name": "Ollama",
-      "options": {
-        "baseURL": "http://localhost:11434/v1"
-      },
-      "models": {
-        "qwen3-coder": {
-          "name": "qwen3-coder"
-        }
-      }
-    }
-  }
-}
-```
-
-## Cloud Models
-
-`glm-4.7:cloud` is the recommended model for use with OpenCode.
-
-Add the cloud configuration to `~/.config/opencode/opencode.json`:
-
-```json
-{
-  "$schema": "https://opencode.ai/config.json",
-  "provider": {
-    "ollama": {
-      "npm": "@ai-sdk/openai-compatible",
-      "name": "Ollama",
-      "options": {
-        "baseURL": "http://localhost:11434/v1"
-      },
-      "models": {
-        "glm-4.7:cloud": {
-          "name": "glm-4.7:cloud"
-        }
-      }
-    }
-  }
-}
-```
-
-## Connecting to ollama.com
-
-1. Create an [API key](https://ollama.com/settings/keys) from ollama.com and export it as `OLLAMA_API_KEY`.
-2. Update `~/.config/opencode/opencode.json` to point to ollama.com:
-
-```json
-{
-  "$schema": "https://opencode.ai/config.json",
-  "provider": {
-    "ollama": {
-      "npm": "@ai-sdk/openai-compatible",
-      "name": "Ollama Cloud",
-      "options": {
-        "baseURL": "https://ollama.com/v1"
-      },
-      "models": {
-        "glm-4.7:cloud": {
-          "name": "glm-4.7:cloud"
-        }
-      }
-    }
-  }
-}
-```
-
-Run `opencode` in a new terminal to load the new settings.
--- a/docs/linux.mdx
+++ b/docs/linux.mdx
@@ -1,5 +1,5 @@
 ---
-title: Linux
+title: "Linux"
 ---

 ## Install
@@ -13,15 +13,14 @@ curl -fsSL https://ollama.com/install.sh | sh
 ## Manual install

 <Note>
-  If you are upgrading from a prior version, you should remove the old libraries
-  with `sudo rm -rf /usr/lib/ollama` first.
+  If you are upgrading from a prior version, you should remove the old libraries with `sudo rm -rf /usr/lib/ollama` first.
 </Note>

 Download and extract the package:

 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
-    | sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
+    | sudo tar zx -C /usr
 ```

 Start Ollama:
@@ -41,8 +40,8 @@ ollama -v
 If you have an AMD GPU, also download and extract the additional ROCm package:

 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
-    | sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz \
+    | sudo tar zx -C /usr
 ```

 ### ARM64 install
@@ -50,8 +49,8 @@ curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tar.zst \
 Download and extract the ARM64-specific package:

 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-arm64.tar.zst \
-    | sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-arm64.tgz \
+    | sudo tar zx -C /usr
 ```

 ### Adding Ollama as a startup service (recommended)
@@ -113,11 +112,7 @@ sudo systemctl status ollama
 ```

 <Note>
-  While AMD has contributed the `amdgpu` driver upstream to the official linux
-  kernel source, the version is older and may not support all ROCm features. We
-  recommend you install the latest driver from
-  https://www.amd.com/en/support/linux-drivers for best support of your Radeon
-  GPU.
+  While AMD has contributed the `amdgpu` driver upstream to the official linux kernel source, the version is older and may not support all ROCm features. We recommend you install the latest driver from https://www.amd.com/en/support/linux-drivers for best support of your Radeon GPU.
 </Note>

 ## Customizing
@@ -146,8 +141,8 @@ curl -fsSL https://ollama.com/install.sh | sh
 Or by re-downloading Ollama:

 ```shell
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
-    | sudo tar x -C /usr
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
+    | sudo tar zx -C /usr
 ```

 ## Installing specific versions
@@ -196,4 +191,4 @@ Remove the downloaded models and Ollama service user and group:
 sudo userdel ollama
 sudo groupdel ollama
 sudo rm -r /usr/share/ollama
-```
+```
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -18,13 +18,13 @@ This quickstart will walk your through running your first model with Ollama. To
  <Tab title="CLI">
    Open a terminal and run the command:

-    ```sh
+    ```
    ollama run gemma3
    ```

  </Tab>
  <Tab title="cURL">
-    ```sh
+    ```
    ollama pull gemma3
    ```

@@ -45,13 +45,13 @@ This quickstart will walk your through running your first model with Ollama. To
  <Tab title="Python">
    Start by downloading a model:

-    ```sh
+    ```
    ollama pull gemma3
    ```

    Then install Ollama's Python library:

-    ```sh
+    ```
    pip install ollama
    ```

@@ -101,42 +101,3 @@ This quickstart will walk your through running your first model with Ollama. To
 </Tabs>

 See a full list of available models [here](https://ollama.com/models).
-
-## Coding 
-
-For coding use cases, we recommend using the `glm-4.7-flash` model. 
-
-Note: this model requires 23 GB of VRAM with 64000 tokens context length.
-```sh
-ollama pull glm-4.7-flash 
-```
-
-Alternatively, you can use a more powerful cloud model (with full context length):
-```sh
-ollama pull glm-4.7:cloud
-```
-
-Use `ollama launch` to quickly set up a coding tool with Ollama models:
-
-```sh
-ollama launch
-```
-
-### Supported integrations
-
- [OpenCode](/integrations/opencode) - Open-source coding assistant
- [Claude Code](/integrations/claude-code) - Anthropic's agentic coding tool
- [Codex](/integrations/codex) - OpenAI's coding assistant
- [Droid](/integrations/droid) - Factory's AI coding agent
-
-### Launch with a specific model
-
-```sh
-ollama launch claude --model glm-4.7-flash
-```
-
-### Configure without launching
-
-```sh
-ollama launch claude --config
-```
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -0,0 +1,3 @@
+# Troubleshooting
+
+For troubleshooting, see the [troubleshooting guide](https://docs.ollama.com/troubleshooting).
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -201,7 +201,7 @@ var (
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
-	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 0)
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
 	UseAuth = Bool("OLLAMA_AUTH")
 	// Enable Vulkan backend
@@ -290,7 +290,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
+		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
 		"OLLAMA_REMOTES":           {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -282,7 +282,7 @@ func TestVar(t *testing.T) {

 func TestContextLength(t *testing.T) {
 	cases := map[string]uint{
-		"":     0,
+		"":     4096,
 		"2048": 2048,
 	}

--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -269,9 +269,6 @@ func (kv KV) OllamaEngineRequired() bool {
 		"qwen25vl",
 		"qwen3", "qwen3moe",
 		"qwen3vl", "qwen3vlmoe",
-		"glm4moelite",
-		"glmocr",
-		"lfm2",
 	}, kv.Architecture())
 }

@@ -859,10 +856,7 @@ func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"bert",
 		"gemma3",
-		"glm4moelite",
-		"glmocr",
 		"gptoss", "gpt-oss",
-		"lfm2",
 		"mistral3",
 		"olmo3",
 		"qwen3", "qwen3moe",
--- a/go.mod
+++ b/go.mod
@@ -15,8 +15,8 @@ require (
 	github.com/spf13/cobra v1.7.0
 	github.com/stretchr/testify v1.9.0
 	github.com/x448/float16 v0.8.4
-	golang.org/x/sync v0.17.0
-	golang.org/x/sys v0.37.0
+	golang.org/x/sync v0.19.0
+	golang.org/x/sys v0.39.0
 )

 require (
@@ -30,8 +30,8 @@ require (
 	github.com/tkrajina/typescriptify-golang-structs v0.2.0
 	github.com/wk8/go-ordered-map/v2 v2.1.8
 	golang.org/x/image v0.22.0
-	golang.org/x/mod v0.30.0
-	golang.org/x/tools v0.38.0
+	golang.org/x/mod v0.31.0
+	golang.org/x/tools v0.40.0
 	gonum.org/v1/gonum v0.15.0
 )

@@ -81,11 +81,11 @@ require (
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/ugorji/go/codec v1.2.12 // indirect
 	golang.org/x/arch v0.8.0 // indirect
-	golang.org/x/crypto v0.43.0
-	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
-	golang.org/x/net v0.46.0 // indirect
-	golang.org/x/term v0.36.0
-	golang.org/x/text v0.30.0
+	golang.org/x/crypto v0.46.0
+	golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93
+	golang.org/x/net v0.48.0 // indirect
+	golang.org/x/term v0.38.0
+	golang.org/x/text v0.32.0
 	google.golang.org/protobuf v1.34.1
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -233,16 +233,16 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
 golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
-golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
+golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
+golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
-golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa h1:t2QcU6V556bFjYgu4L6C+6VrCPyJZ+eyRsABUPs1mz4=
-golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk=
+golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93 h1:fQsdNF2N+/YewlRZiricy4P1iimyPKZ/xwniHj8Q2a0=
+golang.org/x/exp v0.0.0-20251219203646-944ab1f22d93/go.mod h1:EPRbTFwzwjXj9NpYyyrvenVh9Y+GFeEvMNh7Xuz7xgU=
 golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
 golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
 golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
@@ -264,8 +264,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
-golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
+golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
+golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -278,8 +278,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
 golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
-golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
+golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
+golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -289,8 +289,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
-golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -306,17 +306,17 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
-golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
-golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
+golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
+golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
-golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
+golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
+golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
 golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -330,8 +330,8 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
-golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
+golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
--- a/integration/embed_test.go
+++ b/integration/embed_test.go
@@ -73,18 +73,13 @@ func manhattanDistance[V float32 | float64](v1, v2 []V) V {
 }

 func TestEmbedCosineDistanceCorrelation(t *testing.T) {
-	softTimeout, hardTimeout := getTimeouts(t)
-	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()

-	started := time.Now()
 	for _, model := range libraryEmbedModels {
 		t.Run(model, func(t *testing.T) {
-			if time.Since(started) > softTimeout {
-				t.Skip("skipping - soft timeout exceeded")
-			}
 			testCases := []struct {
 				a string
 				b string
@@ -494,19 +489,14 @@ func TestEmbedTruncation(t *testing.T) {

 // TestEmbedLargeInput tests that embedding models can handle large inputs that would exceed typical batch sizes.
 func TestEmbedLargeInput(t *testing.T) {
-	softTimeout, hardTimeout := getTimeouts(t)
-	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
+	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancel()
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()

-	started := time.Now()
 	for _, model := range libraryEmbedModels {
 		model := model
 		t.Run(model, func(t *testing.T) {
-			if time.Since(started) > softTimeout {
-				t.Skip("skipping - soft timeout exceeded")
-			}
 			mctx, mcancel := context.WithTimeout(ctx, 2*time.Minute)
 			defer mcancel()

--- a/integration/imagegen_test.go
+++ b/integration/imagegen_test.go
@@ -1,148 +0,0 @@
-//go:build integration
-
-package integration
-
-import (
-	"context"
-	"encoding/base64"
-	"fmt"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestImageGeneration(t *testing.T) {
-	skipUnderMinVRAM(t, 8)
-
-	type testCase struct {
-		imageGenModel string
-		visionModel   string
-		prompt        string
-		expectedWords []string
-	}
-
-	testCases := []testCase{
-		{
-			imageGenModel: "jmorgan/z-image-turbo",
-			visionModel:   "llama3.2-vision",
-			prompt:        "A cartoon style llama flying like a superhero through the air with clouds in the background",
-			expectedWords: []string{"llama", "flying", "cartoon", "cloud", "sky", "superhero", "air", "animal", "camelid"},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("%s->%s", tc.imageGenModel, tc.visionModel), func(t *testing.T) {
-			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
-			defer cancel()
-
-			client, _, cleanup := InitServerConnection(ctx, t)
-			defer cleanup()
-
-			// Pull both models
-			if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
-				t.Fatalf("failed to pull image gen model: %v", err)
-			}
-			if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
-				t.Fatalf("failed to pull vision model: %v", err)
-			}
-
-			// Generate the image
-			t.Logf("Generating image with prompt: %s", tc.prompt)
-			imageBase64, err := generateImage(ctx, client, tc.imageGenModel, tc.prompt)
-			if err != nil {
-				if strings.Contains(err.Error(), "image generation not available") {
-					t.Skip("Target system does not support image generation")
-				} else if strings.Contains(err.Error(), "executable file not found in") { // Windows pattern, not yet supported
-					t.Skip("Windows does not support image generation yet")
-				} else if strings.Contains(err.Error(), "CUDA driver version is insufficient") {
-					t.Skip("Driver is too old")
-				} else if strings.Contains(err.Error(), "insufficient memory for image generation") {
-					t.Skip("insufficient memory for image generation")
-				} else if strings.Contains(err.Error(), "error while loading shared libraries: libcuda.so.1") { // AMD GPU or CPU
-					t.Skip("CUDA GPU is not available")
-				} else if strings.Contains(err.Error(), "ollama-mlx: no such file or directory") {
-					// most likely linux arm - not supported yet
-					t.Skip("unsupported architecture")
-				}
-				t.Fatalf("failed to generate image: %v", err)
-			}
-
-			imageData, err := base64.StdEncoding.DecodeString(imageBase64)
-			if err != nil {
-				t.Fatalf("failed to decode image: %v", err)
-			}
-			t.Logf("Generated image: %d bytes", len(imageData))
-
-			// Preload vision model and check GPU loading
-			err = client.Generate(ctx, &api.GenerateRequest{Model: tc.visionModel}, func(response api.GenerateResponse) error { return nil })
-			if err != nil {
-				t.Fatalf("failed to load vision model: %v", err)
-			}
-
-			// Use vision model to describe the image
-			chatReq := api.ChatRequest{
-				Model: tc.visionModel,
-				Messages: []api.Message{
-					{
-						Role:    "user",
-						Content: "Describe this image in detail. What is shown? What style is it? What is the main subject doing?",
-						Images:  []api.ImageData{imageData},
-					},
-				},
-				Stream: &stream,
-				Options: map[string]any{
-					"seed":        42,
-					"temperature": 0.0,
-				},
-			}
-
-			// Verify the vision model's response contains expected keywords
-			response := DoChat(ctx, t, client, chatReq, tc.expectedWords, 240*time.Second, 30*time.Second)
-			if response != nil {
-				t.Logf("Vision model response: %s", response.Content)
-
-				// Additional detailed check for keywords
-				content := strings.ToLower(response.Content)
-				foundWords := []string{}
-				missingWords := []string{}
-				for _, word := range tc.expectedWords {
-					if strings.Contains(content, word) {
-						foundWords = append(foundWords, word)
-					} else {
-						missingWords = append(missingWords, word)
-					}
-				}
-				t.Logf("Found keywords: %v", foundWords)
-				if len(missingWords) > 0 {
-					t.Logf("Missing keywords (at least one was found so test passed): %v", missingWords)
-				}
-			}
-		})
-	}
-}
-
-// generateImage calls the Ollama API to generate an image and returns the base64 image data
-func generateImage(ctx context.Context, client *api.Client, model, prompt string) (string, error) {
-	var imageBase64 string
-
-	err := client.Generate(ctx, &api.GenerateRequest{
-		Model:  model,
-		Prompt: prompt,
-	}, func(resp api.GenerateResponse) error {
-		if resp.Image != "" {
-			imageBase64 = resp.Image
-		}
-		return nil
-	})
-	if err != nil {
-		return "", fmt.Errorf("failed to generate image: %w", err)
-	}
-
-	if imageBase64 == "" {
-		return "", fmt.Errorf("no image data in response")
-	}
-
-	return imageBase64, nil
-}
--- a/integration/tools_test.go
+++ b/integration/tools_test.go
@@ -21,10 +21,9 @@ func testPropsMap(m map[string]api.ToolProperty) *api.ToolPropertiesMap {
 }

 func TestAPIToolCalling(t *testing.T) {
-	initialTimeout := 90 * time.Second
-	streamTimeout := 90 * time.Second
-	softTimeout, hardTimeout := getTimeouts(t)
-	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
+	initialTimeout := 60 * time.Second
+	streamTimeout := 60 * time.Second
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
 	defer cancel()

 	client, _, cleanup := InitServerConnection(ctx, t)
@@ -48,12 +47,8 @@ func TestAPIToolCalling(t *testing.T) {
 		"granite3.3":    7,
 	}

-	started := time.Now()
 	for _, model := range libraryToolsModels {
 		t.Run(model, func(t *testing.T) {
-			if time.Since(started) > softTimeout {
-				t.Skip("skipping - soft timeout exceeded")
-			}
 			if v, ok := minVRAM[model]; ok {
 				skipUnderMinVRAM(t, v)
 			}
@@ -136,7 +131,7 @@ func TestAPIToolCalling(t *testing.T) {
 					t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather")
 				}

-				if _, ok := lastToolCall.Function.Arguments.Get("location"); !ok {
+				if _, ok := lastToolCall.Function.Arguments["location"]; !ok {
 					t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments.String())
 				}
 			case <-ctx.Done():
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -38,7 +38,6 @@ var (

 	// Note: add newer models at the top of the list to test them first
 	ollamaEngineChatModels = []string{
-		"lfm2.5-thinking",
 		"ministral-3",
 		"qwen3-coder:30b",
 		"gpt-oss:20b",
@@ -144,7 +143,6 @@ var (
 		"granite3.3",
 		"hermes3",
 		"internlm2",
-		"lfm2.5-thinking",
 		"llama-guard3",
 		"llama-pro",
 		"llama2-chinese",
@@ -265,7 +263,6 @@ var (
 		"snowflake-arctic-embed2",
 	}
 	libraryToolsModels = []string{
-		"lfm2.5-thinking",
 		"qwen3-vl",
 		"gpt-oss:20b",
 		"gpt-oss:120b",
--- a/llama/README.md
+++ b/llama/README.md
@@ -14,28 +14,25 @@ make -f Makefile.sync apply-patches

 ### Updating Base Commit

-To update to a new base commit:
+**Pin to new base commit**

-1. **Update FETCH_HEAD** in `Makefile.sync` to the new commit hash.
+To change the base commit, update `FETCH_HEAD` in `Makefile.sync`.

-2. **Check for upstreamed patches**: Before applying, review if any patches have been merged upstream. Remove those patches from `./patches/` to avoid conflicts.
+When updating to a newer base commit, the existing patches may not apply cleanly and require manual merge resolution.

-3. **Apply patches**:
-   ```shell
-   make -f Makefile.sync apply-patches
-   ```
+Start by applying the patches. If any patch has conflicts, `git am` will stop at the first failure.

-4. **Resolve conflicts** (if any): When `git am` fails on a patch:
-   - Fix conflicts in `./vendor/`
-   - Stage the resolved files: `git -C llama/vendor add <file>`
-   - Continue: `git -C llama/vendor am --continue`
-   - Re-run: `make -f Makefile.sync apply-patches`
-   - Repeat until all patches are applied.
+```shell
+make -f Makefile.sync apply-patches
+```

-5. **Regenerate patches and sync**:
-   ```shell
-   make -f Makefile.sync format-patches sync
-   ```
+If there are conflicts, you will see an error message. Resolve the conflicts in `./vendor/`, stage the resolved files, continue the patch series with `git am --continue`, and then rerun `make -f Makefile.sync apply-patches`. Repeat until all patches are successfully applied.
+
+Once all patches are applied, commit the changes to the tracking repository.
+
+```shell
+make -f Makefile.sync format-patches sync
+```

 ### Generating Patches

--- a/llama/build-info.cpp
+++ b/llama/build-info.cpp
@@ -1,4 +1,4 @@
 int LLAMA_BUILD_NUMBER = 0;
-char const *LLAMA_COMMIT = "a5bb8ba4c50257437630c136210396810741bbf7";
+char const *LLAMA_COMMIT = "ec98e2002";
 char const *LLAMA_COMPILER = "";
 char const *LLAMA_BUILD_TARGET = "";
--- a/llama/llama.cpp/common/common.cpp
+++ b/llama/llama.cpp/common/common.cpp
@@ -251,7 +251,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
        case GGML_SCHED_PRIO_REALTIME: p = -20; break;
    }

-    if (setpriority(PRIO_PROCESS, 0, p) != 0) {
+    if (setpriority(PRIO_PROCESS, 0, p) != 0) { // setpriority() returns 0 on success and -1 (with errno set) on failure
        LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
        return false;
    }
@@ -1078,15 +1078,12 @@ struct common_init_result::impl {
    impl() = default;
    ~impl() = default;

-    // note: the order in which model, context, etc. are declared matters because their destructors will be called bottom-to-top
-
    llama_model_ptr   model;
    llama_context_ptr context;

    std::vector<llama_adapter_lora_ptr> lora;

    std::vector<common_sampler_ptr> samplers;
-    std::vector<llama_sampler_seq_config> samplers_seq_config;
 };

 common_init_result::common_init_result(common_params & params) :
@@ -1095,9 +1092,9 @@ common_init_result::common_init_result(common_params & params) :
    auto cparams = common_context_params_to_llama(params);

    if (params.fit_params) {
-        LOG_INF("%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n", __func__);
+        LOG_INF("%s: fitting params to device memory, to report bugs during this step use -fit off (or --verbose if you can't)\n", __func__);
        llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
-            params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target.data(), params.fit_params_min_ctx,
+            params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
            params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
    }

@@ -1110,25 +1107,6 @@ common_init_result::common_init_result(common_params & params) :

    const llama_vocab * vocab = llama_model_get_vocab(model);

-    // load and optionally apply lora adapters (must be loaded before context creation)
-    for (auto & la : params.lora_adapters) {
-        llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
-        if (lora == nullptr) {
-            LOG_ERR("%s: failed to load lora adapter '%s'\n", __func__, la.path.c_str());
-            pimpl->model.reset(model);
-            return;
-        }
-
-        char buf[1024];
-        la.ptr = lora.get();
-        llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
-        la.task_name = buf;
-        llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
-        la.prompt_prefix = buf;
-        pimpl->lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
-    }
-
    // updates params.sampling
    // TODO: fix naming
    common_init_sampler_from_model(model, params.sampling);
@@ -1163,18 +1141,10 @@ common_init_result::common_init_result(common_params & params) :
    //    params.sampling.dry_penalty_last_n = llama_n_ctx(lctx);
    //}

-    // init the backend samplers as part of the context creation
    pimpl->samplers.resize(cparams.n_seq_max);
-    pimpl->samplers_seq_config.resize(cparams.n_seq_max);

    for (int i = 0; i < (int) cparams.n_seq_max; ++i) {
        pimpl->samplers[i].reset(common_sampler_init(model, params.sampling));
-        pimpl->samplers_seq_config[i] = { i, common_sampler_get(pimpl->samplers[i].get()) };
-    }
-
-    if (params.sampling.backend_sampling) {
-        cparams.samplers   = pimpl->samplers_seq_config.data();
-        cparams.n_samplers = pimpl->samplers_seq_config.size();
    }

    llama_context * lctx = llama_init_from_model(model, cparams);
@@ -1198,12 +1168,6 @@ common_sampler * common_init_result::sampler(llama_seq_id seq_id) {
    return pimpl->samplers[seq_id].get();
 }

-void common_init_result::reset_samplers() {
-    for (int i = 0; i < (int) pimpl->samplers.size(); ++i) {
-        llama_sampler_reset(common_sampler_get(pimpl->samplers[i].get()));
-    }
-}
-
 std::vector<llama_adapter_lora_ptr> & common_init_result::lora() {
    return pimpl->lora;
 }
@@ -1279,6 +1243,24 @@ common_init_result_ptr common_init_from_params(common_params & params) {
        }
    }

+    // load and optionally apply lora adapters
+    for (auto & la : params.lora_adapters) {
+        llama_adapter_lora_ptr lora;
+        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
+        if (lora == nullptr) {
+            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+            return res;
+        }
+
+        char buf[1024];
+        la.ptr = lora.get();
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
+        la.task_name = buf;
+        llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
+        la.prompt_prefix = buf;
+        res->lora().emplace_back(std::move(lora)); // copy to list of loaded adapters
+    }
+
    if (!params.lora_init_without_apply) {
        common_set_adapter_lora(lctx, params.lora_adapters);
    }
@@ -1319,9 +1301,6 @@ common_init_result_ptr common_init_from_params(common_params & params) {
        llama_synchronize(lctx);
        llama_perf_context_reset(lctx);
        llama_set_warmup(lctx, false);
-
-        // reset samplers to reset RNG state after warmup to the seeded state
-        res->reset_samplers();
    }

    return res;
@@ -1360,12 +1339,14 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
        mparams.devices = params.devices.data();
    }

-    mparams.n_gpu_layers    = params.n_gpu_layers;
+    if (params.n_gpu_layers != -1) {
+        mparams.n_gpu_layers = params.n_gpu_layers;
+    }
+
    mparams.main_gpu        = params.main_gpu;
    mparams.split_mode      = params.split_mode;
    mparams.tensor_split    = params.tensor_split;
    mparams.use_mmap        = params.use_mmap;
-    mparams.use_direct_io   = params.use_direct_io;
    mparams.use_mlock       = params.use_mlock;
    mparams.check_tensors   = params.check_tensors;
    mparams.use_extra_bufts = !params.no_extra_bufts;
--- a/llama/llama.cpp/common/common.h
+++ b/llama/llama.cpp/common/common.h
@@ -57,8 +57,6 @@ extern const char * LLAMA_COMMIT;
 extern const char * LLAMA_COMPILER;
 extern const char * LLAMA_BUILD_TARGET;

-const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
-
 struct common_control_vector_load_info;

 //
@@ -82,8 +80,6 @@ int32_t cpu_get_num_math();
 //

 enum llama_example {
-    LLAMA_EXAMPLE_BATCHED,
-    LLAMA_EXAMPLE_DEBUG,
    LLAMA_EXAMPLE_COMMON,
    LLAMA_EXAMPLE_SPECULATIVE,
    LLAMA_EXAMPLE_COMPLETION,
@@ -121,7 +117,6 @@ enum common_sampler_type {
    COMMON_SAMPLER_TYPE_INFILL      = 9,
    COMMON_SAMPLER_TYPE_PENALTIES   = 10,
    COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
-    COMMON_SAMPLER_TYPE_ADAPTIVE_P  = 12,
 };

 // dimensionality reduction methods, used by cvector-generator
@@ -169,34 +164,32 @@ enum common_params_sampling_config : uint64_t {
 struct common_params_sampling {
    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler

-    int32_t n_prev             = 64;     // number of previous tokens to remember
-    int32_t n_probs            = 0;      // if greater than 0, output the probabilities of top n_probs tokens.
-    int32_t min_keep           = 0;      // 0 = disabled, otherwise samplers should return at least min_keep tokens
-    int32_t top_k              = 40;     // <= 0 to use vocab size
-    float   top_p              = 0.95f;  // 1.0 = disabled
-    float   min_p              = 0.05f;  // 0.0 = disabled
-    float   xtc_probability    = 0.00f;  // 0.0 = disabled
-    float   xtc_threshold      = 0.10f;  // > 0.5 disables XTC
-    float   typ_p              = 1.00f;  // typical_p, 1.0 = disabled
-    float   temp               = 0.80f;  // <= 0.0 to sample greedily, 0.0 to not output probabilities
-    float   dynatemp_range     = 0.00f;  // 0.0 = disabled
-    float   dynatemp_exponent  = 1.00f;  // controls how entropy maps to temperature in dynamic temperature sampler
-    int32_t penalty_last_n     = 64;     // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float   penalty_repeat     = 1.00f;  // 1.0 = disabled
-    float   penalty_freq       = 0.00f;  // 0.0 = disabled
-    float   penalty_present    = 0.00f;  // 0.0 = disabled
-    float   dry_multiplier     = 0.0f;   // 0.0 = disabled;      DRY repetition penalty for tokens extending repetition:
-    float   dry_base           = 1.75f;  // 0.0 = disabled;      multiplier * base ^ (length of sequence before token - allowed length)
-    int32_t dry_allowed_length = 2;      // tokens extending repetitions beyond this receive penalty
-    int32_t dry_penalty_last_n = -1;     // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
-    float   adaptive_target    = -1.0f;  // select tokens near this probability (valid range 0.0 to 1.0; negative = disabled)
-    float   adaptive_decay     = 0.90f;  // EMA decay for adaptation; history ≈ 1/(1-decay) tokens (0.0 - 0.99)
-    int32_t mirostat           = 0;      // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float   top_n_sigma        = -1.00f; // -1.0 = disabled
-    float   mirostat_tau       = 5.00f;  // target entropy
-    float   mirostat_eta       = 0.10f;  // learning rate
+    int32_t n_prev             = 64;    // number of previous tokens to remember
+    int32_t n_probs            = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep           = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k              = 40;    // <= 0 to use vocab size
+    float   top_p              = 0.95f; // 1.0 = disabled
+    float   min_p              = 0.05f; // 0.0 = disabled
+    float   xtc_probability    = 0.00f; // 0.0 = disabled
+    float   xtc_threshold      = 0.10f; // > 0.5 disables XTC
+    float   typ_p              = 1.00f; // typical_p, 1.0 = disabled
+    float   temp               = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float   dynatemp_range     = 0.00f; // 0.0 = disabled
+    float   dynatemp_exponent  = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n     = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float   penalty_repeat     = 1.00f; // 1.0 = disabled
+    float   penalty_freq       = 0.00f; // 0.0 = disabled
+    float   penalty_present    = 0.00f; // 0.0 = disabled
+    float   dry_multiplier     = 0.0f;  // 0.0 = disabled;      DRY repetition penalty for tokens extending repetition:
+    float   dry_base           = 1.75f; // 0.0 = disabled;      multiplier * base ^ (length of sequence before token - allowed length)
+    int32_t dry_allowed_length = 2;     // tokens extending repetitions beyond this receive penalty
+    int32_t dry_penalty_last_n = -1;    // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
+    int32_t mirostat           = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float   top_n_sigma        = -1.00f;// -1.0 = disabled
+    float   mirostat_tau       = 5.00f; // target entropy
+    float   mirostat_eta       = 0.10f; // learning rate
    bool    ignore_eos         = false;
-    bool    no_perf            = false;  // disable performance metrics
+    bool    no_perf            = false; // disable performance metrics
    bool    timing_per_token   = false;

    uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers
@@ -223,8 +216,6 @@ struct common_params_sampling {
    std::vector<llama_logit_bias> logit_bias;     // logit biases to apply
    std::vector<llama_logit_bias> logit_bias_eog; // pre-calculated logit biases for EOG tokens

-    bool backend_sampling = false;
-
    bool has_logit_bias() const {
        return !logit_bias.empty();
    }
@@ -286,7 +277,6 @@ struct common_params_diffusion {
 };

 // reasoning API response format (not to be confused as chat template's reasoning format)
-// only used by server
 enum common_reasoning_format {
    COMMON_REASONING_FORMAT_NONE,
    COMMON_REASONING_FORMAT_AUTO,            // Same as deepseek, using `message.reasoning_content`
@@ -339,14 +329,12 @@ struct common_params {
    // offload params
    std::vector<ggml_backend_dev_t> devices; // devices to use for offloading

-    int32_t n_gpu_layers       = -1;   // number of layers to store in VRAM, -1 is auto, <= -2 is all
-    int32_t main_gpu           = 0;    // the GPU that is used for scratch and small tensors
-    float   tensor_split[128]  = {0};  // how split tensors should be distributed across GPUs
-    bool    fit_params         = true; // whether to fit unset model/context parameters to free device memory
-    int32_t fit_params_min_ctx = 4096; // minimum context size to set when trying to reduce memory use
-
-    // margin per device in bytes for fitting parameters to free memory:
-    std::vector<size_t> fit_params_target = std::vector<size_t>(llama_max_devices(), 1024 * 1024*1024);
+    int32_t n_gpu_layers       = -1;               // number of layers to store in VRAM (-1 - use default)
+    int32_t main_gpu           = 0;                // the GPU that is used for scratch and small tensors
+    float   tensor_split[128]  = {0};              // how split tensors should be distributed across GPUs
+    bool    fit_params         = true;             // whether to fit unset model/context parameters to free device memory
+    size_t  fit_params_target  = 1024 * 1024*1024; // margin per device in bytes for fitting parameters to free memory
+    int32_t fit_params_min_ctx = 4096;             // minimum context size to set when trying to reduce memory use

    enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs

@@ -382,11 +370,6 @@ struct common_params {
    std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding          // NOLINT
    std::string logits_file          = ""; // file for saving *all* logits                                  // NOLINT

-    // llama-debug specific options
-    std::string logits_output_dir = "data"; // directory for saving logits output files                     // NOLINT
-    bool        save_logits       = false;  // whether to save logits to files                              // NOLINT
-    std::vector<std::string> tensor_filter; // filter tensor names for debug output (regex)                 // NOLINT
-
    std::vector<std::string> in_files;   // all input files
    std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
    std::vector<llama_model_kv_override> kv_overrides;
@@ -437,8 +420,7 @@ struct common_params {
    bool kv_unified        = false; // enable unified KV cache

    bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
-    bool use_mmap          = true;  // enable mmap to use filesystem cache
-    bool use_direct_io     = true;  // read from disk without buffering for faster model loading
+    bool use_mmap          = true;  // use mmap for faster loads
    bool use_mlock         = false; // use mlock to keep model in memory
    bool verbose_prompt    = false; // print prompt tokens before generation
    bool display_prompt    = true;  // print prompt before generation
@@ -482,7 +464,6 @@ struct common_params {
    int32_t timeout_write     = timeout_read; // http write timeout in seconds
    int32_t n_threads_http    = -1;           // number of threads to process HTTP requests (TODO: support threadpool)
    int32_t n_cache_reuse     = 0;            // min chunk size to reuse from the cache via KV shifting
-    bool    cache_prompt      = true;         // whether to enable prompt caching
    int32_t n_ctx_checkpoints = 8;            // max number of context checkpoints per slot
    int32_t cache_ram_mib     = 8192;         // -1 = no limit, 0 - disable, 1 = 1 MiB, etc.

@@ -494,8 +475,7 @@ struct common_params {
    bool enable_chat_template = true;
    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
    int reasoning_budget = -1;
-    bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
-    int sleep_idle_seconds = -1;   // if >0, server will sleep after this many seconds of idle time
+    bool prefill_assistant = true;                                                                          // if true, any trailing assistant message will be prefilled into the response

    std::vector<std::string> api_keys;

@@ -504,11 +484,8 @@ struct common_params {

    std::map<std::string, std::string> default_template_kwargs;

-    // webui configs
-    bool webui = true;
-    std::string webui_config_json;
-
    // "advanced" endpoints are disabled by default for better security
+    bool webui            = true;
    bool endpoint_slots   = true;
    bool endpoint_props   = false; // only control POST requests, not GET
    bool endpoint_metrics = false;
@@ -708,9 +685,7 @@ struct common_init_result {

    llama_model * model();
    llama_context * context();
-
    common_sampler * sampler(llama_seq_id seq_id);
-    void reset_samplers();

    std::vector<llama_adapter_lora_ptr> & lora();

--- a/llama/llama.cpp/common/sampling.cpp
+++ b/llama/llama.cpp/common/sampling.cpp
@@ -104,9 +104,10 @@ struct ring_buffer {
 struct common_sampler {
    common_params_sampling params;

-    struct llama_sampler * grmr;
    struct llama_sampler * chain;

+    bool grammar;
+
    ring_buffer<llama_token> prev;

    std::vector<llama_token_data> cur;
@@ -120,34 +121,17 @@ struct common_sampler {
    }

    void set_logits(struct llama_context * ctx, int idx) {
-        const float *       sampled_probs  = llama_get_sampled_probs_ith     (ctx, idx);
-        const float *       sampled_logits = llama_get_sampled_logits_ith    (ctx, idx);
-        const llama_token * sampled_ids    = llama_get_sampled_candidates_ith(ctx, idx);
+        const auto * logits = llama_get_logits_ith(ctx, idx);

        const llama_model * model = llama_get_model(ctx);
        const llama_vocab * vocab = llama_model_get_vocab(model);

        const int n_vocab = llama_vocab_n_tokens(vocab);

-        if (sampled_probs) {
-            const uint32_t sampled_probs_count = llama_get_sampled_probs_count_ith(ctx, idx);
-            cur.resize(sampled_probs_count);
-            for (uint32_t i = 0; i < sampled_probs_count; ++i) {
-                cur[i] = llama_token_data{sampled_ids[i], sampled_logits[i], sampled_probs[i]};
-            }
-        } else if (sampled_logits) {
-            const uint32_t sampled_logits_count = llama_get_sampled_logits_count_ith(ctx, idx);
-            cur.resize(sampled_logits_count);
-            for (uint32_t i = 0; i < sampled_logits_count; i++) {
-                cur[i] = llama_token_data{sampled_ids[i], sampled_logits[i], 0.0f};
-            }
-        } else {
-            const auto * logits = llama_get_logits_ith(ctx, idx);
-            GGML_ASSERT(logits != nullptr);
-            cur.resize(n_vocab);
-            for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-                cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
-            }
+        GGML_ASSERT(logits != nullptr); // keep the pre-image null check: logits is dereferenced in the loop below
+        cur.resize(n_vocab);
+        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
+            cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
        }

        cur_p = { cur.data(), cur.size(), -1, false };
@@ -167,59 +151,54 @@ std::string common_params_sampling::print() const {
            "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
            "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
            "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
-            "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f, adaptive_target = %.3f, adaptive_decay = %.3f",
+            "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
            penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
            dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
            top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp,
-            mirostat, mirostat_eta, mirostat_tau, adaptive_target, adaptive_decay);
+            mirostat, mirostat_eta, mirostat_tau);

    return std::string(result);
 }

-struct common_sampler * common_sampler_init(const struct llama_model * model, struct common_params_sampling & params) {
+struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_sampler_chain_params lparams = llama_sampler_chain_default_params();

    lparams.no_perf = params.no_perf;

-    llama_sampler * grmr = nullptr;
    llama_sampler * chain = llama_sampler_chain_init(lparams);

+    bool grammar = false;
    std::vector<llama_sampler *> samplers;

    if (params.grammar.compare(0, 11, "%llguidance") == 0) {
 #ifdef LLAMA_USE_LLGUIDANCE
-        grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
+        samplers.push_back(llama_sampler_init_llg(vocab, "lark", params.grammar.c_str()));
+        grammar = true;
 #else
        GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
 #endif // LLAMA_USE_LLGUIDANCE
    } else {
        std::vector<std::string> trigger_patterns;
+        std::vector<std::string> patterns_anywhere;
        std::vector<llama_token> trigger_tokens;
        for (const auto & trigger : params.grammar_triggers) {
            switch (trigger.type) {
                case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
                {
                    const auto & word = trigger.value;
-                    trigger_patterns.push_back(regex_escape(word));
+                    patterns_anywhere.push_back(regex_escape(word));
                    break;
                }
                case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
                {
-                    trigger_patterns.push_back(trigger.value);
+                    patterns_anywhere.push_back(trigger.value);
                    break;
                }
                case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
                {
-                    const auto & pattern = trigger.value;
-                    std::string anchored = "^$";
-                    if (!pattern.empty()) {
-                        anchored = (pattern.front() != '^' ? "^" : "")
-                            + pattern
-                            + (pattern.back() != '$' ? "$" : "");
-                    }
-                    trigger_patterns.push_back(anchored);
+                    trigger_patterns.push_back(trigger.value);
                    break;
                }
                case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN:
@@ -233,6 +212,10 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
            }
        }

+        if (!patterns_anywhere.empty()) {
+            trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*");
+        }
+
        std::vector<const char *> trigger_patterns_c;
        trigger_patterns_c.reserve(trigger_patterns.size());
        for (const auto & regex : trigger_patterns) {
@@ -241,12 +224,15 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st

        if (!params.grammar.empty()) {
             if (params.grammar_lazy) {
-                 grmr = llama_sampler_init_grammar_lazy_patterns(vocab, params.grammar.c_str(), "root",
-                         trigger_patterns_c.data(), trigger_patterns_c.size(),
-                         trigger_tokens.data(), trigger_tokens.size());
+                 samplers.push_back(
+                         llama_sampler_init_grammar_lazy_patterns(vocab, params.grammar.c_str(), "root",
+                             trigger_patterns_c.data(), trigger_patterns_c.size(),
+                             trigger_tokens.data(),     trigger_tokens.size()));
             } else {
-                 grmr = llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
+                 samplers.push_back(llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"));
             }
+
+             grammar = true;
        }
    }

@@ -255,9 +241,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
    }

    if (params.mirostat == 0) {
-
-        bool use_adaptive_p = false; // see below
-
        for (const auto & cnstr : params.samplers) {
            switch (cnstr) {
                case COMMON_SAMPLER_TYPE_DRY:
@@ -267,54 +250,43 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
                        for (const auto & str : params.dry_sequence_breakers) {
                            c_breakers.push_back(str.c_str());
                        }
-                        samplers.push_back(llama_sampler_init_dry(vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
+
+                        samplers.push_back(llama_sampler_init_dry    (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                    }
                    break;
                case COMMON_SAMPLER_TYPE_TOP_K:
-                    samplers.push_back(llama_sampler_init_top_k(params.top_k));
+                    samplers.push_back(llama_sampler_init_top_k      (params.top_k));
                    break;
                case COMMON_SAMPLER_TYPE_TOP_P:
-                    samplers.push_back(llama_sampler_init_top_p(params.top_p, params.min_keep));
+                    samplers.push_back(llama_sampler_init_top_p      (params.top_p, params.min_keep));
                    break;
                case COMMON_SAMPLER_TYPE_TOP_N_SIGMA:
                    samplers.push_back(llama_sampler_init_top_n_sigma(params.top_n_sigma));
                    break;
                case COMMON_SAMPLER_TYPE_MIN_P:
-                    samplers.push_back(llama_sampler_init_min_p(params.min_p, params.min_keep));
+                    samplers.push_back(llama_sampler_init_min_p      (params.min_p, params.min_keep));
                    break;
                case COMMON_SAMPLER_TYPE_XTC:
-                    samplers.push_back(llama_sampler_init_xtc(params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
+                    samplers.push_back(llama_sampler_init_xtc        (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
                    break;
                case COMMON_SAMPLER_TYPE_TYPICAL_P:
-                    samplers.push_back(llama_sampler_init_typical(params.typ_p, params.min_keep));
+                    samplers.push_back(llama_sampler_init_typical    (params.typ_p, params.min_keep));
                    break;
                case COMMON_SAMPLER_TYPE_TEMPERATURE:
-                    samplers.push_back(llama_sampler_init_temp_ext(params.temp, params.dynatemp_range, params.dynatemp_exponent));
+                    samplers.push_back(llama_sampler_init_temp_ext   (params.temp, params.dynatemp_range, params.dynatemp_exponent));
                    break;
                case COMMON_SAMPLER_TYPE_INFILL:
-                    samplers.push_back(llama_sampler_init_infill(vocab));
+                    samplers.push_back(llama_sampler_init_infill     (vocab));
                    break;
                case COMMON_SAMPLER_TYPE_PENALTIES:
-                    samplers.push_back(llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
-                    break;
-                case COMMON_SAMPLER_TYPE_ADAPTIVE_P:
-                    // the `adaptive-p` sampler is like `dist` and `mirostat` in that it selects
-                    // a single token, so we will add `dist` at the end of the chain by default,
-                    // unless the user specifically included `adaptive-p`. we set this flag here
-                    // so we know to add the sampler at the very end.
-                    use_adaptive_p = true;
+                    samplers.push_back(llama_sampler_init_penalties  (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
                    break;
                default:
                    GGML_ASSERT(false && "unknown sampler type");
            }
        }
-        if (use_adaptive_p) {
-            // only if user explicitly included adaptive-p sampler
-            samplers.push_back(llama_sampler_init_adaptive_p(params.adaptive_target, params.adaptive_decay, params.seed));
-        } else {
-            // default: sample from distribution
-            samplers.push_back(llama_sampler_init_dist(params.seed));
-        }
+
+        samplers.push_back(llama_sampler_init_dist(params.seed));
    } else if (params.mirostat == 1) {
        samplers.push_back(llama_sampler_init_temp(params.temp));
        samplers.push_back(llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
@@ -329,16 +301,10 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
        llama_sampler_chain_add(chain, smpl);
    }

-    if (grmr && params.backend_sampling) {
-        LOG_WRN("%s: backend sampling is not compatible with grammar, disabling\n", __func__);
-
-        params.backend_sampling = false;
-    }
-
    auto * result = new common_sampler {
        /* .params  = */ params,
-        /* .grmr    = */ grmr,
        /* .chain   = */ chain,
+        /* .grammar = */ grammar,
        /* .prev    = */ ring_buffer<llama_token>(std::max(32, params.n_prev)),
        /* .cur     = */ {},
        /* .cur_p   = */ {},
@@ -348,45 +314,47 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
 }

 void common_sampler_free(struct common_sampler * gsmpl) {
-    if (!gsmpl) {
-        return;
+    if (gsmpl) {
+        llama_sampler_free(gsmpl->chain);
+
+        delete gsmpl;
    }
-
-    llama_sampler_free(gsmpl->grmr);
-    llama_sampler_free(gsmpl->chain);
-
-    delete gsmpl;
 }

 void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
-    if (!gsmpl) {
-        return;
-    }
-
    const auto tm = gsmpl->tm();

-    if (gsmpl->grmr && accept_grammar) {
-        llama_sampler_accept(gsmpl->grmr, token);
-    }
+    if (gsmpl->grammar) {
+        const int n_smpl = llama_sampler_chain_n(gsmpl->chain);

-    llama_sampler_accept(gsmpl->chain, token);
+        for (int i = 0; i < n_smpl; i++) {
+            auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);
+
+            // the grammar sampler is always the first one
+            if (i == 0) {
+                if (accept_grammar) {
+                    llama_sampler_accept(smpl, token);
+                }
+            } else {
+                llama_sampler_accept(smpl, token);
+            }
+        }
+    } else {
+        llama_sampler_accept(gsmpl->chain, token);
+    }

    gsmpl->prev.push_back(token);
 }

 void common_sampler_reset(struct common_sampler * gsmpl) {
-    if (!gsmpl) {
-        return;
-    }
-
    gsmpl->reset();
 }

 struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
    return new common_sampler {
        /* .params  = */ gsmpl->params,
-        /* .grmr    = */ llama_sampler_clone(gsmpl->grmr),
        /* .chain   = */ llama_sampler_clone(gsmpl->chain),
+        /* .grammar = */ gsmpl->grammar,
        /* .prev    = */ gsmpl->prev,
        /* .cur     = */ gsmpl->cur,
        /* .cur_p   = */ gsmpl->cur_p,
@@ -439,14 +407,10 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
 }

 struct llama_sampler * common_sampler_get(const struct common_sampler * gsmpl) {
-    if (!gsmpl) {
-        return nullptr;
-    }
-
    return gsmpl->chain;
 }

-llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
+llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx) {
    llama_synchronize(ctx);

    // start measuring sampling time after the llama_context synchronization in order to not measure any ongoing async operations
@@ -454,61 +418,11 @@ llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_co

    llama_token id = LLAMA_TOKEN_NULL;

-    auto & grmr  = gsmpl->grmr;
    auto & chain = gsmpl->chain;
    auto & cur_p = gsmpl->cur_p; // initialized by set_logits

-    // Check if a backend sampler has already sampled a token in which case we
-    // return that token id directly.
-    {
-        id = llama_get_sampled_token_ith(ctx, idx);
-
-        if (id != LLAMA_TOKEN_NULL) {
-            LOG_DBG("%s: Backend sampler selected token: '%d'. Will not run any CPU samplers\n", __func__, id);
-
-            GGML_ASSERT(!gsmpl->grmr && "using grammar in combination with backend sampling is not supported");
-
-            // TODO: simplify
-            gsmpl->cur.resize(1);
-            gsmpl->cur[0] = { id, 0.0f, 1.0f };
-            cur_p = { gsmpl->cur.data(), gsmpl->cur.size(), 0, true };
-
-            return id;
-        }
-    }
-
    gsmpl->set_logits(ctx, idx);

-    if (grammar_first) {
-        llama_sampler_apply(grmr, &cur_p);
-    }
-
-    llama_sampler_apply(chain, &cur_p);
-
-    id = cur_p.data[cur_p.selected].id;
-
-    if (grammar_first) {
-        return id;
-    }
-
-    // check if it the sampled token fits the grammar (grammar-based rejection sampling)
-    {
-        llama_token_data       single_token_data       = { id, 1.0f, 0.0f };
-        llama_token_data_array single_token_data_array = { &single_token_data, 1, -1, false };
-
-        llama_sampler_apply(grmr, &single_token_data_array);
-
-        const bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
-        if (is_valid) {
-            return id;
-        }
-    }
-
-    // resampling:
-    // if the token is not valid, sample again, but first apply the grammar sampler and then the sampling chain
-    gsmpl->set_logits(ctx, idx);
-
-    llama_sampler_apply(grmr,  &cur_p);
    llama_sampler_apply(chain, &cur_p);

    GGML_ASSERT(cur_p.selected != -1 && "no selected token during sampling - check your sampling configuration");
@@ -518,7 +432,7 @@ llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_co
    return id;
 }

-std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first) {
+std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft) {
    GGML_ASSERT(idxs.size() == draft.size() + 1 && "idxs.size() must be draft.size() + 1");

    std::vector<llama_token> result;
@@ -526,7 +440,7 @@ std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sample

    size_t i = 0;
    for (; i < draft.size(); i++) {
-        const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i], grammar_first);
+        const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i]);

        common_sampler_accept(gsmpl, id, true);

@@ -538,7 +452,7 @@ std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sample
    }

    if (i == draft.size()) {
-        const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i], grammar_first);
+        const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i]);

        common_sampler_accept(gsmpl, id, true);

@@ -548,13 +462,13 @@ std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sample
    return result;
 }

-std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first) {
+std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft) {
    std::vector<int> idxs(draft.size() + 1);
    for (size_t i = 0; i < idxs.size(); ++i) {
        idxs[i] = i;
    }

-    return common_sampler_sample_and_accept_n(gsmpl, ctx, idxs, draft, grammar_first);
+    return common_sampler_sample_and_accept_n(gsmpl, ctx, idxs, draft);
 }

 uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl) {
@@ -639,7 +553,6 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
        case COMMON_SAMPLER_TYPE_XTC:         return 'x';
        case COMMON_SAMPLER_TYPE_INFILL:      return 'i';
        case COMMON_SAMPLER_TYPE_PENALTIES:   return 'e';
-        case COMMON_SAMPLER_TYPE_ADAPTIVE_P:  return 'a';
        default : return '?';
    }
 }
@@ -656,7 +569,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
        case COMMON_SAMPLER_TYPE_XTC:         return "xtc";
        case COMMON_SAMPLER_TYPE_INFILL:      return "infill";
        case COMMON_SAMPLER_TYPE_PENALTIES:   return "penalties";
-        case COMMON_SAMPLER_TYPE_ADAPTIVE_P:  return "adaptive_p";
        default : return "";
    }
 }
@@ -673,7 +585,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
        { "xtc",         COMMON_SAMPLER_TYPE_XTC },
        { "infill",      COMMON_SAMPLER_TYPE_INFILL },
        { "penalties",   COMMON_SAMPLER_TYPE_PENALTIES },
-        { "adaptive_p",  COMMON_SAMPLER_TYPE_ADAPTIVE_P },
    };

    // since samplers names are written multiple ways
@@ -689,7 +600,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
        { "typ",         COMMON_SAMPLER_TYPE_TYPICAL_P },
        { "min-p",       COMMON_SAMPLER_TYPE_MIN_P },
        { "temp",        COMMON_SAMPLER_TYPE_TEMPERATURE },
-        { "adaptive-p",  COMMON_SAMPLER_TYPE_ADAPTIVE_P },
    };

    std::vector<common_sampler_type> samplers;
@@ -726,7 +636,6 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC),         COMMON_SAMPLER_TYPE_XTC },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL),      COMMON_SAMPLER_TYPE_INFILL },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES),   COMMON_SAMPLER_TYPE_PENALTIES },
-        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_ADAPTIVE_P),  COMMON_SAMPLER_TYPE_ADAPTIVE_P },
    };

    std::vector<common_sampler_type> samplers;
--- a/llama/llama.cpp/common/sampling.h
+++ b/llama/llama.cpp/common/sampling.h
@@ -36,8 +36,7 @@ struct common_sampler;

 // llama_sampler API overloads

-// note: can mutate params in some cases
-struct common_sampler * common_sampler_init(const struct llama_model * model, struct common_params_sampling & params);
+struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params);

 void common_sampler_free(struct common_sampler * gsmpl);

@@ -49,7 +48,6 @@ struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);
 // arguments can be nullptr to skip printing
 void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl);

-// get the underlying llama_sampler_chain
 struct llama_sampler * common_sampler_get(const struct common_sampler * gsmpl);

 // extended sampling implementation:
@@ -59,10 +57,7 @@ struct llama_sampler * common_sampler_get(const struct common_sampler * gsmpl);
-// - check if the token fits the grammar (if any)
-// - if not: resample by first applying the grammar constraints and then sampling again (slower path)
+// - note: the grammar sampler (if any) is the first sampler of the chain, so the grammar
+//         constraints are applied as part of the chain (no separate check / resample step)
 //
-// if grammar_first is true, the grammar is applied before the samplers (slower)
-// useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
-//
-llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);
+llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx);

 // generalized version of common_sampler_sample
 //
@@ -80,10 +75,10 @@ llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_co
 //
 // returns at least 1 token, up to idxs.size()
 //
-std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first = false);
+std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft);

 // assume idxs == [ 0, 1, 2, ..., draft.size() ]
-std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first = false);
+std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft);

 uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);

--- a/llama/llama.cpp/include/llama-cpp.h
+++ b/llama/llama.cpp/include/llama-cpp.h
@@ -21,9 +21,7 @@ struct llama_sampler_deleter {
 };

 struct llama_adapter_lora_deleter {
-    void operator()(llama_adapter_lora *) {
-        // llama_adapter_lora_free is deprecated
-    }
+    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
 };

 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
--- a/llama/llama.cpp/include/llama.h
+++ b/llama/llama.cpp/include/llama.h
@@ -286,7 +286,7 @@ extern "C" {
        // NULL-terminated list of buffer types to use for tensors that match a pattern
        const struct llama_model_tensor_buft_override * tensor_buft_overrides;

-        int32_t n_gpu_layers; // number of layers to store in VRAM, a negative value means all layers
+        int32_t n_gpu_layers; // number of layers to store in VRAM
        enum llama_split_mode split_mode; // how to split the model across multiple GPUs

        // the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE
@@ -309,7 +309,6 @@ extern "C" {
        // Keep the booleans together to avoid misalignment during copy-by-value.
        bool vocab_only;      // only load the vocabulary, no weights
        bool use_mmap;        // use mmap if possible
-        bool use_direct_io;   // use direct io, takes precedence over use_mmap
        bool use_mlock;       // force system to keep model in RAM
        bool check_tensors;   // validate model tensor data
        bool use_extra_bufts; // use extra buffer types (used for weight repacking)
@@ -317,11 +316,6 @@ extern "C" {
        bool no_alloc;        // only load metadata and simulate memory allocations
    };

-    struct llama_sampler_seq_config {
-        llama_seq_id           seq_id;
-        struct llama_sampler * sampler;
-    };
-
    // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
    //       https://github.com/ggml-org/llama.cpp/pull/7544
    struct llama_context_params {
@@ -370,12 +364,6 @@ extern "C" {
        bool kv_unified;  // use a unified buffer across the input sequences when computing the attention
                          // try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix
                          // ref: https://github.com/ggml-org/llama.cpp/pull/14363
-
-        // [EXPERIMENTAL]
-        // backend sampler chain configuration (make sure the caller keeps the sampler chains alive)
-        // note: the samplers must be sampler chains (i.e. use llama_sampler_chain_init)
-        struct llama_sampler_seq_config * samplers;
-        size_t                            n_samplers;
    };

    // model quantization parameters
@@ -479,24 +467,16 @@ extern "C" {
    // Frees all allocated memory
    LLAMA_API void llama_free(struct llama_context * ctx);

-    enum llama_params_fit_status {
-        LLAMA_PARAMS_FIT_STATUS_SUCCESS = 0, // found allocations that are projected to fit
-        LLAMA_PARAMS_FIT_STATUS_FAILURE = 1, // could not find allocations that are projected to fit
-        LLAMA_PARAMS_FIT_STATUS_ERROR   = 2, // a hard error occured, e.g. because no model could be found at the specified path
-    };
-
    // fits mparams and cparams to free device memory (assumes system memory is unlimited)
-    //   - returns true if the parameters could be successfully modified to fit device memory
-    //   - this function is NOT thread safe because it modifies the global llama logger state
-    //   - only parameters that have the same value as in llama_default_model_params are modified
-    //     with the exception of the context size which is modified if and only if equal to 0
-    LLAMA_API enum llama_params_fit_status llama_params_fit(
+    // returns true if the parameters could be successfully modified to fit device memory
+    // this function is NOT thread safe because it modifies the global llama logger state
+    LLAMA_API bool llama_params_fit(
                                   const char   * path_model,
                    struct llama_model_params   * mparams,
                    struct llama_context_params * cparams,
                                          float * tensor_split,          // writable buffer for tensor split, needs at least llama_max_devices elements
        struct llama_model_tensor_buft_override * tensor_buft_overrides, // writable buffer for overrides, needs at least llama_max_tensor_buft_overrides elements
-                                         size_t * margins,               // margins of memory to leave per device in bytes
+                                         size_t   margin,                // margin of memory to leave per device in bytes
                                       uint32_t   n_ctx_min,             // minimum context size to set when trying to reduce memory use
                            enum ggml_log_level   log_level);            // minimum log level to print during fitting, lower levels go to debug log

@@ -537,7 +517,6 @@ extern "C" {
    LLAMA_API int32_t llama_model_n_ctx_train(const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_embd     (const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_embd_inp (const struct llama_model * model);
-    LLAMA_API int32_t llama_model_n_embd_out (const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_layer    (const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_head     (const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_head_kv  (const struct llama_model * model);
@@ -621,8 +600,6 @@ extern "C" {
    //

    // Load a LoRA adapter from file
-    // The adapter is valid as long as the associated model is not freed
-    // All adapters must be loaded before context creation
    LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
            struct llama_model * model,
            const char * path_lora);
@@ -647,8 +624,7 @@ extern "C" {

    // Manually free a LoRA adapter
    // NOTE: loaded adapters will be free when the associated model is deleted
-    LLAMA_API DEPRECATED(void llama_adapter_lora_free(struct llama_adapter_lora * adapter),
-            "adapters are now freed together with the associated model");
+    LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);

    // Get the invocation tokens if the current lora is an alora
    LLAMA_API uint64_t            llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
@@ -1007,32 +983,6 @@ extern "C" {
    // otherwise: float[n_embd] (1-dimensional)
    LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);

-    //
-    // backend sampling API [EXPERIMENTAL]
-    // note: use only if the llama_context was created with at least one llama_sampler_seq_config
-    //
-
-    // Get the backend sampled token for the ith token.
-    // Returns LLAMA_TOKEN_NULL if no token was sampled.
-    LLAMA_API llama_token llama_get_sampled_token_ith(struct llama_context * ctx, int32_t i);
-
-    // Get the backend sampled probabilites for the ith token
-    // The index matches llama_get_sampled_token_ith().
-    // Returns NULL if no probabilites were generated.
-    LLAMA_API float *  llama_get_sampled_probs_ith      (struct llama_context * ctx, int32_t i);
-    LLAMA_API uint32_t llama_get_sampled_probs_count_ith(struct llama_context * ctx, int32_t i);
-
-    // Get the backend sampled logits for the ith token
-    // Returns NULL if no logits were sampled.
-    LLAMA_API float *  llama_get_sampled_logits_ith      (struct llama_context * ctx, int32_t i);
-    LLAMA_API uint32_t llama_get_sampled_logits_count_ith(struct llama_context * ctx, int32_t i);
-
-    // Get the backend sampled candidates (token ids) for the ith token
-    // These are needed to map probability/logit indices to vocab token ids.
-    // Returns NULL if no candidates were sampled.
-    LLAMA_API llama_token * llama_get_sampled_candidates_ith      (struct llama_context * ctx, int32_t i);
-    LLAMA_API uint32_t      llama_get_sampled_candidates_count_ith(struct llama_context * ctx, int32_t i);
-
    //
    // Vocab
    //
@@ -1204,16 +1154,11 @@ extern "C" {
    //
    //    llama_sampler_free(smpl);
    //
+    // TODO: In the future, llama_sampler will be utilized to offload the sampling to the backends (e.g. GPU).
+    //

    typedef void * llama_sampler_context_t;

-    struct llama_sampler_data {
-        struct ggml_tensor * logits;
-        struct ggml_tensor * probs;
-        struct ggml_tensor * sampled;
-        struct ggml_tensor * candidates;
-    };
-
    // user code can implement the interface below in order to create custom llama_sampler
    struct llama_sampler_i {
        const char *           (*name)  (const struct llama_sampler * smpl);                                 // can be NULL
@@ -1223,44 +1168,17 @@ extern "C" {
        struct llama_sampler * (*clone) (const struct llama_sampler * smpl);                                 // can be NULL if ctx is NULL
        void                   (*free)  (      struct llama_sampler * smpl);                                 // can be NULL if ctx is NULL

-        // [EXPERIMENTAL]
-        // backend sampling interface:
-
-        // return true if the backend supports all ops needed by the sampler
-        // note: call once per sampler
-        bool (*backend_init)(struct llama_sampler * smpl, ggml_backend_buffer_type_t buft);
-
-        // call after .backend_apply()
-        void (*backend_accept)(
-                struct llama_sampler * smpl,
-                struct ggml_context  * ctx,
-                struct ggml_cgraph   * gf,
-                struct ggml_tensor   * selected_token);
-
-        // call after .backend_init()
-        void (*backend_apply)(
-                struct llama_sampler      * smpl,
-                struct ggml_context       * ctx,
-                struct ggml_cgraph        * gf,
-                struct llama_sampler_data * data);
-
-        // called before graph execution to set inputs for the current ubatch
-        void (*backend_set_input)(struct llama_sampler * smpl);
+        // TODO: API for internal libllama usage for appending the sampling to an existing ggml_cgraph
+        //void (*apply_ggml) (struct llama_sampler * smpl, ...);
    };

    struct llama_sampler {
-        struct llama_sampler_i * iface;
-
-        llama_sampler_context_t ctx;
+        const struct llama_sampler_i * iface;
+        llama_sampler_context_t        ctx;
    };

-    // [EXPERIMENTAL]
-    // attach a sampler to the context
-    // note: prefer initializing the context with llama_context_params.samplers when possible
-    LLAMA_API bool llama_set_sampler(struct llama_context * ctx, llama_seq_id seq_id, struct llama_sampler * smpl);
-
    // mirror of llama_sampler_i:
-    LLAMA_API struct llama_sampler * llama_sampler_init  (      struct llama_sampler_i * iface, llama_sampler_context_t ctx);
+    LLAMA_API struct llama_sampler * llama_sampler_init  (const struct llama_sampler_i * iface, llama_sampler_context_t ctx);
    LLAMA_API const char *           llama_sampler_name  (const struct llama_sampler * smpl);
    LLAMA_API void                   llama_sampler_accept(      struct llama_sampler * smpl, llama_token token);
    LLAMA_API void                   llama_sampler_apply (      struct llama_sampler * smpl, llama_token_data_array * cur_p);
@@ -1276,15 +1194,7 @@ extern "C" {

    // important: takes ownership of the sampler object and will free it when llama_sampler_free is called
    LLAMA_API void                   llama_sampler_chain_add(      struct llama_sampler * chain, struct llama_sampler * smpl);
-
-    // return NULL if:
-    //   - the sampler is NULL
-    //   - the sampler is not a llama_sampler_chain
-    //   - the index is out of bounds, unless i == -1
-    //   - if i == -1, returns the chain itself (can be used to check if the sampler is a chain)
-    LLAMA_API struct llama_sampler * llama_sampler_chain_get(      struct llama_sampler * chain, int32_t i);
-
-    // the total number of samplers in the chain
+    LLAMA_API struct llama_sampler * llama_sampler_chain_get(const struct llama_sampler * chain, int32_t i);
    LLAMA_API int                    llama_sampler_chain_n  (const struct llama_sampler * chain);

    // after removing a sampler, the chain will no longer own it, and it will not be freed when the chain is freed
@@ -1293,9 +1203,7 @@ extern "C" {
    // available samplers:

    LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
-
-    /// seed == LLAMA_DEFAULT_SEED to use a random seed.
-    LLAMA_API struct llama_sampler * llama_sampler_init_dist(uint32_t seed);
+    LLAMA_API struct llama_sampler * llama_sampler_init_dist  (uint32_t seed);

    /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
    /// Setting k <= 0 makes this a noop
@@ -1396,33 +1304,6 @@ extern "C" {
                          const char ** seq_breakers,
                              size_t    num_breakers);

-    /// adaptive-p: select tokens near a configurable target probability over time.
-    ///
-    /// the adaptive-p sampler transforms the token probability distribution to favor tokens
-    /// that fall near a user-configurable probability target.
-    ///
-    /// internally, the sampler maintains an exponential moving average of the *ORIGINAL*
-    /// probabilities of selected tokens at each sampling step. it uses this EMA to compute an
-    /// adapted target probability at each sampling step, thus maintaining the desired target
-    /// probability over time.
-    ///
-    /// adaptive-p selects a token ID rather than just mutating candidates, so it must be last
-    /// in the sampler chain (like mirostat, dist, greedy).
-    ///
-    /// only mild truncation before this sampler is recommended. we suggest applying min-p
-    /// before adaptive-p as the only other active sampler in the chain.
-    ///
-    /// @param target select tokens near this probability (valid range 0.0 to 1.0; negative = disabled)
-    /// @param decay  EMA decay for adaptation; history ≈ 1/(1-decay) tokens (valid range 0.0 - 0.99)
-    /// @param seed   RNG seed
-    ///
-    /// ref: https://github.com/ggml-org/llama.cpp/pull/17927
-    ///
-    LLAMA_API struct llama_sampler * llama_sampler_init_adaptive_p(
-                               float   target,
-                               float   decay,
-                            uint32_t   seed);
-
    LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
                             int32_t   n_vocab,
                             int32_t   n_logit_bias,
@@ -1476,12 +1357,12 @@ extern "C" {
    /// @details Build a split GGUF final path for this chunk.
    ///          llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
    //  Returns the split_path length.
-    LLAMA_API int32_t llama_split_path(char * split_path, size_t maxlen, const char * path_prefix, int32_t split_no, int32_t split_count);
+    LLAMA_API int llama_split_path(char * split_path, size_t maxlen, const char * path_prefix, int split_no, int split_count);

    /// @details Extract the path prefix from the split_path if and only if the split_no and split_count match.
    ///          llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) => split_prefix = "/models/ggml-model-q4_0"
    //  Returns the split_prefix length.
-    LLAMA_API int32_t llama_split_prefix(char * split_prefix, size_t maxlen, const char * split_path, int32_t split_no, int32_t split_count);
+    LLAMA_API int llama_split_prefix(char * split_prefix, size_t maxlen, const char * split_path, int split_no, int split_count);

    // Print system information
    LLAMA_API const char * llama_print_system_info(void);
--- a/llama/llama.cpp/src/llama-adapter.cpp
+++ b/llama/llama.cpp/src/llama-adapter.cpp
@@ -411,9 +411,6 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
        }
    }

-    // register adapter with model
-    model.loras.insert(&adapter);
-
    LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }

@@ -471,8 +468,8 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
    return snprintf(buf, buf_size, "%s", it->second.c_str());
 }

-void llama_adapter_lora_free(llama_adapter_lora *) {
-    // deprecated: adapters are freed by llama_model's destructor
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
+    delete adapter;
 }

 uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
--- a/llama/llama.cpp/src/llama-adapter.h
+++ b/llama/llama.cpp/src/llama-adapter.h
@@ -77,10 +77,6 @@ struct llama_adapter_lora {
    ~llama_adapter_lora() = default;

    llama_adapter_lora_weight * get_weight(ggml_tensor * w);
-
-    uint32_t get_n_nodes() const {
-        return ab_map.size() * 6u; // a, b, scale, add, 2 x mul_mat
-    }
 };

 using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
--- a/llama/llama.cpp/src/llama-arch.cpp
+++ b/llama/llama.cpp/src/llama-arch.cpp
@@ -20,7 +20,6 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_STARCODER,        "starcoder"        },
    { LLM_ARCH_REFACT,           "refact"           },
    { LLM_ARCH_BERT,             "bert"             },
-    { LLM_ARCH_MODERN_BERT,      "modern-bert"      },
    { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       },
    { LLM_ARCH_NOMIC_BERT_MOE,   "nomic-bert-moe"   },
    { LLM_ARCH_NEO_BERT,         "neo-bert"         },
@@ -42,7 +41,6 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_PHIMOE,           "phimoe"           },
    { LLM_ARCH_PLAMO,            "plamo"            },
    { LLM_ARCH_PLAMO2,           "plamo2"           },
-    { LLM_ARCH_PLAMO3,           "plamo3"           },
    { LLM_ARCH_CODESHELL,        "codeshell"        },
    { LLM_ARCH_ORION,            "orion"            },
    { LLM_ARCH_INTERNLM2,        "internlm2"        },
@@ -81,7 +79,6 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_NEMOTRON_H_MOE,   "nemotron_h_moe"   },
    { LLM_ARCH_EXAONE,           "exaone"           },
    { LLM_ARCH_EXAONE4,          "exaone4"          },
-    { LLM_ARCH_EXAONE_MOE,       "exaone-moe"       },
    { LLM_ARCH_RWKV6,            "rwkv6"            },
    { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
    { LLM_ARCH_RWKV7,            "rwkv7"            },
@@ -118,9 +115,6 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_RND1,             "rnd1"             },
    { LLM_ARCH_PANGU_EMBED,      "pangu-embedded"   },
    { LLM_ARCH_MISTRAL3,         "mistral3"         },
-    { LLM_ARCH_MIMO2,            "mimo2"           },
-    { LLM_ARCH_LLAMA_EMBED,      "llama-embed"      },
-    { LLM_ARCH_MAINCODER,        "maincoder"        },
    { LLM_ARCH_UNKNOWN,          "(unknown)"        },
 };

@@ -154,7 +148,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_VOCAB_SIZE,                        "%s.vocab_size"                        },
    { LLM_KV_CONTEXT_LENGTH,                    "%s.context_length"                    },
    { LLM_KV_EMBEDDING_LENGTH,                  "%s.embedding_length"                  },
-    { LLM_KV_EMBEDDING_LENGTH_OUT,              "%s.embedding_length_out"              },
    { LLM_KV_FEATURES_LENGTH,                   "%s.features_length"                   },
    { LLM_KV_BLOCK_COUNT,                       "%s.block_count"                       },
    { LLM_KV_LEADING_DENSE_BLOCK_COUNT,         "%s.leading_dense_block_count"         },
@@ -212,7 +205,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_ATTENTION_GATE_LORA_RANK,               "%s.attention.gate_lora_rank"               },
    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,       "%s.attention.relative_buckets_count"       },
    { LLM_KV_ATTENTION_SLIDING_WINDOW,               "%s.attention.sliding_window"               },
-    { LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,       "%s.attention.sliding_window_pattern"       },
    { LLM_KV_ATTENTION_SCALE,                        "%s.attention.scale"                        },
    { LLM_KV_ATTENTION_OUTPUT_SCALE,                 "%s.attention.output_scale"                 },
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH,           "%s.attention.temperature_length"           },
@@ -224,7 +216,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_ROPE_DIMENSION_COUNT,          "%s.rope.dimension_count"                 },
    { LLM_KV_ROPE_DIMENSION_SECTIONS,       "%s.rope.dimension_sections"              },
    { LLM_KV_ROPE_FREQ_BASE,                "%s.rope.freq_base"                       },
-    { LLM_KV_ROPE_FREQ_BASE_SWA,            "%s.rope.freq_base_swa"                   },
    { LLM_KV_ROPE_SCALE_LINEAR,             "%s.rope.scale_linear"                    },
    { LLM_KV_ROPE_SCALING_TYPE,             "%s.rope.scaling.type"                    },
    { LLM_KV_ROPE_SCALING_FACTOR,           "%s.rope.scaling.factor"                  },
@@ -509,7 +500,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
        case LLM_ARCH_LLAMA:
        case LLM_ARCH_DECI:
        case LLM_ARCH_MISTRAL3:
-        case LLM_ARCH_LLAMA_EMBED:
            return {
                LLM_TENSOR_TOKEN_EMBD,
                LLM_TENSOR_OUTPUT_NORM,
@@ -791,20 +781,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_CLS,
                LLM_TENSOR_CLS_OUT,
            };
-        case LLM_ARCH_MODERN_BERT:
-            return {
-                LLM_TENSOR_TOKEN_EMBD,
-                LLM_TENSOR_TOKEN_EMBD_NORM,
-                LLM_TENSOR_OUTPUT_NORM,
-                LLM_TENSOR_ATTN_NORM,
-                LLM_TENSOR_ATTN_OUT,
-                LLM_TENSOR_ATTN_QKV,
-                LLM_TENSOR_FFN_DOWN,
-                LLM_TENSOR_FFN_UP,
-                LLM_TENSOR_FFN_NORM,
-                LLM_TENSOR_CLS,
-                LLM_TENSOR_CLS_OUT,
-            };
        case LLM_ARCH_JINA_BERT_V2:
            return {
                LLM_TENSOR_TOKEN_EMBD,
@@ -954,8 +930,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_ATTN_K_NORM,
                LLM_TENSOR_ATTN_V,
                LLM_TENSOR_ATTN_OUT,
-                LLM_TENSOR_ATTN_QKV,
-                LLM_TENSOR_ATTN_GATE,
                LLM_TENSOR_FFN_NORM,
                LLM_TENSOR_FFN_GATE_INP,
                LLM_TENSOR_FFN_GATE_EXPS,
@@ -1086,22 +1060,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_ATTN_POST_NORM,
                LLM_TENSOR_FFN_POST_NORM,
            };
-        case LLM_ARCH_PLAMO3:
-            return {
-                LLM_TENSOR_TOKEN_EMBD,
-                LLM_TENSOR_OUTPUT_NORM,
-                LLM_TENSOR_OUTPUT,
-                LLM_TENSOR_ATTN_NORM,
-                LLM_TENSOR_ATTN_QKV,
-                LLM_TENSOR_ATTN_Q_NORM,
-                LLM_TENSOR_ATTN_K_NORM,
-                LLM_TENSOR_ATTN_OUT,
-                LLM_TENSOR_ATTN_POST_NORM,
-                LLM_TENSOR_FFN_NORM,
-                LLM_TENSOR_FFN_POST_NORM,
-                LLM_TENSOR_FFN_DOWN,
-                LLM_TENSOR_FFN_UP,
-            };
        case LLM_ARCH_CODESHELL:
            return {
                LLM_TENSOR_TOKEN_EMBD,
@@ -1732,38 +1690,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_FFN_UP,
                LLM_TENSOR_FFN_POST_NORM,
            };
-        case LLM_ARCH_EXAONE_MOE:
-            return {
-                LLM_TENSOR_TOKEN_EMBD,
-                LLM_TENSOR_OUTPUT_NORM,
-                LLM_TENSOR_OUTPUT,
-                LLM_TENSOR_ROPE_FREQS,
-                LLM_TENSOR_ATTN_NORM,
-                LLM_TENSOR_ATTN_Q,
-                LLM_TENSOR_ATTN_Q_NORM,
-                LLM_TENSOR_ATTN_K,
-                LLM_TENSOR_ATTN_K_NORM,
-                LLM_TENSOR_ATTN_V,
-                LLM_TENSOR_ATTN_OUT,
-                LLM_TENSOR_FFN_NORM,
-                LLM_TENSOR_FFN_GATE,
-                LLM_TENSOR_FFN_DOWN,
-                LLM_TENSOR_FFN_UP,
-                LLM_TENSOR_FFN_GATE_INP,
-                LLM_TENSOR_FFN_GATE_EXPS,
-                LLM_TENSOR_FFN_DOWN_EXPS,
-                LLM_TENSOR_FFN_UP_EXPS,
-                LLM_TENSOR_FFN_GATE_SHEXP,
-                LLM_TENSOR_FFN_UP_SHEXP,
-                LLM_TENSOR_FFN_DOWN_SHEXP,
-                LLM_TENSOR_FFN_EXP_PROBS_B,
-                LLM_TENSOR_NEXTN_EH_PROJ,
-                LLM_TENSOR_NEXTN_EMBED_TOKENS,
-                LLM_TENSOR_NEXTN_ENORM,
-                LLM_TENSOR_NEXTN_HNORM,
-                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
-                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
-            };
        case LLM_ARCH_RWKV6:
            return {
                LLM_TENSOR_TOKEN_EMBD,
@@ -2114,7 +2040,6 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_TOKEN_EMBD,
                LLM_TENSOR_OUTPUT_NORM_LFM2,
                LLM_TENSOR_OUTPUT,
-                LLM_TENSOR_DENSE_2_OUT,
            };
        case LLM_ARCH_LFM2MOE:
            return {
@@ -2133,7 +2058,7 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_SHORTCONV_INPROJ,
                LLM_TENSOR_SHORTCONV_OUTPROJ,
                LLM_TENSOR_TOKEN_EMBD,
-                LLM_TENSOR_OUTPUT_NORM_LFM2,
+                LLM_TENSOR_OUTPUT_NORM,
                LLM_TENSOR_FFN_GATE_INP,
                LLM_TENSOR_FFN_GATE_EXPS,
                LLM_TENSOR_FFN_DOWN_EXPS,
@@ -2249,49 +2174,11 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                LLM_TENSOR_VISEXP_FFN_DOWN,
                LLM_TENSOR_VISEXP_FFN_UP,
            };
-        case LLM_ARCH_MIMO2:
-            return {
-                LLM_TENSOR_TOKEN_EMBD,
-                LLM_TENSOR_OUTPUT_NORM,
-                LLM_TENSOR_OUTPUT,
-                LLM_TENSOR_ATTN_NORM,
-                LLM_TENSOR_ATTN_Q,
-                LLM_TENSOR_ATTN_K,
-                LLM_TENSOR_ATTN_V,
-                LLM_TENSOR_ATTN_SINKS,
-                LLM_TENSOR_ATTN_OUT,
-                LLM_TENSOR_FFN_NORM,
-                LLM_TENSOR_FFN_GATE,
-                LLM_TENSOR_FFN_DOWN,
-                LLM_TENSOR_FFN_UP,
-                LLM_TENSOR_FFN_GATE_INP,
-                LLM_TENSOR_FFN_GATE_EXPS,
-                LLM_TENSOR_FFN_DOWN_EXPS,
-                LLM_TENSOR_FFN_UP_EXPS,
-                LLM_TENSOR_FFN_EXP_PROBS_B,
-            };
        case LLM_ARCH_GPTJ:
        case LLM_ARCH_UNKNOWN:
            return {
                LLM_TENSOR_TOKEN_EMBD,
            };
-        case LLM_ARCH_MAINCODER:
-            return {
-                LLM_TENSOR_TOKEN_EMBD,
-                LLM_TENSOR_OUTPUT_NORM,
-                LLM_TENSOR_OUTPUT,
-                LLM_TENSOR_ATTN_NORM,
-                LLM_TENSOR_ATTN_Q,
-                LLM_TENSOR_ATTN_Q_NORM,
-                LLM_TENSOR_ATTN_K,
-                LLM_TENSOR_ATTN_K_NORM,
-                LLM_TENSOR_ATTN_V,
-                LLM_TENSOR_ATTN_OUT,
-                LLM_TENSOR_FFN_NORM,
-                LLM_TENSOR_FFN_GATE,
-                LLM_TENSOR_FFN_DOWN,
-                LLM_TENSOR_FFN_UP,
-            };
        case LLM_ARCH_SOLAR:
            return {
                LLM_TENSOR_TOKEN_EMBD,
--- a/llama/llama.cpp/src/llama-arch.h
+++ b/llama/llama.cpp/src/llama-arch.h
@@ -24,7 +24,6 @@ enum llm_arch {
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
-    LLM_ARCH_MODERN_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_NEO_BERT,
@@ -46,7 +45,6 @@ enum llm_arch {
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_PLAMO2,
-    LLM_ARCH_PLAMO3,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
@@ -85,7 +83,6 @@ enum llm_arch {
    LLM_ARCH_NEMOTRON_H_MOE,
    LLM_ARCH_EXAONE,
    LLM_ARCH_EXAONE4,
-    LLM_ARCH_EXAONE_MOE,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
@@ -122,9 +119,6 @@ enum llm_arch {
    LLM_ARCH_RND1,
    LLM_ARCH_PANGU_EMBED,
    LLM_ARCH_MISTRAL3,
-    LLM_ARCH_MIMO2,
-    LLM_ARCH_LLAMA_EMBED,
-    LLM_ARCH_MAINCODER,
    LLM_ARCH_UNKNOWN,
 };

@@ -158,7 +152,6 @@ enum llm_kv {
    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
-    LLM_KV_EMBEDDING_LENGTH_OUT,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
@@ -216,7 +209,6 @@ enum llm_kv {
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
-    LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_OUTPUT_SCALE,
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
@@ -228,7 +220,6 @@ enum llm_kv {
    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
-    LLM_KV_ROPE_FREQ_BASE_SWA,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
--- a/llama/llama.cpp/src/llama-chat.cpp
+++ b/llama/llama.cpp/src/llama-chat.cpp
@@ -57,7 +57,6 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
    { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
-    { "exaone-moe",        LLM_CHAT_TEMPLATE_EXAONE_MOE        },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
@@ -75,7 +74,6 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
-    { "solar-open",        LLM_CHAT_TEMPLATE_SOLAR_OPEN        },
 };

 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -138,9 +136,6 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
-        if (tmpl_contains("<|tool_declare|>")) {
-            return LLM_CHAT_TEMPLATE_EXAONE_MOE;
-        }
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
@@ -221,8 +216,6 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统：[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
-    } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
-        return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
@@ -580,22 +573,6 @@ int32_t llm_chat_apply_template(
        if (add_ass) {
            ss << "[|assistant|]";
        }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_MOE) {
-        for (auto message : chat) {
-            std::string role(message->role);
-            if (role == "system") {
-                ss << "<|system|>\n" << trim(message->content) << "<|endofturn|>\n";
-            } else if (role == "user") {
-                ss << "<|user|>\n" << trim(message->content) << "<|endofturn|>\n";
-            } else if (role == "assistant") {
-                ss << "<|assistant|>\n" << trim(message->content) << "<|endofturn|>\n";
-            } else if (role == "tool") {
-                ss << "<|tool|>\n" << trim(message->content) << "<|endofturn|>\n";
-            }
-        }
-        if (add_ass) {
-            ss << "<|assistant|>\n";
-        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (size_t i = 0; i < chat.size(); i++) {
@@ -868,14 +845,6 @@ int32_t llm_chat_apply_template(
        if (add_ass) {
            ss << "[unused9]助手：";
        }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
-        for (auto message : chat) {
-            std::string role(message->role);
-            ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
-        }
-        if (add_ass) {
-            ss << "<|begin|>assistant";
-        }
    } else {
        // template not supported
        return -1;
--- a/llama/llama.cpp/src/llama-chat.h
+++ b/llama/llama.cpp/src/llama-chat.h
@@ -36,7 +36,6 @@ enum llm_chat_template {
    LLM_CHAT_TEMPLATE_MINICPM,
    LLM_CHAT_TEMPLATE_EXAONE_3,
    LLM_CHAT_TEMPLATE_EXAONE_4,
-    LLM_CHAT_TEMPLATE_EXAONE_MOE,
    LLM_CHAT_TEMPLATE_RWKV_WORLD,
    LLM_CHAT_TEMPLATE_GRANITE,
    LLM_CHAT_TEMPLATE_GIGACHAT,
@@ -55,7 +54,6 @@ enum llm_chat_template {
    LLM_CHAT_TEMPLATE_SEED_OSS,
    LLM_CHAT_TEMPLATE_GROK_2,
    LLM_CHAT_TEMPLATE_PANGU_EMBED,
-    LLM_CHAT_TEMPLATE_SOLAR_OPEN,
    LLM_CHAT_TEMPLATE_UNKNOWN,
 };

--- a/llama/llama.cpp/src/llama-context.cpp
+++ b/llama/llama.cpp/src/llama-context.cpp
--- a/llama/llama.cpp/src/llama-context.h
+++ b/llama/llama.cpp/src/llama-context.h
@@ -40,14 +40,6 @@ struct llama_context {

    ~llama_context();

-    // reserve a new backend scheduler (if needed)
-    // for example, when:
-    //   - changing loras
-    //   - changing samplers
-    //   - changing attention type
-    //   - etc.
-    void sched_reserve();
-
    void synchronize();

    const llama_model   & get_model()   const;
@@ -78,18 +70,6 @@ struct llama_context {
    float * get_embeddings_ith(int32_t i);
    float * get_embeddings_seq(llama_seq_id seq_id);

-    llama_token * get_sampled_tokens() const;
-    llama_token   get_sampled_token_ith(int32_t idx);
-
-    float * get_sampled_logits_ith(int32_t idx);
-    size_t  get_sampled_logits_count(int32_t idx);
-
-    float * get_sampled_probs_ith(int32_t idx);
-    size_t  get_sampled_probs_count(int32_t idx);
-
-    const llama_token * get_sampled_candidates_ith(int32_t idx);
-    size_t get_sampled_candidates_count(int32_t idx);
-
    void attach_threadpool(
            ggml_threadpool_t threadpool,
            ggml_threadpool_t threadpool_batch);
@@ -212,13 +192,10 @@ private:

    // Make sure enough space is available for outputs.
    // Returns max number of outputs for which space was reserved.
-    uint32_t output_reserve(int32_t n_outputs, const llama_batch & batch);
+    uint32_t output_reserve(int32_t n_outputs);

    void output_reorder();

-    // map the output row index `i` to batch index
-    int64_t output_resolve_row(int32_t i) const;
-
    //
    // graph
    //
@@ -236,8 +213,6 @@ public:
    ggml_cgraph * graph_reserve(
        uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx, bool split_only = false, size_t * sizes = nullptr);

-    bool set_sampler(llama_seq_id seq_id, llama_sampler * sampler);
-
 private:
    llm_graph_params graph_params(
                        llm_graph_result * res,
@@ -277,31 +252,6 @@ private:
    size_t  embd_size = 0; // capacity (of floats) for embeddings
    float * embd      = nullptr;

-    // TODO: simplify
-    struct sampling_info {
-        std::map<llama_seq_id, llama_sampler *> samplers;
-
-        float       * logits      = nullptr;
-        size_t        logits_size = 0;
-
-        llama_token * sampled      = nullptr;
-        size_t        sampled_size = 0;
-
-        float       * probs        = nullptr;
-        size_t        probs_size   = 0;
-
-        llama_token * candidates   = nullptr;
-        size_t        candidates_size = 0;
-
-        std::vector<uint32_t> logits_count;
-        std::vector<uint32_t> probs_count;
-        std::vector<uint32_t> candidates_count;
-
-        std::vector<llama_token> token_ids_full_vocab;
-    };
-
-    sampling_info sampling;
-
    // sequence embeddings output (map of [n_embd] vectors)
    // populated only when pooling_type != LLAMA_POOLING_TYPE_NONE
    std::map<llama_seq_id, std::vector<float>> embd_seq;
@@ -322,8 +272,6 @@ private:

    ggml_backend_sched_ptr sched;

-    bool sched_need_reserve = true;
-
    ggml_backend_t backend_cpu = nullptr;
    std::vector<ggml_backend_ptr> backends;

--- a/llama/llama.cpp/src/llama-cparams.h
+++ b/llama/llama.cpp/src/llama-cparams.h
@@ -30,12 +30,10 @@ struct llama_cparams {
    bool causal_attn;
    bool offload_kqv;
    bool flash_attn;
-    bool auto_fa;
    bool no_perf;
    bool warmup;
    bool op_offload;
    bool kv_unified;
-    bool pipeline_parallel;

    enum llama_pooling_type pooling_type;

--- a/llama/llama.cpp/src/llama-grammar.cpp
+++ b/llama/llama.cpp/src/llama-grammar.cpp
@@ -369,44 +369,6 @@ static void print_rule(
    fprintf(file, "\n");
 }

-//
-// Regex utilities
-//
-
-size_t llama_grammar_trigger_pattern::find(const std::string & input) const {
-    auto find_start_pos = [](const std::smatch & match) {
-        // get from the first matched capturing group to the end of the string
-        size_t start = std::string::npos;
-        for (auto i = 1u; i < match.size(); i++) {
-            if (match.length(i) > 0) {
-                start = match.position(i);
-                break;
-            }
-        }
-        if (start == std::string::npos) {
-            start = match.position(0);
-        }
-        return start;
-    };
-
-    if (!pattern.empty() && pattern.front() == '^' && pattern.back() == '$') {
-        // match against the entire input
-        std::smatch match;
-        if (std::regex_match(input, match, regex)) {
-            return find_start_pos(match);
-        }
-    }
-
-    // search anywhere
-    std::smatch match;
-    if (std::regex_search(input, match, regex)) {
-        return find_start_pos(match);
-    }
-
-    return std::string::npos;
-}
-
-
 //
 // implementation
 //
@@ -1359,10 +1321,21 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
            grammar.trigger_buffer_positions.push_back(std::make_pair(token, position));
            grammar.trigger_buffer += piece;

+            std::smatch match;
            for (const auto & trigger_pattern : grammar.trigger_patterns) {
-                auto start = trigger_pattern.find(grammar.trigger_buffer);
-                if (start != std::string::npos) {
+                if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) {
                    grammar.awaiting_trigger = false;
+                    // get from the first matched capturing group to the end of the string
+                    size_t start = std::string::npos;
+                    for (auto i = 1u; i < match.size(); i++) {
+                        if (match.length(i) > 0) {
+                            start = match.position(i);
+                            break;
+                        }
+                    }
+                    if (start == std::string::npos) {
+                        start = match.position(0);
+                    }

                    // replay tokens that overlap with [start, end)
                    for (const auto & [tok, tok_pos] : grammar.trigger_buffer_positions) {
--- a/llama/llama.cpp/src/llama-grammar.h
+++ b/llama/llama.cpp/src/llama-grammar.h
@@ -130,8 +130,6 @@ struct llama_grammar_parser {
 struct llama_grammar_trigger_pattern {
    std::string pattern;
    std::regex  regex;
-
-    size_t find(const std::string & input) const;
 };

 struct llama_grammar {
--- a/Show More
+++ b/Show More