fix: Windows app crash on startup when update is pending (#14451)

app: fix first update check delayed by 1 hour (#14427)
mlxrunner: Cancel in-flight requests when the client disconnects
2026-02-27 04:27:01 -05:00 · 2026-02-26 16:47:12 -05:00 · 2026-02-25 18:29:55 -05:00 · 2026-02-25 14:00:42 -08:00 · 2026-02-25 14:00:42 -08:00 · 2026-02-25 01:09:58 -08:00
210 changed files with 23994 additions and 4401 deletions
--- a/9
+++ b/9
@@ -9,15 +9,10 @@ ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
 ARG VULKANVERSION=1.4.321.1

-# We require gcc v10 minimum.  v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
-RUN yum install -y yum-utils \
-    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
-    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
-    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
-    && dnf install -y ccache \
+RUN dnf install -y yum-utils ccache gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ gcc-toolset-11-binutils \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
-ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 ARG VULKANVERSION
 RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
    && tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
--- a/2
+++ b/2
@@ -1 +1 @@
-v0.4.1
+v0.5.0
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Start building with open models.
 curl -fsSL https://ollama.com/install.sh | sh
 ```

-or [download manually](http://localhost:8080/download/Ollama.dmg)
+or [download manually](https://ollama.com/download/Ollama.dmg)

 ### Windows

--- a/anthropic/anthropic.go
+++ b/anthropic/anthropic.go
@@ -1,17 +1,25 @@
 package anthropic

 import (
+	"bytes"
+	"context"
 	"crypto/rand"
 	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io"
 	"log/slog"
 	"net/http"
+	"net/url"
+	"strconv"
 	"strings"
 	"time"

 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/auth"
+	internalcloud "github.com/ollama/ollama/internal/cloud"
+	"github.com/ollama/ollama/logutil"
 )

 // Error types matching Anthropic API
@@ -82,22 +90,25 @@ type MessageParam struct {
 // Text and Thinking use pointers so they serialize as the field being present (even if empty)
 // only when set, which is required for SDK streaming accumulation.
 type ContentBlock struct {
-	Type string `json:"type"` // text, image, tool_use, tool_result, thinking
+	Type string `json:"type"` // text, image, tool_use, tool_result, thinking, server_tool_use, web_search_tool_result

 	// For text blocks - pointer so field only appears when set (SDK requires it for accumulation)
 	Text *string `json:"text,omitempty"`

+	// For text blocks with citations
+	Citations []Citation `json:"citations,omitempty"`
+
 	// For image blocks
 	Source *ImageSource `json:"source,omitempty"`

-	// For tool_use blocks
+	// For tool_use and server_tool_use blocks
 	ID    string `json:"id,omitempty"`
 	Name  string `json:"name,omitempty"`
 	Input any    `json:"input,omitempty"`

-	// For tool_result blocks
+	// For tool_result and web_search_tool_result blocks
 	ToolUseID string `json:"tool_use_id,omitempty"`
-	Content   any    `json:"content,omitempty"` // string or []ContentBlock
+	Content   any    `json:"content,omitempty"` // string, []ContentBlock, []WebSearchResult, or WebSearchToolResultError
 	IsError   bool   `json:"is_error,omitempty"`

 	// For thinking blocks - pointer so field only appears when set (SDK requires it for accumulation)
@@ -105,6 +116,30 @@ type ContentBlock struct {
 	Signature string  `json:"signature,omitempty"`
 }

+// Citation represents a citation in a text block
+type Citation struct {
+	Type           string `json:"type"` // "web_search_result_location"
+	URL            string `json:"url"`
+	Title          string `json:"title"`
+	EncryptedIndex string `json:"encrypted_index,omitempty"`
+	CitedText      string `json:"cited_text,omitempty"`
+}
+
+// WebSearchResult represents a single web search result
+type WebSearchResult struct {
+	Type             string `json:"type"` // "web_search_result"
+	URL              string `json:"url"`
+	Title            string `json:"title"`
+	EncryptedContent string `json:"encrypted_content,omitempty"`
+	PageAge          string `json:"page_age,omitempty"`
+}
+
+// WebSearchToolResultError represents an error from web search
+type WebSearchToolResultError struct {
+	Type      string `json:"type"` // "web_search_tool_result_error"
+	ErrorCode string `json:"error_code"`
+}
+
 // ImageSource represents the source of an image
 type ImageSource struct {
 	Type      string `json:"type"` // "base64" or "url"
@@ -115,10 +150,13 @@ type ImageSource struct {

 // Tool represents a tool definition
 type Tool struct {
-	Type        string          `json:"type,omitempty"` // "custom" for user-defined tools
+	Type        string          `json:"type,omitempty"` // "custom" for user-defined tools, or "web_search_20250305" for web search
 	Name        string          `json:"name"`
 	Description string          `json:"description,omitempty"`
 	InputSchema json.RawMessage `json:"input_schema,omitempty"`
+
+	// Web search specific fields
+	MaxUses int `json:"max_uses,omitempty"`
 }

 // ToolChoice controls how the model uses tools
@@ -233,6 +271,8 @@ type StreamErrorEvent struct {

 // FromMessagesRequest converts an Anthropic MessagesRequest to an Ollama api.ChatRequest
 func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
+	logutil.Trace("anthropic: converting request", "req", TraceMessagesRequest(r))
+
 	var messages []api.Message

 	if r.System != nil {
@@ -259,9 +299,10 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 		}
 	}

-	for _, msg := range r.Messages {
+	for i, msg := range r.Messages {
 		converted, err := convertMessage(msg)
 		if err != nil {
+			logutil.Trace("anthropic: message conversion failed", "index", i, "role", msg.Role, "err", err)
 			return nil, err
 		}
 		messages = append(messages, converted...)
@@ -288,8 +329,24 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 	}

 	var tools api.Tools
+	hasBuiltinWebSearch := false
 	for _, t := range r.Tools {
-		tool, err := convertTool(t)
+		if strings.HasPrefix(t.Type, "web_search") {
+			hasBuiltinWebSearch = true
+			break
+		}
+	}
+
+	for _, t := range r.Tools {
+		// Anthropic built-in web_search maps to Ollama function name "web_search".
+		// If a user-defined tool also uses that name in the same request, drop the
+		// user-defined one to avoid ambiguous tool-call routing.
+		if hasBuiltinWebSearch && !strings.HasPrefix(t.Type, "web_search") && t.Name == "web_search" {
+			logutil.Trace("anthropic: dropping colliding custom web_search tool", "tool", TraceTool(t))
+			continue
+		}
+
+		tool, _, err := convertTool(t)
 		if err != nil {
 			return nil, err
 		}
@@ -302,15 +359,17 @@ func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
 	}

 	stream := r.Stream
-
-	return &api.ChatRequest{
+	convertedRequest := &api.ChatRequest{
 		Model:    r.Model,
 		Messages: messages,
 		Options:  options,
 		Stream:   &stream,
 		Tools:    tools,
 		Think:    think,
-	}, nil
+	}
+	logutil.Trace("anthropic: converted request", "req", TraceChatRequest(convertedRequest))
+
+	return convertedRequest, nil
 }

 // convertMessage converts an Anthropic MessageParam to Ollama api.Message(s)
@@ -328,10 +387,19 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
 		var toolCalls []api.ToolCall
 		var thinking string
 		var toolResults []api.Message
+		textBlocks := 0
+		imageBlocks := 0
+		toolUseBlocks := 0
+		toolResultBlocks := 0
+		serverToolUseBlocks := 0
+		webSearchToolResultBlocks := 0
+		thinkingBlocks := 0
+		unknownBlocks := 0

 		for _, block := range content {
 			blockMap, ok := block.(map[string]any)
 			if !ok {
+				logutil.Trace("anthropic: invalid content block format", "role", role)
 				return nil, errors.New("invalid content block format")
 			}

@@ -339,13 +407,16 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {

 			switch blockType {
 			case "text":
+				textBlocks++
 				if text, ok := blockMap["text"].(string); ok {
 					textContent.WriteString(text)
 				}

 			case "image":
+				imageBlocks++
 				source, ok := blockMap["source"].(map[string]any)
 				if !ok {
+					logutil.Trace("anthropic: invalid image source", "role", role)
 					return nil, errors.New("invalid image source")
 				}

@@ -354,21 +425,26 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
 					data, _ := source["data"].(string)
 					decoded, err := base64.StdEncoding.DecodeString(data)
 					if err != nil {
+						logutil.Trace("anthropic: invalid base64 image data", "role", role, "error", err)
 						return nil, fmt.Errorf("invalid base64 image data: %w", err)
 					}
 					images = append(images, decoded)
 				} else {
+					logutil.Trace("anthropic: unsupported image source type", "role", role, "source_type", sourceType)
 					return nil, fmt.Errorf("invalid image source type: %s. Only base64 images are supported.", sourceType)
 				}
 				// URL images would need to be fetched - skip for now

 			case "tool_use":
+				toolUseBlocks++
 				id, ok := blockMap["id"].(string)
 				if !ok {
+					logutil.Trace("anthropic: tool_use block missing id", "role", role)
 					return nil, errors.New("tool_use block missing required 'id' field")
 				}
 				name, ok := blockMap["name"].(string)
 				if !ok {
+					logutil.Trace("anthropic: tool_use block missing name", "role", role)
 					return nil, errors.New("tool_use block missing required 'name' field")
 				}
 				tc := api.ToolCall{
@@ -383,6 +459,7 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
 				toolCalls = append(toolCalls, tc)

 			case "tool_result":
+				toolResultBlocks++
 				toolUseID, _ := blockMap["tool_use_id"].(string)
 				var resultContent string

@@ -408,9 +485,36 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
 				})

 			case "thinking":
+				thinkingBlocks++
 				if t, ok := blockMap["thinking"].(string); ok {
 					thinking = t
 				}
+
+			case "server_tool_use":
+				serverToolUseBlocks++
+				id, _ := blockMap["id"].(string)
+				name, _ := blockMap["name"].(string)
+				tc := api.ToolCall{
+					ID: id,
+					Function: api.ToolCallFunction{
+						Name: name,
+					},
+				}
+				if input, ok := blockMap["input"].(map[string]any); ok {
+					tc.Function.Arguments = mapToArgs(input)
+				}
+				toolCalls = append(toolCalls, tc)
+
+			case "web_search_tool_result":
+				webSearchToolResultBlocks++
+				toolUseID, _ := blockMap["tool_use_id"].(string)
+				toolResults = append(toolResults, api.Message{
+					Role:       "tool",
+					Content:    formatWebSearchToolResultContent(blockMap["content"]),
+					ToolCallID: toolUseID,
+				})
+			default:
+				unknownBlocks++
 			}
 		}

@@ -427,6 +531,19 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {

 		// Add tool results as separate messages
 		messages = append(messages, toolResults...)
+		logutil.Trace("anthropic: converted block message",
+			"role", role,
+			"blocks", len(content),
+			"text", textBlocks,
+			"image", imageBlocks,
+			"tool_use", toolUseBlocks,
+			"tool_result", toolResultBlocks,
+			"server_tool_use", serverToolUseBlocks,
+			"web_search_result", webSearchToolResultBlocks,
+			"thinking", thinkingBlocks,
+			"unknown", unknownBlocks,
+			"messages", TraceAPIMessages(messages),
+		)

 	default:
 		return nil, fmt.Errorf("invalid message content type: %T", content)
@@ -435,12 +552,94 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
 	return messages, nil
 }

-// convertTool converts an Anthropic Tool to an Ollama api.Tool
-func convertTool(t Tool) (api.Tool, error) {
+func formatWebSearchToolResultContent(content any) string {
+	switch c := content.(type) {
+	case string:
+		return c
+	case []WebSearchResult:
+		var resultContent strings.Builder
+		for _, item := range c {
+			if item.Type != "web_search_result" {
+				continue
+			}
+			fmt.Fprintf(&resultContent, "- %s: %s\n", item.Title, item.URL)
+		}
+		return resultContent.String()
+	case []any:
+		var resultContent strings.Builder
+		for _, item := range c {
+			itemMap, ok := item.(map[string]any)
+			if !ok {
+				continue
+			}
+			switch itemMap["type"] {
+			case "web_search_result":
+				title, _ := itemMap["title"].(string)
+				url, _ := itemMap["url"].(string)
+				fmt.Fprintf(&resultContent, "- %s: %s\n", title, url)
+			case "web_search_tool_result_error":
+				errorCode, _ := itemMap["error_code"].(string)
+				if errorCode == "" {
+					return "web_search_tool_result_error"
+				}
+				return "web_search_tool_result_error: " + errorCode
+			}
+		}
+		return resultContent.String()
+	case map[string]any:
+		if c["type"] == "web_search_tool_result_error" {
+			errorCode, _ := c["error_code"].(string)
+			if errorCode == "" {
+				return "web_search_tool_result_error"
+			}
+			return "web_search_tool_result_error: " + errorCode
+		}
+		data, err := json.Marshal(c)
+		if err != nil {
+			return ""
+		}
+		return string(data)
+	case WebSearchToolResultError:
+		if c.ErrorCode == "" {
+			return "web_search_tool_result_error"
+		}
+		return "web_search_tool_result_error: " + c.ErrorCode
+	default:
+		data, err := json.Marshal(c)
+		if err != nil {
+			return ""
+		}
+		return string(data)
+	}
+}
+
+// convertTool converts an Anthropic Tool to an Ollama api.Tool, returning true if it's a server tool
+func convertTool(t Tool) (api.Tool, bool, error) {
+	if strings.HasPrefix(t.Type, "web_search") {
+		props := api.NewToolPropertiesMap()
+		props.Set("query", api.ToolProperty{
+			Type:        api.PropertyType{"string"},
+			Description: "The search query to look up on the web",
+		})
+		return api.Tool{
+			Type: "function",
+			Function: api.ToolFunction{
+				Name:        "web_search",
+				Description: "Search the web for current information. Use this to find up-to-date information about any topic.",
+				Parameters: api.ToolFunctionParameters{
+					Type:       "object",
+					Required:   []string{"query"},
+					Properties: props,
+				},
+			},
+		}, true, nil
+	}
+
 	var params api.ToolFunctionParameters
 	if len(t.InputSchema) > 0 {
 		if err := json.Unmarshal(t.InputSchema, &params); err != nil {
-			return api.Tool{}, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err)
+			logutil.Trace("anthropic: invalid tool schema", "tool", t.Name, "err", err)
+			return api.Tool{}, false, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err)
 		}
 	}

@@ -451,7 +650,7 @@ func convertTool(t Tool) (api.Tool, error) {
 			Description: t.Description,
 			Parameters:  params,
 		},
-	}, nil
+	}, false, nil
 }

 // ToMessagesResponse converts an Ollama api.ChatResponse to an Anthropic MessagesResponse
@@ -899,3 +1098,113 @@ func countContentBlock(block any) int {

 	return total
 }
+
+// OllamaWebSearchRequest represents a request to the Ollama web search API
+type OllamaWebSearchRequest struct {
+	Query      string `json:"query"`
+	MaxResults int    `json:"max_results,omitempty"`
+}
+
+// OllamaWebSearchResult represents a single search result from Ollama API
+type OllamaWebSearchResult struct {
+	Title   string `json:"title"`
+	URL     string `json:"url"`
+	Content string `json:"content"`
+}
+
+// OllamaWebSearchResponse represents the response from the Ollama web search API
+type OllamaWebSearchResponse struct {
+	Results []OllamaWebSearchResult `json:"results"`
+}
+
+var WebSearchEndpoint = "https://ollama.com/api/web_search"
+
+func WebSearch(ctx context.Context, query string, maxResults int) (*OllamaWebSearchResponse, error) {
+	if internalcloud.Disabled() {
+		logutil.TraceContext(ctx, "anthropic: web search blocked", "reason", "cloud_disabled")
+		return nil, errors.New(internalcloud.DisabledError("web search is unavailable"))
+	}
+
+	if maxResults <= 0 {
+		maxResults = 5
+	}
+	if maxResults > 10 {
+		maxResults = 10
+	}
+
+	reqBody := OllamaWebSearchRequest{
+		Query:      query,
+		MaxResults: maxResults,
+	}
+
+	body, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal web search request: %w", err)
+	}
+
+	searchURL, err := url.Parse(WebSearchEndpoint)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse web search URL: %w", err)
+	}
+	logutil.TraceContext(ctx, "anthropic: web search request",
+		"query", TraceTruncateString(query),
+		"max_results", maxResults,
+		"url", searchURL.String(),
+	)
+
+	q := searchURL.Query()
+	q.Set("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	searchURL.RawQuery = q.Encode()
+
+	signature := ""
+	if strings.EqualFold(searchURL.Hostname(), "ollama.com") {
+		challenge := fmt.Sprintf("%s,%s", http.MethodPost, searchURL.RequestURI())
+		signature, err = auth.Sign(ctx, []byte(challenge))
+		if err != nil {
+			return nil, fmt.Errorf("failed to sign web search request: %w", err)
+		}
+	}
+	logutil.TraceContext(ctx, "anthropic: web search auth", "signed", signature != "")
+
+	req, err := http.NewRequestWithContext(ctx, "POST", searchURL.String(), bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create web search request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	if signature != "" {
+		req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", signature))
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("web search request failed: %w", err)
+	}
+	defer resp.Body.Close()
+	logutil.TraceContext(ctx, "anthropic: web search response", "status", resp.StatusCode)
+
+	if resp.StatusCode != http.StatusOK {
+		respBody, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("web search returned status %d: %s", resp.StatusCode, string(respBody))
+	}
+
+	var searchResp OllamaWebSearchResponse
+	if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil {
+		return nil, fmt.Errorf("failed to decode web search response: %w", err)
+	}
+	logutil.TraceContext(ctx, "anthropic: web search results", "count", len(searchResp.Results))
+
+	return &searchResp, nil
+}
+
+func ConvertOllamaToAnthropicResults(ollamaResults *OllamaWebSearchResponse) []WebSearchResult {
+	var results []WebSearchResult
+	for _, r := range ollamaResults.Results {
+		results = append(results, WebSearchResult{
+			Type:  "web_search_result",
+			URL:   r.URL,
+			Title: r.Title,
+		})
+	}
+	return results
+}
--- a/anthropic/anthropic_test.go
+++ b/anthropic/anthropic_test.go
@@ -3,6 +3,7 @@ package anthropic
 import (
 	"encoding/base64"
 	"encoding/json"
+	"strings"
 	"testing"

 	"github.com/google/go-cmp/cmp"
@@ -300,6 +301,78 @@ func TestFromMessagesRequest_WithTools(t *testing.T) {
 	}
 }

+func TestFromMessagesRequest_DropsCustomWebSearchWhenBuiltinPresent(t *testing.T) {
+	req := MessagesRequest{
+		Model:     "test-model",
+		MaxTokens: 1024,
+		Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+		Tools: []Tool{
+			{
+				Type: "web_search_20250305",
+				Name: "web_search",
+			},
+			{
+				Type:        "custom",
+				Name:        "web_search",
+				Description: "User-defined web search that should be dropped",
+				InputSchema: json.RawMessage(`{"type":"invalid"}`),
+			},
+			{
+				Type:        "custom",
+				Name:        "get_weather",
+				Description: "Get current weather",
+				InputSchema: json.RawMessage(`{"type":"object","properties":{"location":{"type":"string"}},"required":["location"]}`),
+			},
+		},
+	}
+
+	result, err := FromMessagesRequest(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(result.Tools) != 2 {
+		t.Fatalf("expected 2 tools after dropping custom web_search, got %d", len(result.Tools))
+	}
+	if result.Tools[0].Function.Name != "web_search" {
+		t.Fatalf("expected first tool to be built-in web_search, got %q", result.Tools[0].Function.Name)
+	}
+	if result.Tools[1].Function.Name != "get_weather" {
+		t.Fatalf("expected second tool to be get_weather, got %q", result.Tools[1].Function.Name)
+	}
+}
+
+func TestFromMessagesRequest_KeepsCustomWebSearchWhenBuiltinAbsent(t *testing.T) {
+	req := MessagesRequest{
+		Model:     "test-model",
+		MaxTokens: 1024,
+		Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+		Tools: []Tool{
+			{
+				Type:        "custom",
+				Name:        "web_search",
+				Description: "User-defined web search",
+				InputSchema: json.RawMessage(`{"type":"object","properties":{"query":{"type":"string"}},"required":["query"]}`),
+			},
+		},
+	}
+
+	result, err := FromMessagesRequest(req)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(result.Tools) != 1 {
+		t.Fatalf("expected 1 custom tool, got %d", len(result.Tools))
+	}
+	if result.Tools[0].Function.Name != "web_search" {
+		t.Fatalf("expected custom tool name web_search, got %q", result.Tools[0].Function.Name)
+	}
+	if result.Tools[0].Function.Description != "User-defined web search" {
+		t.Fatalf("expected custom description preserved, got %q", result.Tools[0].Function.Description)
+	}
+}
+
 func TestFromMessagesRequest_WithThinking(t *testing.T) {
 	req := MessagesRequest{
 		Model:     "test-model",
@@ -1063,3 +1136,320 @@ func TestEstimateTokens_EmptyContent(t *testing.T) {
 		t.Errorf("expected 0 tokens for empty content, got %d", tokens)
 	}
 }
+
+// Web Search Tests
+
+func TestConvertTool_WebSearch(t *testing.T) {
+	tool := Tool{
+		Type:    "web_search_20250305",
+		Name:    "web_search",
+		MaxUses: 5,
+	}
+
+	result, isServerTool, err := convertTool(tool)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if !isServerTool {
+		t.Error("expected isServerTool to be true for web_search tool")
+	}
+
+	if result.Type != "function" {
+		t.Errorf("expected type 'function', got %q", result.Type)
+	}
+
+	if result.Function.Name != "web_search" {
+		t.Errorf("expected name 'web_search', got %q", result.Function.Name)
+	}
+
+	if result.Function.Description == "" {
+		t.Error("expected non-empty description for web_search tool")
+	}
+
+	// Check that query parameter is defined
+	if result.Function.Parameters.Properties == nil {
+		t.Fatal("expected properties to be defined")
+	}
+
+	queryProp, ok := result.Function.Parameters.Properties.Get("query")
+	if !ok {
+		t.Error("expected 'query' property to be defined")
+	}
+
+	if len(queryProp.Type) == 0 || queryProp.Type[0] != "string" {
+		t.Errorf("expected query type to be 'string', got %v", queryProp.Type)
+	}
+}
+
+func TestConvertTool_RegularTool(t *testing.T) {
+	tool := Tool{
+		Type:        "custom",
+		Name:        "get_weather",
+		Description: "Get the weather",
+		InputSchema: json.RawMessage(`{"type":"object","properties":{"location":{"type":"string"}}}`),
+	}
+
+	result, isServerTool, err := convertTool(tool)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if isServerTool {
+		t.Error("expected isServerTool to be false for regular tool")
+	}
+
+	if result.Function.Name != "get_weather" {
+		t.Errorf("expected name 'get_weather', got %q", result.Function.Name)
+	}
+}
+
+func TestConvertMessage_ServerToolUse(t *testing.T) {
+	msg := MessageParam{
+		Role: "assistant",
+		Content: []any{
+			map[string]any{
+				"type":  "server_tool_use",
+				"id":    "srvtoolu_123",
+				"name":  "web_search",
+				"input": map[string]any{"query": "test query"},
+			},
+		},
+	}
+
+	messages, err := convertMessage(msg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(messages) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(messages))
+	}
+
+	if len(messages[0].ToolCalls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(messages[0].ToolCalls))
+	}
+
+	tc := messages[0].ToolCalls[0]
+	if tc.ID != "srvtoolu_123" {
+		t.Errorf("expected tool call ID 'srvtoolu_123', got %q", tc.ID)
+	}
+
+	if tc.Function.Name != "web_search" {
+		t.Errorf("expected tool name 'web_search', got %q", tc.Function.Name)
+	}
+}
+
+func TestConvertMessage_WebSearchToolResult(t *testing.T) {
+	msg := MessageParam{
+		Role: "user",
+		Content: []any{
+			map[string]any{
+				"type":        "web_search_tool_result",
+				"tool_use_id": "srvtoolu_123",
+				"content": []any{
+					map[string]any{
+						"type":  "web_search_result",
+						"title": "Test Result",
+						"url":   "https://example.com",
+					},
+				},
+			},
+		},
+	}
+
+	messages, err := convertMessage(msg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Should have a tool result message
+	if len(messages) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(messages))
+	}
+
+	if messages[0].Role != "tool" {
+		t.Errorf("expected role 'tool', got %q", messages[0].Role)
+	}
+
+	if messages[0].ToolCallID != "srvtoolu_123" {
+		t.Errorf("expected tool_call_id 'srvtoolu_123', got %q", messages[0].ToolCallID)
+	}
+
+	if messages[0].Content == "" {
+		t.Error("expected non-empty content from web search results")
+	}
+}
+
+func TestConvertMessage_WebSearchToolResultEmptyStillCreatesToolMessage(t *testing.T) {
+	msg := MessageParam{
+		Role: "user",
+		Content: []any{
+			map[string]any{
+				"type":        "web_search_tool_result",
+				"tool_use_id": "srvtoolu_empty",
+				"content":     []any{},
+			},
+		},
+	}
+
+	messages, err := convertMessage(msg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(messages) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(messages))
+	}
+	if messages[0].Role != "tool" {
+		t.Fatalf("expected role tool, got %q", messages[0].Role)
+	}
+	if messages[0].ToolCallID != "srvtoolu_empty" {
+		t.Fatalf("expected tool_call_id srvtoolu_empty, got %q", messages[0].ToolCallID)
+	}
+	if messages[0].Content != "" {
+		t.Fatalf("expected empty content for empty web search results, got %q", messages[0].Content)
+	}
+}
+
+func TestConvertMessage_WebSearchToolResultErrorStillCreatesToolMessage(t *testing.T) {
+	msg := MessageParam{
+		Role: "user",
+		Content: []any{
+			map[string]any{
+				"type":        "web_search_tool_result",
+				"tool_use_id": "srvtoolu_error",
+				"content": map[string]any{
+					"type":       "web_search_tool_result_error",
+					"error_code": "max_uses_exceeded",
+				},
+			},
+		},
+	}
+
+	messages, err := convertMessage(msg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if len(messages) != 1 {
+		t.Fatalf("expected 1 message, got %d", len(messages))
+	}
+	if messages[0].Role != "tool" {
+		t.Fatalf("expected role tool, got %q", messages[0].Role)
+	}
+	if messages[0].ToolCallID != "srvtoolu_error" {
+		t.Fatalf("expected tool_call_id srvtoolu_error, got %q", messages[0].ToolCallID)
+	}
+	if !strings.Contains(messages[0].Content, "max_uses_exceeded") {
+		t.Fatalf("expected error code in converted tool content, got %q", messages[0].Content)
+	}
+}
+
+func TestConvertOllamaToAnthropicResults(t *testing.T) {
+	ollamaResp := &OllamaWebSearchResponse{
+		Results: []OllamaWebSearchResult{
+			{
+				Title:   "Test Title",
+				URL:     "https://example.com",
+				Content: "Test content",
+			},
+			{
+				Title:   "Another Result",
+				URL:     "https://example.org",
+				Content: "More content",
+			},
+		},
+	}
+
+	results := ConvertOllamaToAnthropicResults(ollamaResp)
+
+	if len(results) != 2 {
+		t.Fatalf("expected 2 results, got %d", len(results))
+	}
+
+	if results[0].Type != "web_search_result" {
+		t.Errorf("expected type 'web_search_result', got %q", results[0].Type)
+	}
+
+	if results[0].Title != "Test Title" {
+		t.Errorf("expected title 'Test Title', got %q", results[0].Title)
+	}
+
+	if results[0].URL != "https://example.com" {
+		t.Errorf("expected URL 'https://example.com', got %q", results[0].URL)
+	}
+}
+
+func TestWebSearchTypes(t *testing.T) {
+	// Test that WebSearchResult serializes correctly
+	result := WebSearchResult{
+		Type:             "web_search_result",
+		URL:              "https://example.com",
+		Title:            "Test",
+		EncryptedContent: "abc123",
+		PageAge:          "2025-01-01",
+	}
+
+	data, err := json.Marshal(result)
+	if err != nil {
+		t.Fatalf("failed to marshal WebSearchResult: %v", err)
+	}
+
+	var unmarshaled WebSearchResult
+	if err := json.Unmarshal(data, &unmarshaled); err != nil {
+		t.Fatalf("failed to unmarshal WebSearchResult: %v", err)
+	}
+
+	if unmarshaled.Type != result.Type {
+		t.Errorf("type mismatch: expected %q, got %q", result.Type, unmarshaled.Type)
+	}
+
+	// Test WebSearchToolResultError
+	errResult := WebSearchToolResultError{
+		Type:      "web_search_tool_result_error",
+		ErrorCode: "max_uses_exceeded",
+	}
+
+	data, err = json.Marshal(errResult)
+	if err != nil {
+		t.Fatalf("failed to marshal WebSearchToolResultError: %v", err)
+	}
+
+	var unmarshaledErr WebSearchToolResultError
+	if err := json.Unmarshal(data, &unmarshaledErr); err != nil {
+		t.Fatalf("failed to unmarshal WebSearchToolResultError: %v", err)
+	}
+
+	if unmarshaledErr.ErrorCode != "max_uses_exceeded" {
+		t.Errorf("error_code mismatch: expected 'max_uses_exceeded', got %q", unmarshaledErr.ErrorCode)
+	}
+}
+
+func TestCitation(t *testing.T) {
+	citation := Citation{
+		Type:           "web_search_result_location",
+		URL:            "https://example.com",
+		Title:          "Example",
+		EncryptedIndex: "enc123",
+		CitedText:      "Some cited text...",
+	}
+
+	data, err := json.Marshal(citation)
+	if err != nil {
+		t.Fatalf("failed to marshal Citation: %v", err)
+	}
+
+	var unmarshaled Citation
+	if err := json.Unmarshal(data, &unmarshaled); err != nil {
+		t.Fatalf("failed to unmarshal Citation: %v", err)
+	}
+
+	if unmarshaled.Type != "web_search_result_location" {
+		t.Errorf("type mismatch: expected 'web_search_result_location', got %q", unmarshaled.Type)
+	}
+
+	if unmarshaled.CitedText != "Some cited text..." {
+		t.Errorf("cited_text mismatch: expected 'Some cited text...', got %q", unmarshaled.CitedText)
+	}
+}
--- a/anthropic/trace.go
+++ b/anthropic/trace.go
@@ -0,0 +1,352 @@
+package anthropic
+
+import (
+	"encoding/json"
+	"fmt"
+	"sort"
+
+	"github.com/ollama/ollama/api"
+)
+
+// Trace truncation limits.
+//
+// TraceMaxStringRunes is the longest string, in runes, that
+// TraceTruncateString returns unshortened. TraceMaxSliceItems and
+// TraceMaxMapEntries cap how many slice elements and map keys
+// TraceCompactValue keeps per container. TraceMaxDepth is the nesting depth
+// at which TraceCompactValue stops descending and emits a summary string
+// for arrays and objects instead.
+const (
+	TraceMaxStringRunes = 240
+	TraceMaxSliceItems  = 8
+	TraceMaxMapEntries  = 16
+	TraceMaxDepth       = 4
+)
+
+// TraceTruncateString shortens s to at most TraceMaxStringRunes runes. When s
+// is longer, the kept prefix is followed by "...(+N chars)", where N is the
+// number of runes that were dropped; otherwise s is returned unchanged.
+func TraceTruncateString(s string) string {
+	// A string never has more runes than bytes, so anything this short is
+	// already within the limit and needs no rune decoding or allocation.
+	if len(s) <= TraceMaxStringRunes {
+		return s
+	}
+	runes := []rune(s)
+	if len(runes) <= TraceMaxStringRunes {
+		// Many bytes but few runes (multi-byte characters): still fits.
+		return s
+	}
+	return fmt.Sprintf("%s...(+%d chars)", string(runes[:TraceMaxStringRunes]), len(runes)-TraceMaxStringRunes)
+}
+
+// TraceJSON marshals v to JSON, decodes the bytes back into generic values,
+// and returns the result of TraceCompactValue on them. A nil v yields nil. If
+// marshaling fails, a map with the error text and v's type is returned; if
+// decoding fails, the truncated raw JSON string is returned instead.
+func TraceJSON(v any) any {
+	if v == nil {
+		return nil
+	}
+
+	encoded, marshalErr := json.Marshal(v)
+	if marshalErr != nil {
+		failure := make(map[string]any, 2)
+		failure["marshal_error"] = marshalErr.Error()
+		failure["type"] = fmt.Sprintf("%T", v)
+		return failure
+	}
+
+	var decoded any
+	if unmarshalErr := json.Unmarshal(encoded, &decoded); unmarshalErr != nil {
+		return TraceTruncateString(string(encoded))
+	}
+
+	return TraceCompactValue(decoded, 0)
+}
+
+// TraceCompactValue shrinks a decoded JSON value for trace output. Strings go
+// through TraceTruncateString, slices keep at most TraceMaxSliceItems
+// elements, and maps keep at most TraceMaxMapEntries keys (taken in sorted
+// order); whatever is dropped is summarised by a marker entry. depth is the
+// current nesting level: from TraceMaxDepth onwards every non-string value is
+// replaced by a short placeholder string instead of being expanded.
+func TraceCompactValue(v any, depth int) any {
+	if v == nil {
+		return nil
+	}
+	// Strings are treated the same way at every depth.
+	if text, isString := v.(string); isString {
+		return TraceTruncateString(text)
+	}
+
+	tooDeep := depth >= TraceMaxDepth
+	switch val := v.(type) {
+	case []any:
+		if tooDeep {
+			return fmt.Sprintf("<array len=%d>", len(val))
+		}
+		keep := min(len(val), TraceMaxSliceItems)
+		compacted := make([]any, 0, keep+1)
+		for _, item := range val[:keep] {
+			compacted = append(compacted, TraceCompactValue(item, depth+1))
+		}
+		if dropped := len(val) - keep; dropped > 0 {
+			compacted = append(compacted, fmt.Sprintf("... +%d more items", dropped))
+		}
+		return compacted
+	case map[string]any:
+		if tooDeep {
+			return fmt.Sprintf("<object keys=%d>", len(val))
+		}
+		// Sort the keys so the retained subset is deterministic.
+		names := make([]string, 0, len(val))
+		for name := range val {
+			names = append(names, name)
+		}
+		sort.Strings(names)
+		keep := min(len(names), TraceMaxMapEntries)
+		compacted := make(map[string]any, keep+1)
+		for _, name := range names[:keep] {
+			compacted[name] = TraceCompactValue(val[name], depth+1)
+		}
+		if dropped := len(names) - keep; dropped > 0 {
+			compacted["__truncated_keys"] = dropped
+		}
+		return compacted
+	default:
+		if tooDeep {
+			return fmt.Sprintf("<%T>", v)
+		}
+		return v
+	}
+}
+
+// ---------------------------------------------------------------------------
+// Anthropic request/response tracing
+// ---------------------------------------------------------------------------
+
+// TraceMessagesRequest summarises a MessagesRequest as a map for trace
+// logging. Messages, system content, tools, tool choice, and thinking
+// settings are compacted; temperature, top_p, and top_k are dereferenced and
+// appear as nil when the request leaves them unset.
+func TraceMessagesRequest(r MessagesRequest) map[string]any {
+	trace := make(map[string]any, 12)
+	trace["model"] = r.Model
+	trace["max_tokens"] = r.MaxTokens
+	trace["messages"] = traceMessageParams(r.Messages)
+	trace["system"] = traceAnthropicContent(r.System)
+	trace["stream"] = r.Stream
+	trace["tools"] = traceTools(r.Tools)
+	trace["tool_choice"] = TraceJSON(r.ToolChoice)
+	trace["thinking"] = TraceJSON(r.Thinking)
+	trace["stop_sequences"] = r.StopSequences
+	trace["temperature"] = ptrVal(r.Temperature)
+	trace["top_p"] = ptrVal(r.TopP)
+	trace["top_k"] = ptrVal(r.TopK)
+	return trace
+}
+
+// TraceMessagesResponse summarises a MessagesResponse as a map for trace
+// logging. Only the content is compacted (via TraceJSON); the id, model,
+// stop reason, and usage are copied through as they are.
+func TraceMessagesResponse(r MessagesResponse) map[string]any {
+	trace := make(map[string]any, 5)
+	trace["id"] = r.ID
+	trace["model"] = r.Model
+	trace["content"] = TraceJSON(r.Content)
+	trace["stop_reason"] = r.StopReason
+	trace["usage"] = r.Usage
+	return trace
+}
+
+// traceMessageParams builds one trace entry per message, holding its role
+// and its compacted content. The result is never nil, even for no messages.
+func traceMessageParams(msgs []MessageParam) []map[string]any {
+	traced := make([]map[string]any, len(msgs))
+	for i := range msgs {
+		entry := make(map[string]any, 2)
+		entry["role"] = msgs[i].Role
+		entry["content"] = traceAnthropicContent(msgs[i].Content)
+		traced[i] = entry
+	}
+	return traced
+}
+
+// traceAnthropicContent compacts message or system content for tracing.
+// nil stays nil, a plain string is truncated, and a []any is traced element
+// by element: map elements go through traceAnthropicBlock, anything else
+// through TraceCompactValue. Every other content type falls back to TraceJSON.
+func traceAnthropicContent(content any) any {
+	if content == nil {
+		return nil
+	}
+	if text, isString := content.(string); isString {
+		return TraceTruncateString(text)
+	}
+
+	items, isList := content.([]any)
+	if !isList {
+		return TraceJSON(content)
+	}
+
+	traced := make([]any, len(items))
+	for i, item := range items {
+		if asMap, isMap := item.(map[string]any); isMap {
+			traced[i] = traceAnthropicBlock(asMap)
+		} else {
+			traced[i] = TraceCompactValue(item, 0)
+		}
+	}
+	return traced
+}
+
+// traceAnthropicBlock returns a compact trace view of one decoded content
+// block. The result always carries the block's "type" (empty when the field
+// is missing or not a string) plus the fields relevant to that type:
+//   - text / thinking: the payload, truncated
+//   - tool_use / server_tool_use: id, name, and compacted input
+//   - tool_result / web_search_tool_result: tool_use_id and compacted content
+//   - image: source metadata, with the payload replaced by its length
+//   - anything else: the whole block, compacted, under "block"
+func traceAnthropicBlock(block map[string]any) map[string]any {
+	blockType, _ := block["type"].(string)
+	out := map[string]any{"type": blockType}
+	switch blockType {
+	case "text", "thinking":
+		// The payload is stored under a key named after the block type.
+		// TraceCompactValue truncates strings and compacts everything else.
+		out[blockType] = TraceCompactValue(block[blockType], 0)
+	case "tool_use", "server_tool_use":
+		out["id"] = block["id"]
+		out["name"] = block["name"]
+		out["input"] = TraceCompactValue(block["input"], 0)
+	case "tool_result", "web_search_tool_result":
+		out["tool_use_id"] = block["tool_use_id"]
+		out["content"] = TraceCompactValue(block["content"], 0)
+	case "image":
+		if source, ok := block["source"].(map[string]any); ok {
+			// Log only the payload size. A source without data (e.g. one that
+			// carries just a url) must report 0, not len("<nil>").
+			dataLen := 0
+			switch data := source["data"].(type) {
+			case nil:
+				// no payload present
+			case string:
+				dataLen = len(data)
+			default:
+				dataLen = len(fmt.Sprint(data))
+			}
+			out["source"] = map[string]any{
+				"type":       source["type"],
+				"media_type": source["media_type"],
+				"url":        source["url"],
+				"data_len":   dataLen,
+			}
+		}
+	default:
+		out["block"] = TraceCompactValue(block, 0)
+	}
+	return out
+}
+
+// traceTools applies TraceTool to every tool and returns the results in the
+// same order. The result is never nil, even when tools is empty.
+func traceTools(tools []Tool) []map[string]any {
+	traced := make([]map[string]any, len(tools))
+	for i := range tools {
+		traced[i] = TraceTool(tools[i])
+	}
+	return traced
+}
+
+// TraceTool summarises an Anthropic Tool for trace logging: the description
+// is truncated and the input schema compacted, while type, name, and
+// max_uses are copied through unchanged.
+func TraceTool(t Tool) map[string]any {
+	description := TraceTruncateString(t.Description)
+	schema := TraceJSON(t.InputSchema)
+
+	trace := make(map[string]any, 5)
+	trace["type"] = t.Type
+	trace["name"] = t.Name
+	trace["description"] = description
+	trace["input_schema"] = schema
+	trace["max_uses"] = t.MaxUses
+	return trace
+}
+
+// ContentBlockTypes lists the "type" of each block when content is a []any.
+// For a map block the entry is its "type" string (empty if absent or not a
+// string); for any other element it is the element's Go type name. Content
+// that is not a []any yields nil.
+func ContentBlockTypes(content any) []string {
+	blocks, isList := content.([]any)
+	if !isList {
+		return nil
+	}
+
+	types := make([]string, len(blocks))
+	for i, block := range blocks {
+		switch b := block.(type) {
+		case map[string]any:
+			name, _ := b["type"].(string)
+			types[i] = name
+		default:
+			types[i] = fmt.Sprintf("%T", block)
+		}
+	}
+	return types
+}
+
+// ptrVal returns the value v points to, or a nil interface when v is nil.
+func ptrVal[T any](v *T) any {
+	var out any
+	if v != nil {
+		out = *v
+	}
+	return out
+}
+
+// ---------------------------------------------------------------------------
+// Ollama api.* tracing (shared between anthropic and middleware packages)
+// ---------------------------------------------------------------------------
+
+// TraceChatRequest summarises an Ollama ChatRequest for trace logging. A nil
+// request yields nil. Messages, tools, and the think setting are compacted;
+// the model and options are copied through.
+func TraceChatRequest(req *api.ChatRequest) map[string]any {
+	if req == nil {
+		return nil
+	}
+
+	// An unset stream flag is reported as false.
+	streaming := req.Stream != nil && *req.Stream
+
+	trace := make(map[string]any, 6)
+	trace["model"] = req.Model
+	trace["messages"] = TraceAPIMessages(req.Messages)
+	trace["tools"] = TraceAPITools(req.Tools)
+	trace["stream"] = streaming
+	trace["options"] = req.Options
+	trace["think"] = TraceJSON(req.Think)
+	return trace
+}
+
+// TraceChatResponse summarises an Ollama ChatResponse for trace logging: the
+// message goes through TraceAPIMessage and the metrics through TraceJSON,
+// while model, done, and done_reason are copied through.
+func TraceChatResponse(resp api.ChatResponse) map[string]any {
+	trace := make(map[string]any, 5)
+	trace["model"] = resp.Model
+	trace["done"] = resp.Done
+	trace["done_reason"] = resp.DoneReason
+	trace["message"] = TraceAPIMessage(resp.Message)
+	trace["metrics"] = TraceJSON(resp.Metrics)
+	return trace
+}
+
+// TraceAPIMessages applies TraceAPIMessage to each message, preserving order.
+// The result is never nil, even when msgs is empty.
+func TraceAPIMessages(msgs []api.Message) []map[string]any {
+	traced := make([]map[string]any, len(msgs))
+	for i := range msgs {
+		traced[i] = TraceAPIMessage(msgs[i])
+	}
+	return traced
+}
+
+// TraceAPIMessage summarises a single api.Message for trace logging. Content
+// and thinking text are truncated, images are reduced to their sizes, and
+// tool calls are compacted; role, tool name, and tool call id pass through.
+func TraceAPIMessage(m api.Message) map[string]any {
+	trace := make(map[string]any, 7)
+	trace["role"] = m.Role
+	trace["content"] = TraceTruncateString(m.Content)
+	trace["thinking"] = TraceTruncateString(m.Thinking)
+	trace["images"] = traceImageSizes(m.Images)
+	trace["tool_calls"] = traceToolCalls(m.ToolCalls)
+	trace["tool_name"] = m.ToolName
+	trace["tool_call_id"] = m.ToolCallID
+	return trace
+}
+
+// traceImageSizes returns the length of each image in order, or nil when
+// there are no images.
+func traceImageSizes(images []api.ImageData) []int {
+	if len(images) == 0 {
+		return nil
+	}
+	sizes := make([]int, len(images))
+	for i := range images {
+		sizes[i] = len(images[i])
+	}
+	return sizes
+}
+
+// TraceAPITools applies TraceAPITool to each tool, preserving order. The
+// result is never nil, even when tools is empty.
+func TraceAPITools(tools api.Tools) []map[string]any {
+	traced := make([]map[string]any, len(tools))
+	for i := range tools {
+		traced[i] = TraceAPITool(tools[i])
+	}
+	return traced
+}
+
+// TraceAPITool summarises a single api.Tool for trace logging: the function
+// description is truncated and its parameters compacted via TraceJSON, while
+// the tool type and function name are copied through.
+func TraceAPITool(t api.Tool) map[string]any {
+	fn := t.Function
+
+	trace := make(map[string]any, 4)
+	trace["type"] = t.Type
+	trace["name"] = fn.Name
+	trace["description"] = TraceTruncateString(fn.Description)
+	trace["parameters"] = TraceJSON(fn.Parameters)
+	return trace
+}
+
+// TraceToolCall summarises an api.ToolCall for trace logging as its id, the
+// called function's name, and the arguments compacted via TraceJSON.
+func TraceToolCall(tc api.ToolCall) map[string]any {
+	trace := make(map[string]any, 3)
+	trace["id"] = tc.ID
+	trace["name"] = tc.Function.Name
+	trace["args"] = TraceJSON(tc.Function.Arguments)
+	return trace
+}
+
+// traceToolCalls applies TraceToolCall to each call in order, or returns nil
+// when there are no calls.
+func traceToolCalls(tcs []api.ToolCall) []map[string]any {
+	if len(tcs) == 0 {
+		return nil
+	}
+	traced := make([]map[string]any, len(tcs))
+	for i := range tcs {
+		traced[i] = TraceToolCall(tcs[i])
+	}
+	return traced
+}
--- a/api/client.go
+++ b/api/client.go
@@ -449,6 +449,16 @@ func (c *Client) Version(ctx context.Context) (string, error) {
 	return version.Version, nil
 }

+// CloudStatusExperimental issues GET /api/status and returns the decoded
+// StatusResponse, which reports whether cloud features are disabled on the
+// server. On a request error the response is nil.
+func (c *Client) CloudStatusExperimental(ctx context.Context) (*StatusResponse, error) {
+	status := new(StatusResponse)
+	err := c.do(ctx, http.MethodGet, "/api/status", nil, status)
+	if err != nil {
+		return nil, err
+	}
+	return status, nil
+}
+
 // Signout will signout a client for a local ollama server.
 func (c *Client) Signout(ctx context.Context) error {
 	return c.do(ctx, http.MethodPost, "/api/signout", nil, nil)
--- a/api/types.go
+++ b/api/types.go
@@ -834,6 +834,16 @@ type TokenResponse struct {
 	Token string `json:"token"`
 }

+// CloudStatus is the cloud-feature state carried inside a StatusResponse.
+type CloudStatus struct {
+	// Disabled reports whether cloud features are disabled.
+	Disabled bool   `json:"disabled"`
+	// Source names where the setting came from.
+	// NOTE(review): presumably one of "none", "env", "config", or "both" —
+	// confirm against the server handler that fills this in.
+	Source   string `json:"source"`
+}
+
+// StatusResponse is the response from [Client.CloudStatusExperimental].
+type StatusResponse struct {
+	// Cloud holds the cloud-feature state, serialized under "cloud".
+	Cloud CloudStatus `json:"cloud"`
+}
+
 // GenerateResponse is the response passed into [GenerateResponseFunc].
 type GenerateResponse struct {
 	// Model is the model name that generated the response.
--- a/app/cmd/app/app.go
+++ b/app/cmd/app/app.go
@@ -35,6 +35,7 @@ import (
 var (
 	wv           = &Webview{}
 	uiServerPort int
+	appStore     *store.Store
 )

 var debug = strings.EqualFold(os.Getenv("OLLAMA_DEBUG"), "true") || os.Getenv("OLLAMA_DEBUG") == "1"
@@ -208,6 +209,7 @@ func main() {
 	uiServerPort = port

 	st := &store.Store{}
+	appStore = st

 	// Enable CORS in development mode
 	if devMode {
@@ -253,6 +255,8 @@ func main() {
 		done <- osrv.Run(octx)
 	}()

+	upd := &updater.Updater{Store: st}
+
 	uiServer := ui.Server{
 		Token: token,
 		Restart: func() {
@@ -267,6 +271,10 @@ func main() {
 		ToolRegistry: toolRegistry,
 		Dev:          devMode,
 		Logger:       slog.Default(),
+		Updater:      upd,
+		UpdateAvailableFunc: func() {
+			UpdateAvailable("")
+		},
 	}

 	srv := &http.Server{
@@ -284,8 +292,20 @@ func main() {
 		slog.Debug("background desktop server done")
 	}()

-	updater := &updater.Updater{Store: st}
-	updater.StartBackgroundUpdaterChecker(ctx, UpdateAvailable)
+	upd.StartBackgroundUpdaterChecker(ctx, UpdateAvailable)
+
+	// Check for pending updates on startup (show tray notification if update is ready)
+	if updater.IsUpdatePending() {
+		// On Windows, the tray is initialized in osRun(). Calling UpdateAvailable
+		// before that would dereference a nil tray callback.
+		// TODO: refactor so the update check runs after platform init on all platforms.
+		if runtime.GOOS == "windows" {
+			slog.Debug("update pending on startup, deferring tray notification until tray initialization")
+		} else {
+			slog.Debug("update pending on startup, showing tray notification")
+			UpdateAvailable("")
+		}
+	}

 	hasCompletedFirstRun, err := st.HasCompletedFirstRun()
 	if err != nil {
@@ -348,6 +368,17 @@ func startHiddenTasks() {
 			// CLI triggered app startup use-case
 			slog.Info("deferring pending update for fast startup")
 		} else {
+			// Check if auto-update is enabled before automatically upgrading
+			settings, err := appStore.Settings()
+			if err != nil {
+				slog.Warn("failed to load settings for upgrade check", "error", err)
+			} else if !settings.AutoUpdateEnabled {
+				slog.Info("auto-update disabled, skipping automatic upgrade at startup")
+				// Still show tray notification so user knows update is ready
+				UpdateAvailable("")
+				return
+			}
+
 			if err := updater.DoUpgradeAtStartup(); err != nil {
 				slog.Info("unable to perform upgrade at startup", "error", err)
 				// Make sure the restart to upgrade menu shows so we can attempt an interactive upgrade to get authorization
--- a/app/cmd/app/app_windows.go
+++ b/app/cmd/app/app_windows.go
@@ -154,6 +154,10 @@ func handleURLSchemeRequest(urlScheme string) {
 }

+// UpdateAvailable forwards an update notification for ver to the tray.
+// When the tray has not been created yet (app.t is nil) it logs a debug
+// message and returns nil without notifying anything.
 func UpdateAvailable(ver string) error {
+	// Guard against being called before app.t is assigned; calling a method
+	// through the nil tray would otherwise crash.
+	if app.t == nil {
+		slog.Debug("tray not yet initialized, skipping update notification")
+		return nil
+	}
 	return app.t.UpdateAvailable(ver)
 }

@@ -165,6 +169,14 @@ func osRun(shutdown func(), hasCompletedFirstRun, startHidden bool) {
 		log.Fatalf("Failed to start: %s", err)
 	}

+	// Check for pending updates now that the tray is initialized.
+	// The platform-independent check in app.go fires before osRun,
+	// when app.t is still nil, so we must re-check here.
+	if updater.IsUpdatePending() {
+		slog.Debug("update pending on startup, showing tray notification")
+		UpdateAvailable("")
+	}
+
 	signals := make(chan os.Signal, 1)
 	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)

--- a/app/server/server.go
+++ b/app/server/server.go
@@ -41,6 +41,11 @@ type InferenceCompute struct {
 	VRAM    string
 }

+// InferenceInfo is what GetInferenceInfo extracts from the server log.
+type InferenceInfo struct {
+	// Computes holds one entry per "inference compute" log line that matched.
+	Computes             []InferenceCompute
+	// DefaultContextLength is the default_num_ctx value parsed from the
+	// "vram-based default context" log line; it stays 0 when no such line
+	// was found.
+	DefaultContextLength int
+}
+
 func New(s *store.Store, devMode bool) *Server {
 	p := resolvePath("ollama")
 	return &Server{store: s, bin: p, dev: devMode}
@@ -205,6 +210,11 @@ func (s *Server) cmd(ctx context.Context) (*exec.Cmd, error) {
 		return nil, err
 	}

+	cloudDisabled, err := s.store.CloudDisabled()
+	if err != nil {
+		return nil, err
+	}
+
 	cmd := commandContext(ctx, s.bin, "serve")
 	cmd.Stdout, cmd.Stderr = s.log, s.log

@@ -230,6 +240,11 @@ func (s *Server) cmd(ctx context.Context) (*exec.Cmd, error) {
 	if settings.ContextLength > 0 {
 		env["OLLAMA_CONTEXT_LENGTH"] = strconv.Itoa(settings.ContextLength)
 	}
+	if cloudDisabled {
+		env["OLLAMA_NO_CLOUD"] = "1"
+	} else {
+		env["OLLAMA_NO_CLOUD"] = "0"
+	}
 	cmd.Env = []string{}
 	for k, v := range env {
 		cmd.Env = append(cmd.Env, k+"="+v)
@@ -262,9 +277,12 @@ func openRotatingLog() (io.WriteCloser, error) {

 // Attempt to retrieve inference compute information from the server
 // log.  Set ctx to timeout to control how long to wait for the logs to appear
-func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
-	inference := []InferenceCompute{}
-	marker := regexp.MustCompile(`inference compute.*library=`)
+func GetInferenceInfo(ctx context.Context) (*InferenceInfo, error) {
+	info := &InferenceInfo{}
+	computeMarker := regexp.MustCompile(`inference compute.*library=`)
+	defaultCtxMarker := regexp.MustCompile(`vram-based default context`)
+	defaultCtxRegex := regexp.MustCompile(`default_num_ctx=(\d+)`)
+
 	q := `inference compute.*%s=["]([^"]*)["]`
 	nq := `inference compute.*%s=(\S+)\s`
 	type regex struct {
@@ -330,8 +348,8 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
 		scanner := bufio.NewScanner(file)
 		for scanner.Scan() {
 			line := scanner.Text()
-			match := marker.FindStringSubmatch(line)
-			if len(match) > 0 {
+			// Check for inference compute lines
+			if computeMarker.MatchString(line) {
 				ic := InferenceCompute{
 					Library: get("library", line),
 					Variant: get("variant", line),
@@ -342,12 +360,25 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
 				}

 				slog.Info("Matched", "inference compute", ic)
-				inference = append(inference, ic)
-			} else {
-				// Break out on first non matching line after we start matching
-				if len(inference) > 0 {
-					return inference, nil
+				info.Computes = append(info.Computes, ic)
+				continue
+			}
+			// Check for default context length line
+			if defaultCtxMarker.MatchString(line) {
+				match := defaultCtxRegex.FindStringSubmatch(line)
+				if len(match) > 1 {
+					numCtx, err := strconv.Atoi(match[1])
+					if err == nil {
+						info.DefaultContextLength = numCtx
+						slog.Info("Matched default context length", "default_num_ctx", numCtx)
+					}
 				}
+				return info, nil
+			}
+			// If we've found compute info but hit a non-matching line, return what we have
+			// This handles older server versions that don't log the default context line
+			if len(info.Computes) > 0 {
+				return info, nil
 			}
 		}
 		time.Sleep(100 * time.Millisecond)
--- a/app/server/server_test.go
+++ b/app/server/server_test.go
@@ -111,7 +111,7 @@ func TestServerCmd(t *testing.T) {
 			for _, want := range tt.want {
 				found := false
 				for _, env := range cmd.Env {
-					if strings.Contains(env, want) {
+					if strings.HasPrefix(env, want) {
 						found = true
 						break
 					}
@@ -123,7 +123,7 @@ func TestServerCmd(t *testing.T) {

 			for _, dont := range tt.dont {
 				for _, env := range cmd.Env {
-					if strings.Contains(env, dont) {
+					if strings.HasPrefix(env, dont) {
 						t.Errorf("unexpected environment variable: %s", env)
 					}
 				}
@@ -136,44 +136,119 @@ func TestServerCmd(t *testing.T) {
 	}
 }

-func TestGetInferenceComputer(t *testing.T) {
+// TestServerCmdCloudSettingEnv checks that Server.cmd always sets
+// OLLAMA_NO_CLOUD in the child environment: "0" when neither the environment
+// nor server.json disables cloud, and "1" when either one does (including an
+// unparseable environment value).
+func TestServerCmdCloudSettingEnv(t *testing.T) {
 	tests := []struct {
-		name string
-		log  string
-		exp  []InferenceCompute
+		name          string
+		envValue      string
+		configContent string
+		want          string
+	}{
+		{
+			name: "default cloud enabled",
+			want: "OLLAMA_NO_CLOUD=0",
+		},
+		{
+			name:     "env disables cloud",
+			envValue: "1",
+			want:     "OLLAMA_NO_CLOUD=1",
+		},
+		{
+			name:          "config disables cloud",
+			configContent: `{"disable_ollama_cloud": true}`,
+			want:          "OLLAMA_NO_CLOUD=1",
+		},
+		{
+			name:     "invalid env disables cloud",
+			envValue: "invalid",
+			want:     "OLLAMA_NO_CLOUD=1",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Redirect HOME/USERPROFILE to a temp dir; the optional config
+			// file below is written beneath it.
+			tmpHome := t.TempDir()
+			t.Setenv("HOME", tmpHome)
+			t.Setenv("USERPROFILE", tmpHome)
+			t.Setenv("OLLAMA_NO_CLOUD", tt.envValue)
+
+			if tt.configContent != "" {
+				configDir := filepath.Join(tmpHome, ".ollama")
+				if err := os.MkdirAll(configDir, 0o755); err != nil {
+					t.Fatalf("mkdir config dir: %v", err)
+				}
+				configPath := filepath.Join(configDir, "server.json")
+				if err := os.WriteFile(configPath, []byte(tt.configContent), 0o644); err != nil {
+					t.Fatalf("write config: %v", err)
+				}
+			}
+
+			st := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
+			defer st.Close()
+
+			s := &Server{store: st}
+			cmd, err := s.cmd(t.Context())
+			if err != nil {
+				t.Fatalf("s.cmd() error = %v", err)
+			}
+
+			// Require an exact KEY=VALUE match, not just a prefix.
+			found := false
+			for _, env := range cmd.Env {
+				if env == tt.want {
+					found = true
+					break
+				}
+			}
+			if !found {
+				t.Fatalf("expected environment variable %q in command env", tt.want)
+			}
+		})
+	}
+}
+
+func TestGetInferenceInfo(t *testing.T) {
+	tests := []struct {
+		name             string
+		log              string
+		expComputes      []InferenceCompute
+		expDefaultCtxLen int
 	}{
 		{
 			name: "metal",
 			log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
 time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
+time=2025-06-30T09:23:07.417-07:00 level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="96.0 GiB" default_num_ctx=262144
 time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
 `,
-			exp: []InferenceCompute{{
+			expComputes: []InferenceCompute{{
 				Library: "metal",
 				Driver:  "0.0",
 				VRAM:    "96.0 GiB",
 			}},
+			expDefaultCtxLen: 262144,
 		},
 		{
 			name: "cpu",
 			log: `time=2025-07-01T17:59:51.470Z level=INFO source=gpu.go:377 msg="no compatible GPUs were discovered"
 time=2025-07-01T17:59:51.470Z level=INFO source=types.go:130 msg="inference compute" id=0 library=cpu variant="" compute="" driver=0.0 name="" total="31.3 GiB" available="30.4 GiB"
+time=2025-07-01T17:59:51.471Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="31.3 GiB" default_num_ctx=32768
 [GIN] 2025/07/01 - 18:00:09 | 200 |      50.263µs | 100.126.204.152 | HEAD     "/"
 `,
-			exp: []InferenceCompute{{
+			expComputes: []InferenceCompute{{
 				Library: "cpu",
 				Driver:  "0.0",
 				VRAM:    "31.3 GiB",
 			}},
+			expDefaultCtxLen: 32768,
 		},
 		{
 			name: "cuda1",
 			log: `time=2025-07-01T19:33:43.162Z level=DEBUG source=amd_linux.go:419 msg="amdgpu driver not detected /sys/module/amdgpu"
 releasing cuda driver library
 time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference compute" id=GPU-452cac9f-6960-839c-4fb3-0cec83699196 library=cuda variant=v12 compute=6.1 driver=12.7 name="NVIDIA GeForce GT 1030" total="3.9 GiB" available="3.9 GiB"
+time=2025-07-01T19:33:43.163Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="3.9 GiB" default_num_ctx=4096
 [GIN] 2025/07/01 - 18:00:09 | 200 |      50.263µs | 100.126.204.152 | HEAD     "/"
 `,
-			exp: []InferenceCompute{{
+			expComputes: []InferenceCompute{{
 				Library: "cuda",
 				Variant: "v12",
 				Compute: "6.1",
@@ -181,6 +256,7 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 				Name:    "NVIDIA GeForce GT 1030",
 				VRAM:    "3.9 GiB",
 			}},
+			expDefaultCtxLen: 4096,
 		},
 		{
 			name: "frank",
@@ -188,9 +264,10 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 		releasing cuda driver library
 		time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-d6de3398-9932-6902-11ec-fee8e424c8a2 library=cuda variant=v12 compute=7.5 driver=12.8 name="NVIDIA GeForce RTX 2080 Ti" total="10.6 GiB" available="10.4 GiB"
 		time=2025-07-01T19:36:13.315Z level=INFO source=types.go:130 msg="inference compute" id=GPU-9abb57639fa80c50 library=rocm variant="" compute=gfx1030 driver=6.3 name=1002:73bf total="16.0 GiB" available="1.3 GiB"
+		time=2025-07-01T19:36:13.316Z level=INFO source=routes.go:1721 msg="vram-based default context" total_vram="26.6 GiB" default_num_ctx=32768
 		[GIN] 2025/07/01 - 18:00:09 | 200 |      50.263µs | 100.126.204.152 | HEAD     "/"
 		`,
-			exp: []InferenceCompute{
+			expComputes: []InferenceCompute{
 				{
 					Library: "cuda",
 					Variant: "v12",
@@ -207,6 +284,20 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 					VRAM:    "16.0 GiB",
 				},
 			},
+			expDefaultCtxLen: 32768,
+		},
+		{
+			name: "missing_default_context",
+			log: `time=2025-06-30T09:23:07.374-07:00 level=DEBUG source=sched.go:108 msg="starting llm scheduler"
+time=2025-06-30T09:23:07.416-07:00 level=INFO source=types.go:130 msg="inference compute" id=0 library=metal variant="" compute="" driver=0.0 name="" total="96.0 GiB" available="96.0 GiB"
+time=2025-06-30T09:25:56.197-07:00 level=DEBUG source=ggml.go:155 msg="key not found" key=general.alignment default=32
+`,
+			expComputes: []InferenceCompute{{
+				Library: "metal",
+				Driver:  "0.0",
+				VRAM:    "96.0 GiB",
+			}},
+			expDefaultCtxLen: 0, // No default context line, should return 0
 		},
 	}
 	for _, tt := range tests {
@@ -219,18 +310,21 @@ time=2025-07-01T19:33:43.162Z level=INFO source=types.go:130 msg="inference comp
 			}
 			ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
 			defer cancel()
-			ics, err := GetInferenceComputer(ctx)
+			info, err := GetInferenceInfo(ctx)
 			if err != nil {
-				t.Fatalf(" failed to get inference compute: %v", err)
+				t.Fatalf("failed to get inference info: %v", err)
 			}
-			if !reflect.DeepEqual(ics, tt.exp) {
-				t.Fatalf("got:\n%#v\nwant:\n%#v", ics, tt.exp)
+			if !reflect.DeepEqual(info.Computes, tt.expComputes) {
+				t.Fatalf("computes mismatch\ngot:\n%#v\nwant:\n%#v", info.Computes, tt.expComputes)
+			}
+			if info.DefaultContextLength != tt.expDefaultCtxLen {
+				t.Fatalf("default context length mismatch: got %d, want %d", info.DefaultContextLength, tt.expDefaultCtxLen)
 			}
 		})
 	}
 }

-func TestGetInferenceComputerTimeout(t *testing.T) {
+func TestGetInferenceInfoTimeout(t *testing.T) {
 	ctx, cancel := context.WithTimeout(t.Context(), 10*time.Millisecond)
 	defer cancel()
 	tmpDir := t.TempDir()
@@ -239,7 +333,7 @@ func TestGetInferenceComputerTimeout(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to write log file %s: %s", serverLogPath, err)
 	}
-	_, err = GetInferenceComputer(ctx)
+	_, err = GetInferenceInfo(ctx)
 	if err == nil {
 		t.Fatal("expected timeout")
 	}
--- a/app/store/cloud_config.go
+++ b/app/store/cloud_config.go
@@ -0,0 +1,128 @@
+//go:build windows || darwin
+
+package store
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+// serverConfigFilename is the name of the server config file; serverConfigPath
+// joins it onto the ".ollama" directory under the user's home directory.
+const serverConfigFilename = "server.json"
+
+// serverConfig is the subset of the server config file that this package
+// decodes.
+type serverConfig struct {
+	// DisableOllamaCloud is read from the "disable_ollama_cloud" key.
+	DisableOllamaCloud bool `json:"disable_ollama_cloud,omitempty"`
+}
+
+// CloudDisabled reports whether cloud features should be disabled. It is
+// CloudStatus without the source: cloud is disabled when OLLAMA_NO_CLOUD says
+// so or when ~/.ollama/server.json sets disable_ollama_cloud.
+func (s *Store) CloudDisabled() (bool, error) {
+	isDisabled, _, statusErr := s.CloudStatus()
+	return isDisabled, statusErr
+}
+
+// CloudStatus reports whether cloud is disabled together with what disabled
+// it. The source is one of "none", "env", "config", or "both". If ensureDB or
+// reading the server config fails, it returns false, an empty source, and the
+// error.
+func (s *Store) CloudStatus() (bool, string, error) {
+	err := s.ensureDB()
+	if err != nil {
+		return false, "", err
+	}
+
+	fromConfig, err := readServerConfigCloudDisabled()
+	if err != nil {
+		return false, "", err
+	}
+
+	fromEnv := envconfig.NoCloudEnv()
+	source := cloudStatusSource(fromEnv, fromConfig)
+	disabled := fromEnv || fromConfig
+	return disabled, source, nil
+}
+
+// SetCloudEnabled persists the cloud setting in ~/.ollama/server.json.
+// ensureDB is called first; if it fails, that error is returned and the
+// config file is left untouched.
+func (s *Store) SetCloudEnabled(enabled bool) error {
+	dbErr := s.ensureDB()
+	if dbErr == nil {
+		return setCloudEnabled(enabled)
+	}
+	return dbErr
+}
+
+// setCloudEnabled stores disable_ollama_cloud = !enabled in the server config
+// file, creating the containing directory if needed. Other keys already in
+// the file are preserved. If the file's content is not a JSON object, it is
+// replaced by a fresh object. Returns an error when the path cannot be
+// resolved or the directory or file cannot be created, read, or written.
+func setCloudEnabled(enabled bool) error {
+	configPath, err := serverConfigPath()
+	if err != nil {
+		return err
+	}
+
+	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+		return fmt.Errorf("create server config directory: %w", err)
+	}
+
+	configMap := map[string]any{}
+	if data, err := os.ReadFile(configPath); err == nil {
+		// Start over with a fresh object when the file is invalid JSON or is
+		// valid JSON that is not an object. In particular a literal `null`
+		// decodes without error but leaves the map nil, and writing to a nil
+		// map would panic below.
+		if err := json.Unmarshal(data, &configMap); err != nil || configMap == nil {
+			configMap = map[string]any{}
+		}
+	} else if !errors.Is(err, os.ErrNotExist) {
+		return fmt.Errorf("read server config: %w", err)
+	}
+
+	configMap["disable_ollama_cloud"] = !enabled
+
+	data, err := json.MarshalIndent(configMap, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal server config: %w", err)
+	}
+	// Keep the file newline-terminated.
+	data = append(data, '\n')
+
+	if err := os.WriteFile(configPath, data, 0o644); err != nil {
+		return fmt.Errorf("write server config: %w", err)
+	}
+
+	return nil
+}
+
+// readServerConfigCloudDisabled returns the disable_ollama_cloud value from
+// the server config file. A missing file or undecodable content counts as
+// false with no error; only a failure to resolve the path or to read an
+// existing file is reported.
+func readServerConfigCloudDisabled() (bool, error) {
+	configPath, err := serverConfigPath()
+	if err != nil {
+		return false, err
+	}
+
+	raw, err := os.ReadFile(configPath)
+	if errors.Is(err, os.ErrNotExist) {
+		return false, nil
+	}
+	if err != nil {
+		return false, fmt.Errorf("read server config: %w", err)
+	}
+
+	// Invalid or unexpected JSON must not block startup; fall back to the
+	// default instead of returning the decode error.
+	var parsed serverConfig
+	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
+		return false, nil
+	}
+	return parsed.DisableOllamaCloud, nil
+}
+
+// serverConfigPath returns <home>/.ollama/<serverConfigFilename>, or an error
+// when the user's home directory cannot be determined.
+func serverConfigPath() (string, error) {
+	homeDir, homeErr := os.UserHomeDir()
+	if homeErr != nil {
+		return "", fmt.Errorf("resolve home directory: %w", homeErr)
+	}
+	configPath := filepath.Join(homeDir, ".ollama", serverConfigFilename)
+	return configPath, nil
+}
+
+// cloudStatusSource names what disabled cloud: "both", "env", "config", or
+// "none" when neither flag is set.
+func cloudStatusSource(envDisabled bool, configDisabled bool) string {
+	if envDisabled {
+		if configDisabled {
+			return "both"
+		}
+		return "env"
+	}
+	if configDisabled {
+		return "config"
+	}
+	return "none"
+}
--- a/app/store/cloud_config_test.go
+++ b/app/store/cloud_config_test.go
@@ -0,0 +1,130 @@
+//go:build windows || darwin
+
+package store
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestCloudDisabled runs CloudDisabled and CloudStatus against each
+// combination of the OLLAMA_NO_CLOUD environment value and server.json
+// content, checking both the disabled flag and the reported source.
+func TestCloudDisabled(t *testing.T) {
+	tests := []struct {
+		name          string
+		envValue      string
+		configContent string
+		wantDisabled  bool
+		wantSource    string
+	}{
+		{
+			name:         "default enabled",
+			wantDisabled: false,
+			wantSource:   "none",
+		},
+		{
+			name:         "env disables cloud",
+			envValue:     "1",
+			wantDisabled: true,
+			wantSource:   "env",
+		},
+		{
+			name:          "config disables cloud",
+			configContent: `{"disable_ollama_cloud": true}`,
+			wantDisabled:  true,
+			wantSource:    "config",
+		},
+		{
+			// The config explicitly enables cloud, so only the env counts.
+			name:          "env and config",
+			envValue:      "1",
+			configContent: `{"disable_ollama_cloud": false}`,
+			wantDisabled:  true,
+			wantSource:    "env",
+		},
+		{
+			// Both inputs disable cloud; this is the only path to "both".
+			name:          "both env and config disable cloud",
+			envValue:      "1",
+			configContent: `{"disable_ollama_cloud": true}`,
+			wantDisabled:  true,
+			wantSource:    "both",
+		},
+		{
+			name:          "invalid config is ignored",
+			configContent: `{bad`,
+			wantDisabled:  false,
+			wantSource:    "none",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tmpHome := t.TempDir()
+			setTestHome(t, tmpHome)
+			t.Setenv("OLLAMA_NO_CLOUD", tt.envValue)
+
+			if tt.configContent != "" {
+				configDir := filepath.Join(tmpHome, ".ollama")
+				if err := os.MkdirAll(configDir, 0o755); err != nil {
+					t.Fatalf("mkdir config dir: %v", err)
+				}
+				configPath := filepath.Join(configDir, serverConfigFilename)
+				if err := os.WriteFile(configPath, []byte(tt.configContent), 0o644); err != nil {
+					t.Fatalf("write config: %v", err)
+				}
+			}
+
+			s := &Store{DBPath: filepath.Join(tmpHome, "db.sqlite")}
+			defer s.Close()
+
+			disabled, err := s.CloudDisabled()
+			if err != nil {
+				t.Fatalf("CloudDisabled() error = %v", err)
+			}
+			if disabled != tt.wantDisabled {
+				t.Fatalf("CloudDisabled() = %v, want %v", disabled, tt.wantDisabled)
+			}
+
+			statusDisabled, source, err := s.CloudStatus()
+			if err != nil {
+				t.Fatalf("CloudStatus() error = %v", err)
+			}
+			if statusDisabled != tt.wantDisabled {
+				t.Fatalf("CloudStatus() disabled = %v, want %v", statusDisabled, tt.wantDisabled)
+			}
+			if source != tt.wantSource {
+				t.Fatalf("CloudStatus() source = %v, want %v", source, tt.wantSource)
+			}
+		})
+	}
+}
+
+// TestSetCloudEnabled seeds server.json with an unrelated key and with cloud
+// disabled, calls SetCloudEnabled(true), and checks that the flag is now
+// false while the unrelated key is still present.
+func TestSetCloudEnabled(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+
+	ollamaDir := filepath.Join(home, ".ollama")
+	if err := os.MkdirAll(ollamaDir, 0o755); err != nil {
+		t.Fatalf("mkdir config dir: %v", err)
+	}
+	cfgFile := filepath.Join(ollamaDir, serverConfigFilename)
+	seed := []byte(`{"another_key":"value","disable_ollama_cloud":true}`)
+	if err := os.WriteFile(cfgFile, seed, 0o644); err != nil {
+		t.Fatalf("seed config: %v", err)
+	}
+
+	st := &Store{DBPath: filepath.Join(home, "db.sqlite")}
+	defer st.Close()
+
+	if err := st.SetCloudEnabled(true); err != nil {
+		t.Fatalf("SetCloudEnabled(true) error = %v", err)
+	}
+
+	written, err := os.ReadFile(cfgFile)
+	if err != nil {
+		t.Fatalf("read config: %v", err)
+	}
+
+	var parsed map[string]any
+	if err := json.Unmarshal(written, &parsed); err != nil {
+		t.Fatalf("unmarshal config: %v", err)
+	}
+
+	if flag := parsed["disable_ollama_cloud"]; flag != false {
+		t.Fatalf("disable_ollama_cloud = %v, want false", flag)
+	}
+	if other := parsed["another_key"]; other != "value" {
+		t.Fatalf("another_key = %v, want value", other)
+	}
+}
--- a/app/store/database.go
+++ b/app/store/database.go
@@ -9,12 +9,12 @@ import (
 	"strings"
 	"time"

-	sqlite3 "github.com/mattn/go-sqlite3"
+	_ "github.com/mattn/go-sqlite3"
 )

 // currentSchemaVersion defines the current database schema version.
 // Increment this when making schema changes that require migrations.
-const currentSchemaVersion = 12
+const currentSchemaVersion = 15

 // database wraps the SQLite connection.
 // SQLite handles its own locking for concurrent access:
@@ -73,7 +73,7 @@ func (db *database) init() error {
 		agent BOOLEAN NOT NULL DEFAULT 0,
 		tools BOOLEAN NOT NULL DEFAULT 0,
 		working_dir TEXT NOT NULL DEFAULT '',
-		context_length INTEGER NOT NULL DEFAULT 4096,
+		context_length INTEGER NOT NULL DEFAULT 0,
 		window_width INTEGER NOT NULL DEFAULT 0,
 		window_height INTEGER NOT NULL DEFAULT 0,
 		config_migrated BOOLEAN NOT NULL DEFAULT 0,
@@ -84,7 +84,9 @@ func (db *database) init() error {
 		sidebar_open BOOLEAN NOT NULL DEFAULT 0,
 		think_enabled BOOLEAN NOT NULL DEFAULT 0,
 		think_level TEXT NOT NULL DEFAULT '',
+		cloud_setting_migrated BOOLEAN NOT NULL DEFAULT 0,
 		remote TEXT NOT NULL DEFAULT '', -- deprecated
+		auto_update_enabled BOOLEAN NOT NULL DEFAULT 1,
 		schema_version INTEGER NOT NULL DEFAULT %d
 	);

@@ -244,6 +246,24 @@ func (db *database) migrate() error {
 				return fmt.Errorf("migrate v11 to v12: %w", err)
 			}
 			version = 12
+		case 12:
+			// add cloud_setting_migrated column to settings table
+			if err := db.migrateV12ToV13(); err != nil {
+				return fmt.Errorf("migrate v12 to v13: %w", err)
+			}
+			version = 13
+		case 13:
+			// change default context_length from 4096 to 0 (VRAM-based tiered defaults)
+			if err := db.migrateV13ToV14(); err != nil {
+				return fmt.Errorf("migrate v13 to v14: %w", err)
+			}
+			version = 14
+		case 14:
+			// add auto_update_enabled column to settings table
+			if err := db.migrateV14ToV15(); err != nil {
+				return fmt.Errorf("migrate v14 to v15: %w", err)
+			}
+			version = 15
 		default:
 			// If we have a version we don't recognize, just set it to current
 			// This might happen during development
@@ -452,6 +472,52 @@ func (db *database) migrateV11ToV12() error {
 	return nil
 }

+// migrateV12ToV13 adds cloud_setting_migrated to settings.
+func (db *database) migrateV12ToV13() error {
+	_, err := db.conn.Exec(`ALTER TABLE settings ADD COLUMN cloud_setting_migrated BOOLEAN NOT NULL DEFAULT 0`)
+	if err != nil && !duplicateColumnError(err) {
+		return fmt.Errorf("add cloud_setting_migrated column: %w", err)
+	}
+
+	_, err = db.conn.Exec(`UPDATE settings SET schema_version = 13`)
+	if err != nil {
+		return fmt.Errorf("update schema version: %w", err)
+	}
+
+	return nil
+}
+
+// migrateV13ToV14 rewrites stored context_length values of 4096 (the old default) to 0.
+// When context_length is 0, the ollama server uses VRAM-based tiered defaults.
+func (db *database) migrateV13ToV14() error {
+	// Rows holding any other value are left as they are.
+	if _, err := db.conn.Exec(`UPDATE settings SET context_length = 0 WHERE context_length = 4096`); err != nil {
+		return fmt.Errorf("update context_length default: %w", err)
+	}
+
+	// Record the new schema version.
+	if _, err := db.conn.Exec(`UPDATE settings SET schema_version = 14`); err != nil {
+		return fmt.Errorf("update schema version: %w", err)
+	}
+
+	return nil
+}
+
+// migrateV14ToV15 adds the auto_update_enabled column (default 1) and records schema version 15.
+func (db *database) migrateV14ToV15() error {
+	if _, err := db.conn.Exec(`ALTER TABLE settings ADD COLUMN auto_update_enabled BOOLEAN NOT NULL DEFAULT 1`); err != nil {
+		// A duplicate-column error means the column already exists; carry on.
+		if !duplicateColumnError(err) {
+			return fmt.Errorf("add auto_update_enabled column: %w", err)
+		}
+	}
+
+	if _, err := db.conn.Exec(`UPDATE settings SET schema_version = 15`); err != nil {
+		return fmt.Errorf("update schema version: %w", err)
+	}
+	return nil
+}
+
 // cleanupOrphanedData removes orphaned records that may exist due to the foreign key bug
 func (db *database) cleanupOrphanedData() error {
 	_, err := db.conn.Exec(`
@@ -482,19 +548,11 @@ func (db *database) cleanupOrphanedData() error {
 }

 func duplicateColumnError(err error) bool {
-	if sqlite3Err, ok := err.(sqlite3.Error); ok {
-		return sqlite3Err.Code == sqlite3.ErrError &&
-			strings.Contains(sqlite3Err.Error(), "duplicate column name")
-	}
-	return false
+	return err != nil && strings.Contains(err.Error(), "duplicate column name") // matches on message text only; a nil error is never a duplicate
 }

 func columnNotExists(err error) bool {
-	if sqlite3Err, ok := err.(sqlite3.Error); ok {
-		return sqlite3Err.Code == sqlite3.ErrError &&
-			strings.Contains(sqlite3Err.Error(), "no such column")
-	}
-	return false
+	return err != nil && strings.Contains(err.Error(), "no such column") // matches on message text only; a nil error never qualifies
 }

 func (db *database) getAllChats() ([]Chat, error) {
@@ -1108,9 +1166,9 @@ func (db *database) getSettings() (Settings, error) {
 	var s Settings

 	err := db.conn.QueryRow(`
-		SELECT expose, survey, browser, models, agent, tools, working_dir, context_length, airplane_mode, turbo_enabled, websearch_enabled, selected_model, sidebar_open, think_enabled, think_level 
+		SELECT expose, survey, browser, models, agent, tools, working_dir, context_length, turbo_enabled, websearch_enabled, selected_model, sidebar_open, think_enabled, think_level, auto_update_enabled
 		FROM settings
-	`).Scan(&s.Expose, &s.Survey, &s.Browser, &s.Models, &s.Agent, &s.Tools, &s.WorkingDir, &s.ContextLength, &s.AirplaneMode, &s.TurboEnabled, &s.WebSearchEnabled, &s.SelectedModel, &s.SidebarOpen, &s.ThinkEnabled, &s.ThinkLevel)
+	`).Scan(&s.Expose, &s.Survey, &s.Browser, &s.Models, &s.Agent, &s.Tools, &s.WorkingDir, &s.ContextLength, &s.TurboEnabled, &s.WebSearchEnabled, &s.SelectedModel, &s.SidebarOpen, &s.ThinkEnabled, &s.ThinkLevel, &s.AutoUpdateEnabled)
 	if err != nil {
 		return Settings{}, fmt.Errorf("get settings: %w", err)
 	}
@@ -1120,15 +1178,41 @@ func (db *database) getSettings() (Settings, error) {

 func (db *database) setSettings(s Settings) error {
 	_, err := db.conn.Exec(`
-		UPDATE settings 
-		SET expose = ?, survey = ?, browser = ?, models = ?, agent = ?, tools = ?, working_dir = ?, context_length = ?, airplane_mode = ?, turbo_enabled = ?, websearch_enabled = ?, selected_model = ?, sidebar_open = ?, think_enabled = ?, think_level = ?
-	`, s.Expose, s.Survey, s.Browser, s.Models, s.Agent, s.Tools, s.WorkingDir, s.ContextLength, s.AirplaneMode, s.TurboEnabled, s.WebSearchEnabled, s.SelectedModel, s.SidebarOpen, s.ThinkEnabled, s.ThinkLevel)
+		UPDATE settings
+		SET expose = ?, survey = ?, browser = ?, models = ?, agent = ?, tools = ?, working_dir = ?, context_length = ?, turbo_enabled = ?, websearch_enabled = ?, selected_model = ?, sidebar_open = ?, think_enabled = ?, think_level = ?, auto_update_enabled = ?
+	`, s.Expose, s.Survey, s.Browser, s.Models, s.Agent, s.Tools, s.WorkingDir, s.ContextLength, s.TurboEnabled, s.WebSearchEnabled, s.SelectedModel, s.SidebarOpen, s.ThinkEnabled, s.ThinkLevel, s.AutoUpdateEnabled) // arguments follow the SET column order; no WHERE clause, so every settings row is updated
 	if err != nil {
 		return fmt.Errorf("set settings: %w", err)
 	}
 	return nil
 }

+// isCloudSettingMigrated reads the cloud_setting_migrated flag from the settings table.
+func (db *database) isCloudSettingMigrated() (bool, error) {
+	var done bool
+	if err := db.conn.QueryRow("SELECT cloud_setting_migrated FROM settings").Scan(&done); err != nil {
+		return false, fmt.Errorf("get cloud setting migration status: %w", err)
+	}
+	return done, nil
+}
+
+// setCloudSettingMigrated stores the cloud_setting_migrated flag in the settings table.
+func (db *database) setCloudSettingMigrated(migrated bool) error {
+	if _, err := db.conn.Exec("UPDATE settings SET cloud_setting_migrated = ?", migrated); err != nil {
+		return fmt.Errorf("set cloud setting migration status: %w", err)
+	}
+	return nil
+}
+
+// getAirplaneMode reads the airplane_mode column from the settings table.
+func (db *database) getAirplaneMode() (bool, error) {
+	var enabled bool
+	if err := db.conn.QueryRow("SELECT airplane_mode FROM settings").Scan(&enabled); err != nil {
+		return false, fmt.Errorf("get airplane_mode: %w", err)
+	}
+	return enabled, nil
+}
+
 func (db *database) getWindowSize() (int, int, error) {
 	var width, height int
 	err := db.conn.QueryRow("SELECT window_width, window_height FROM settings").Scan(&width, &height)
--- a/app/store/database_test.go
+++ b/app/store/database_test.go
@@ -98,6 +98,43 @@ func TestSchemaMigrations(t *testing.T) {
 	})
 }

+func TestMigrationV13ToV14ContextLength(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "test.db")
+	db, err := newDatabase(path)
+	if err != nil {
+		t.Fatalf("failed to create database: %v", err)
+	}
+	defer db.Close()
+
+	// Rewind the settings row to schema v13 with the value the migration targets.
+	seed := "UPDATE settings SET context_length = 4096, schema_version = 13"
+	if _, err := db.conn.Exec(seed); err != nil {
+		t.Fatalf("failed to seed v13 settings row: %v", err)
+	}
+
+	if err := db.migrate(); err != nil {
+		t.Fatalf("migration from v13 to v14 failed: %v", err)
+	}
+
+	// The stored 4096 must now read back as 0.
+	var got int
+	row := db.conn.QueryRow("SELECT context_length FROM settings")
+	if err := row.Scan(&got); err != nil {
+		t.Fatalf("failed to read context_length: %v", err)
+	}
+	if got != 0 {
+		t.Fatalf("expected context_length to migrate to 0, got %d", got)
+	}
+
+	// migrate is expected to leave the schema at the current version.
+	version, err := db.getSchemaVersion()
+	if err != nil {
+		t.Fatalf("failed to get schema version: %v", err)
+	}
+	if version != currentSchemaVersion {
+		t.Fatalf("expected schema version %d, got %d", currentSchemaVersion, version)
+	}
+}
+
 func TestChatDeletionWithCascade(t *testing.T) {
 	t.Run("chat deletion cascades to related messages", func(t *testing.T) {
 		tmpDir := t.TempDir()
--- a/app/store/migration_test.go
+++ b/app/store/migration_test.go
@@ -127,6 +127,65 @@ func TestNoConfigToMigrate(t *testing.T) {
 	}
 }

+func TestCloudMigrationFromAirplaneMode(t *testing.T) {
+	home := t.TempDir()
+	setTestHome(t, home)
+	t.Setenv("OLLAMA_NO_CLOUD", "")
+
+	// Create the database directly and seed the legacy, not-yet-migrated state.
+	dbPath := filepath.Join(home, "db.sqlite")
+	db, err := newDatabase(dbPath)
+	if err != nil {
+		t.Fatalf("failed to create database: %v", err)
+	}
+	_, seedErr := db.conn.Exec("UPDATE settings SET airplane_mode = 1, cloud_setting_migrated = 0")
+	db.Close() // closed on both paths before the Store below uses the same path
+	if seedErr != nil {
+		t.Fatalf("failed to seed airplane migration state: %v", seedErr)
+	}
+
+	st := Store{DBPath: dbPath}
+	defer st.Close()
+
+	// Trigger DB initialization + one-time cloud migration.
+	if _, err := st.ID(); err != nil {
+		t.Fatalf("failed to initialize store: %v", err)
+	}
+
+	disabled, err := st.CloudDisabled()
+	if err != nil {
+		t.Fatalf("CloudDisabled() error: %v", err)
+	}
+	if !disabled {
+		t.Fatal("expected cloud to be disabled after migrating airplane_mode=true")
+	}
+
+	raw, err := os.ReadFile(filepath.Join(home, ".ollama", serverConfigFilename))
+	if err != nil {
+		t.Fatalf("failed to read migrated server config: %v", err)
+	}
+	var cfg map[string]any
+	if err := json.Unmarshal(raw, &cfg); err != nil {
+		t.Fatalf("failed to parse migrated server config: %v", err)
+	}
+	if v := cfg["disable_ollama_cloud"]; v != true {
+		t.Fatalf("disable_ollama_cloud = %v, want true", v)
+	}
+
+	// The legacy column stays as seeded; only the migrated marker flips.
+	var legacy, marked bool
+	row := st.db.conn.QueryRow("SELECT airplane_mode, cloud_setting_migrated FROM settings")
+	if err := row.Scan(&legacy, &marked); err != nil {
+		t.Fatalf("failed to read migration flags from DB: %v", err)
+	}
+	if !legacy {
+		t.Fatal("expected legacy airplane_mode value to remain unchanged")
+	}
+	if !marked {
+		t.Fatal("expected cloud_setting_migrated to be true")
+	}
+}
+
 const (
 	v1Schema = `
 	CREATE TABLE IF NOT EXISTS settings (
--- a/app/store/store.go
+++ b/app/store/store.go
@@ -149,9 +149,6 @@ type Settings struct {
 	// ContextLength specifies the context length for the ollama server (using OLLAMA_CONTEXT_LENGTH)
 	ContextLength int

-	// AirplaneMode when true, turns off Ollama Turbo features and only uses local models
-	AirplaneMode bool
-
 	// TurboEnabled indicates if Ollama Turbo features are enabled
 	TurboEnabled bool

@@ -169,6 +166,9 @@ type Settings struct {

 	// SidebarOpen indicates if the chat sidebar is open
 	SidebarOpen bool
+
+	// AutoUpdateEnabled indicates if automatic updates should be downloaded
+	AutoUpdateEnabled bool
 }

 type Store struct {
@@ -259,6 +259,40 @@ func (s *Store) ensureDB() error {
 		}
 	}

+	// Run one-time migration from legacy airplane_mode behavior.
+	if err := s.migrateCloudSetting(database); err != nil {
+		return fmt.Errorf("migrate cloud setting: %w", err)
+	}
+
+	return nil
+}
+
+// migrateCloudSetting migrates legacy airplane_mode into server.json; it is a no-op once cloud_setting_migrated is set.
+// After this, cloud state is sourced from server.json OR OLLAMA_NO_CLOUD.
+func (s *Store) migrateCloudSetting(database *database) error {
+	done, err := database.isCloudSettingMigrated()
+	switch {
+	case err != nil:
+		return err
+	case done:
+		return nil
+	}
+
+	legacyAirplane, err := database.getAirplaneMode()
+	if err != nil {
+		return err
+	}
+	// Only an enabled airplane_mode needs translating; it maps to cloud disabled.
+	if legacyAirplane {
+		if err := setCloudEnabled(false); err != nil {
+			return fmt.Errorf("migrate airplane_mode to cloud disabled: %w", err)
+		}
+	}
+
+	// Mark completion last so a failure above is retried on the next call.
+	if err := database.setCloudSettingMigrated(true); err != nil {
+		return err
+	}
 	return nil
 }

--- a/app/store/test_home_test.go
+++ b/app/store/test_home_test.go
@@ -0,0 +1,11 @@
+//go:build windows || darwin
+
+package store
+
+import "testing"
+
+func setTestHome(t *testing.T, home string) {
+	t.Helper()
+	t.Setenv("HOME", home)        // home directory variable on macOS
+	t.Setenv("USERPROFILE", home) // home directory variable on Windows
+}
--- a/app/store/testdata/schema.sql
+++ b/app/store/testdata/schema.sql
@@ -13,7 +13,7 @@ CREATE TABLE IF NOT EXISTS settings (
    agent BOOLEAN NOT NULL DEFAULT 0,
    tools BOOLEAN NOT NULL DEFAULT 0,
    working_dir TEXT NOT NULL DEFAULT '',
-    context_length INTEGER NOT NULL DEFAULT 4096,
+    context_length INTEGER NOT NULL DEFAULT 0,
    window_width INTEGER NOT NULL DEFAULT 0,
    window_height INTEGER NOT NULL DEFAULT 0,
    config_migrated BOOLEAN NOT NULL DEFAULT 0,
--- a/app/tools/cloud_policy.go
+++ b/app/tools/cloud_policy.go
@@ -0,0 +1,35 @@
+//go:build windows || darwin
+
+package tools
+
+import (
+	"context"
+	"errors"
+
+	"github.com/ollama/ollama/api"
+	internalcloud "github.com/ollama/ollama/internal/cloud"
+)
+
+// ensureCloudEnabledForTool returns nil only when the connected Ollama server
+// reports cloud as enabled. If policy cannot be determined, this fails closed.
+func ensureCloudEnabledForTool(ctx context.Context, operation string) error {
+	// Reuse shared message formatting; policy evaluation is still done via
+	// the connected server's /api/status endpoint below.
+	msg := internalcloud.DisabledError(operation)
+	// Both lookup failures below report the same "unable to verify" error.
+	unverified := func() error {
+		return errors.New(msg + " (unable to verify server cloud policy)")
+	}
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return unverified()
+	}
+	status, err := client.CloudStatusExperimental(ctx)
+	switch {
+	case err != nil:
+		return unverified()
+	case status.Cloud.Disabled:
+		return errors.New(msg)
+	}
+	return nil
+}
--- a/app/tools/cloud_policy_test.go
+++ b/app/tools/cloud_policy_test.go
@@ -0,0 +1,73 @@
+//go:build windows || darwin
+
+package tools
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+func TestEnsureCloudEnabledForTool(t *testing.T) {
+	const op = "web search is unavailable"
+	const disabledPrefix = "ollama cloud is disabled: web search is unavailable"
+
+	// serveStatus starts a stub server and points OLLAMA_HOST at it for the
+	// calling subtest. /api/status answers with body as JSON; every other
+	// path, and every path when body is empty, answers 404.
+	serveStatus := func(t *testing.T, body string) {
+		t.Helper()
+		handler := func(w http.ResponseWriter, r *http.Request) {
+			if body == "" || r.URL.Path != "/api/status" {
+				http.NotFound(w, r)
+				return
+			}
+			w.Header().Set("Content-Type", "application/json")
+			_, _ = w.Write([]byte(body))
+		}
+		ts := httptest.NewServer(http.HandlerFunc(handler))
+		t.Cleanup(ts.Close)
+		t.Setenv("OLLAMA_HOST", ts.URL)
+	}
+
+	t.Run("enabled allows tool execution", func(t *testing.T) {
+		serveStatus(t, `{"cloud":{"disabled":false,"source":"none"}}`)
+
+		if err := ensureCloudEnabledForTool(context.Background(), op); err != nil {
+			t.Fatalf("expected nil error, got %v", err)
+		}
+	})
+
+	t.Run("disabled blocks tool execution", func(t *testing.T) {
+		serveStatus(t, `{"cloud":{"disabled":true,"source":"config"}}`)
+
+		err := ensureCloudEnabledForTool(context.Background(), op)
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+
+		// A reported disable yields the bare message with no suffix.
+		if got := err.Error(); got != disabledPrefix {
+			t.Fatalf("unexpected error: %q", got)
+		}
+	})
+
+	t.Run("status unavailable fails closed", func(t *testing.T) {
+		serveStatus(t, "")
+
+		err := ensureCloudEnabledForTool(context.Background(), op)
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+		// A failed lookup keeps the disabled message and appends the reason.
+		got := err.Error()
+		if !strings.Contains(got, disabledPrefix) {
+			t.Fatalf("expected disabled prefix, got %q", got)
+		}
+		if !strings.Contains(got, "unable to verify server cloud policy") {
+			t.Fatalf("expected verification failure detail, got %q", got)
+		}
+	})
+}
--- a/app/tools/web_fetch.go
+++ b/app/tools/web_fetch.go
@@ -77,6 +77,10 @@ func (w *WebFetch) Execute(ctx context.Context, args map[string]any) (any, strin
 }

 func performWebFetch(ctx context.Context, targetURL string) (*FetchResponse, error) {
+	if err := ensureCloudEnabledForTool(ctx, "web fetch is unavailable"); err != nil {
+		return nil, err
+	}
+
 	reqBody := FetchRequest{URL: targetURL}
 	jsonBody, err := json.Marshal(reqBody)
 	if err != nil {
--- a/app/tools/web_search.go
+++ b/app/tools/web_search.go
@@ -93,6 +93,10 @@ func (w *WebSearch) Execute(ctx context.Context, args map[string]any) (any, stri
 }

 func performWebSearch(ctx context.Context, query string, maxResults int) (*SearchResponse, error) {
+	if err := ensureCloudEnabledForTool(ctx, "web search is unavailable"); err != nil {
+		return nil, err
+	}
+
 	reqBody := SearchRequest{Query: query, MaxResults: maxResults}

 	jsonBody, err := json.Marshal(reqBody)
--- a/app/ui/app/codegen/gotypes.gen.ts
+++ b/app/ui/app/codegen/gotypes.gen.ts
@@ -289,10 +289,12 @@ export class InferenceCompute {
 }
 export class InferenceComputeResponse {
    inferenceComputes: InferenceCompute[];
+    defaultContextLength: number;

    constructor(source: any = {}) {
        if ('string' === typeof source) source = JSON.parse(source);
        this.inferenceComputes = this.convertValues(source["inferenceComputes"], InferenceCompute);
+        this.defaultContextLength = source["defaultContextLength"];
    }

 	convertValues(a: any, classs: any, asMap: boolean = false): any {
@@ -406,13 +408,13 @@ export class Settings {
    Tools: boolean;
    WorkingDir: string;
    ContextLength: number;
-    AirplaneMode: boolean;
    TurboEnabled: boolean;
    WebSearchEnabled: boolean;
    ThinkEnabled: boolean;
    ThinkLevel: string;
    SelectedModel: string;
    SidebarOpen: boolean;
+    AutoUpdateEnabled: boolean;

    constructor(source: any = {}) {
        if ('string' === typeof source) source = JSON.parse(source);
@@ -424,13 +426,13 @@ export class Settings {
        this.Tools = source["Tools"];
        this.WorkingDir = source["WorkingDir"];
        this.ContextLength = source["ContextLength"];
-        this.AirplaneMode = source["AirplaneMode"];
        this.TurboEnabled = source["TurboEnabled"];
        this.WebSearchEnabled = source["WebSearchEnabled"];
        this.ThinkEnabled = source["ThinkEnabled"];
        this.ThinkLevel = source["ThinkLevel"];
        this.SelectedModel = source["SelectedModel"];
        this.SidebarOpen = source["SidebarOpen"];
+        this.AutoUpdateEnabled = source["AutoUpdateEnabled"];
    }
 }
 export class SettingsResponse {
--- a/app/ui/app/src/api.ts
+++ b/app/ui/app/src/api.ts
@@ -4,7 +4,6 @@ import {
  ChatEvent,
  DownloadEvent,
  ErrorEvent,
-  InferenceCompute,
  InferenceComputeResponse,
  ModelCapabilitiesResponse,
  Model,
@@ -27,6 +26,12 @@ declare module "@/gotypes" {
 Model.prototype.isCloud = function (): boolean {
  return this.model.endsWith("cloud");
 };
+
+export type CloudStatusSource = "env" | "config" | "both" | "none";
+export interface CloudStatusResponse {
+  disabled: boolean;
+  source: CloudStatusSource;
+}
 // Helper function to convert Uint8Array to base64
 function uint8ArrayToBase64(uint8Array: Uint8Array): string {
  const chunkSize = 0x8000; // 32KB chunks to avoid stack overflow
@@ -285,6 +290,28 @@ export async function updateSettings(settings: Settings): Promise<{
  };
 }

+// Send the cloud toggle via POST /api/v1/cloud and return the status found in
+// the response. A non-OK response throws, using the response text when present.
+export async function updateCloudSetting(
+  enabled: boolean,
+): Promise<CloudStatusResponse> {
+  const res = await fetch(`${API_BASE}/api/v1/cloud`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ enabled }),
+  });
+  if (!res.ok) {
+    const detail = await res.text();
+    throw new Error(detail || "Failed to update cloud setting");
+  }
+
+  // Normalise the payload: coerce the flag, default a missing source to "none".
+  const payload = await res.json();
+  const disabled = Boolean(payload.disabled);
+  const source = (payload.source as CloudStatusSource) || "none";
+  return { disabled, source };
+}
+
 export async function renameChat(chatId: string, title: string): Promise<void> {
  const response = await fetch(`${API_BASE}/api/v1/chat/${chatId}/rename`, {
    method: "PUT",
@@ -379,7 +406,7 @@ export async function* pullModel(
  }
 }

-export async function getInferenceCompute(): Promise<InferenceCompute[]> {
+export async function getInferenceCompute(): Promise<InferenceComputeResponse> {
  const response = await fetch(`${API_BASE}/api/v1/inference-compute`);
  if (!response.ok) {
    throw new Error(
@@ -388,8 +415,7 @@ export async function getInferenceCompute(): Promise<InferenceCompute[]> {
  }

  const data = await response.json();
-  const inferenceComputeResponse = new InferenceComputeResponse(data);
-  return inferenceComputeResponse.inferenceComputes || [];
+  return new InferenceComputeResponse(data);
 }

 export async function fetchHealth(): Promise<boolean> {
@@ -414,3 +440,16 @@ export async function fetchHealth(): Promise<boolean> {
    return false;
  }
 }
+
+// Fetch the cloud status from GET /api/v1/cloud; throws on a non-OK response.
+export async function getCloudStatus(): Promise<CloudStatusResponse | null> {
+  const res = await fetch(`${API_BASE}/api/v1/cloud`);
+  if (!res.ok) {
+    throw new Error(`Failed to fetch cloud status: ${res.status}`);
+  }
+
+  // Coerce the flag to a boolean; a missing source becomes "none".
+  const body = await res.json();
+  const source = (body.source as CloudStatusSource) || "none";
+  return { disabled: Boolean(body.disabled), source };
+}
--- a/app/ui/app/src/components/ChatForm.tsx
+++ b/app/ui/app/src/components/ChatForm.tsx
@@ -17,11 +17,15 @@ import {
 } from "@/hooks/useChats";
 import { useNavigate } from "@tanstack/react-router";
 import { useSelectedModel } from "@/hooks/useSelectedModel";
-import { useHasVisionCapability } from "@/hooks/useModelCapabilities";
+import {
+  useHasVisionCapability,
+  useHasToolsCapability,
+} from "@/hooks/useModelCapabilities";
 import { useUser } from "@/hooks/useUser";
 import { DisplayLogin } from "@/components/DisplayLogin";
 import { ErrorEvent, Message } from "@/gotypes";
 import { useSettings } from "@/hooks/useSettings";
+import { useCloudStatus } from "@/hooks/useCloudStatus";
 import { ThinkButton } from "./ThinkButton";
 import { ErrorMessage } from "./ErrorMessage";
 import { processFiles } from "@/utils/fileValidation";
@@ -141,19 +145,14 @@ function ChatForm({
  const {
    settings: {
      webSearchEnabled,
-      airplaneMode,
      thinkEnabled,
      thinkLevel: settingsThinkLevel,
    },
    setSettings,
  } = useSettings();
+  const { cloudDisabled } = useCloudStatus();

-  // current supported models for web search
-  const modelLower = selectedModel?.model.toLowerCase() || "";
-  const supportsWebSearch =
-    modelLower.startsWith("gpt-oss") ||
-    modelLower.startsWith("qwen3") ||
-    modelLower.startsWith("deepseek-v3");
+  const supportsWebSearch = useHasToolsCapability(selectedModel?.model);
  // Use per-chat thinking level instead of global
  const thinkLevel: ThinkingLevel =
    settingsThinkLevel === "none" || !settingsThinkLevel
@@ -180,6 +179,12 @@ function ChatForm({
    setSettings,
  ]);

+  useEffect(() => {
+    if (cloudDisabled && webSearchEnabled) {
+      setSettings({ WebSearchEnabled: false });
+    }
+  }, [cloudDisabled, webSearchEnabled, setSettings]);
+
  const removeFile = (index: number) => {
    setMessage((prev) => ({
      ...prev,
@@ -234,19 +239,19 @@ function ChatForm({

  // Determine if login banner should be shown
  const shouldShowLoginBanner =
+    !cloudDisabled &&
    !isLoadingUser &&
    !isAuthenticated &&
-    ((webSearchEnabled && supportsWebSearch) ||
-      (selectedModel?.isCloud() && !airplaneMode));
+    ((webSearchEnabled && supportsWebSearch) || selectedModel?.isCloud());

  // Determine which feature to highlight in the banner
  const getActiveFeatureForBanner = () => {
+    if (cloudDisabled) return null;
    if (!isAuthenticated) {
      if (loginPromptFeature) return loginPromptFeature;
-      if (webSearchEnabled && selectedModel?.isCloud() && !airplaneMode)
-        return "webSearch";
+      if (webSearchEnabled && selectedModel?.isCloud()) return "webSearch";
      if (webSearchEnabled) return "webSearch";
-      if (selectedModel?.isCloud() && !airplaneMode) return "turbo";
+      if (selectedModel?.isCloud()) return "turbo";
    }
    return null;
  };
@@ -269,11 +274,12 @@ function ChatForm({
  useEffect(() => {
    if (
      isAuthenticated ||
-      (!webSearchEnabled && !!selectedModel?.isCloud() && !airplaneMode)
+      cloudDisabled ||
+      (!webSearchEnabled && !!selectedModel?.isCloud())
    ) {
      setLoginPromptFeature(null);
    }
-  }, [isAuthenticated, webSearchEnabled, selectedModel, airplaneMode]);
+  }, [isAuthenticated, webSearchEnabled, selectedModel, cloudDisabled]);

  // When entering edit mode, populate the composition with existing data
  useEffect(() => {
@@ -465,6 +471,10 @@ function ChatForm({
  const handleSubmit = async () => {
    if (!message.content.trim() || isStreaming || isDownloading) return;

+    if (cloudDisabled && selectedModel?.isCloud()) {
+      return;
+    }
+
    // Check if cloud mode is enabled but user is not authenticated
    if (shouldShowLoginBanner) {
      return;
@@ -478,7 +488,8 @@ function ChatForm({
      }),
    );

-    const useWebSearch = supportsWebSearch && webSearchEnabled && !airplaneMode;
+    const useWebSearch =
+      supportsWebSearch && webSearchEnabled && !cloudDisabled;
    const useThink = modelSupportsThinkingLevels
      ? thinkLevel
      : supportsThinkToggling
@@ -899,7 +910,7 @@ function ChatForm({
                )}
                <WebSearchButton
                  ref={webSearchButtonRef}
-                  isVisible={supportsWebSearch && airplaneMode === false}
+                  isVisible={supportsWebSearch && cloudDisabled === false}
                  isActive={webSearchEnabled}
                  onToggle={() => {
                    if (!webSearchEnabled && !isAuthenticated) {
@@ -940,6 +951,7 @@ function ChatForm({
                !isDownloading &&
                (!message.content.trim() ||
                  shouldShowLoginBanner ||
+                  (cloudDisabled && selectedModel?.isCloud()) ||
                  message.fileErrors.length > 0)
              }
              className={`flex items-center justify-center h-9 w-9 rounded-full disabled:cursor-default cursor-pointer bg-black text-white dark:bg-white dark:text-black disabled:opacity-10 focus:outline-none focus:ring-2 focus:ring-blue-500`}
--- a/app/ui/app/src/components/ModelPicker.tsx
+++ b/app/ui/app/src/components/ModelPicker.tsx
@@ -8,7 +8,7 @@ import {
 } from "react";
 import { Model } from "@/gotypes";
 import { useSelectedModel } from "@/hooks/useSelectedModel";
-import { useSettings } from "@/hooks/useSettings";
+import { useCloudStatus } from "@/hooks/useCloudStatus";
 import { useQueryClient } from "@tanstack/react-query";
 import { getModelUpstreamInfo } from "@/api";
 import { ArrowDownTrayIcon } from "@heroicons/react/24/outline";
@@ -34,7 +34,7 @@ export const ModelPicker = forwardRef<
    chatId,
    searchQuery,
  );
-  const { settings } = useSettings();
+  const { cloudDisabled } = useCloudStatus();
  const dropdownRef = useRef<HTMLDivElement>(null);
  const searchInputRef = useRef<HTMLInputElement>(null);
  const queryClient = useQueryClient();
@@ -219,7 +219,7 @@ export const ModelPicker = forwardRef<
            models={models}
            selectedModel={selectedModel}
            onModelSelect={handleModelSelect}
-            airplaneMode={settings.airplaneMode}
+            cloudDisabled={cloudDisabled}
            isOpen={isOpen}
          />
        </div>
@@ -233,13 +233,13 @@ export const ModelList = forwardRef(function ModelList(
    models,
    selectedModel,
    onModelSelect,
-    airplaneMode,
+    cloudDisabled,
    isOpen,
  }: {
    models: Model[];
    selectedModel: Model | null;
    onModelSelect: (model: Model) => void;
-    airplaneMode: boolean;
+    cloudDisabled: boolean;
    isOpen: boolean;
  },
  ref,
@@ -348,7 +348,7 @@ export const ModelList = forwardRef(function ModelList(
                  </svg>
                )}
                {model.digest === undefined &&
-                  (airplaneMode || !model.isCloud()) && (
+                  (cloudDisabled || !model.isCloud()) && (
                    <ArrowDownTrayIcon
                      className="h-4 w-4 text-neutral-500 dark:text-neutral-400"
                      strokeWidth={1.75}
--- a/app/ui/app/src/components/Settings.tsx
+++ b/app/ui/app/src/components/Settings.tsx
@@ -11,15 +11,24 @@ import {
  FolderIcon,
  BoltIcon,
  WrenchIcon,
+  CloudIcon,
  XMarkIcon,
  CogIcon,
  ArrowLeftIcon,
+  ArrowDownTrayIcon,
 } from "@heroicons/react/20/solid";
 import { Settings as SettingsType } from "@/gotypes";
 import { useNavigate } from "@tanstack/react-router";
 import { useUser } from "@/hooks/useUser";
+import { useCloudStatus } from "@/hooks/useCloudStatus";
 import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
-import { getSettings, updateSettings } from "@/api";
+import {
+  getSettings,
+  type CloudStatusResponse,
+  updateCloudSetting,
+  updateSettings,
+  getInferenceCompute,
+} from "@/api";

 function AnimatedDots() {
  return (
@@ -53,6 +62,11 @@ export default function Settings() {
  const [connectionError, setConnectionError] = useState<string | null>(null);
  const [pollingInterval, setPollingInterval] = useState<number | null>(null);
  const navigate = useNavigate();
+  const {
+    cloudDisabled,
+    cloudStatus,
+    isLoading: cloudStatusLoading,
+  } = useCloudStatus();

  const {
    data: settingsData,
@@ -65,6 +79,13 @@ export default function Settings() {

  const settings = settingsData?.settings || null;

+  const { data: inferenceComputeResponse } = useQuery({
+    queryKey: ["inferenceCompute"],
+    queryFn: getInferenceCompute,
+  });
+
+  const defaultContextLength = inferenceComputeResponse?.defaultContextLength;
+
  const updateSettingsMutation = useMutation({
    mutationFn: updateSettings,
    onSuccess: () => {
@@ -74,6 +95,50 @@ export default function Settings() {
    },
  });

+  const updateCloudMutation = useMutation({
+    mutationFn: (enabled: boolean) => updateCloudSetting(enabled),
+    onMutate: async (enabled: boolean) => {
+      await queryClient.cancelQueries({ queryKey: ["cloudStatus"] });
+
+      const previous = queryClient.getQueryData<CloudStatusResponse | null>([
+        "cloudStatus",
+      ]);
+      const envForcesDisabled =
+        previous?.source === "env" || previous?.source === "both";
+
+      queryClient.setQueryData<CloudStatusResponse | null>(
+        ["cloudStatus"],
+        previous
+          ? {
+              ...previous,
+              disabled: !enabled || envForcesDisabled,
+            }
+          : {
+              disabled: !enabled,
+              source: "config",
+            },
+      );
+
+      return { previous };
+    },
+    onError: (_error, _enabled, context) => {
+      if (context?.previous !== undefined) {
+        queryClient.setQueryData(["cloudStatus"], context.previous);
+      }
+    },
+    onSuccess: (status) => {
+      queryClient.setQueryData<CloudStatusResponse | null>(
+        ["cloudStatus"],
+        status,
+      );
+      queryClient.invalidateQueries({ queryKey: ["models"] });
+      queryClient.invalidateQueries({ queryKey: ["cloudStatus"] });
+
+      setShowSaved(true);
+      setTimeout(() => setShowSaved(false), 1500);
+    },
+  });
+
  useEffect(() => {
    refetchUser();
  }, []); // eslint-disable-line react-hooks/exhaustive-deps
@@ -148,13 +213,17 @@ export default function Settings() {
        Models: "",
        Agent: false,
        Tools: false,
-        ContextLength: 4096,
-        AirplaneMode: false,
+        ContextLength: 0,
      });
      updateSettingsMutation.mutate(defaultSettings);
    }
  };

+  const cloudOverriddenByEnv =
+    cloudStatus?.source === "env" || cloudStatus?.source === "both";
+  const cloudToggleDisabled =
+    cloudStatusLoading || updateCloudMutation.isPending || cloudOverriddenByEnv;
+
  const handleConnectOllamaAccount = async () => {
    setConnectionError(null);

@@ -237,7 +306,7 @@ export default function Settings() {
        <div className="space-y-4 max-w-2xl mx-auto">
          {/* Connect Ollama Account */}
          <div className="overflow-hidden rounded-xl bg-white dark:bg-neutral-800">
-            <div className="p-4 border-b border-neutral-200 dark:border-neutral-800">
+            <div className="p-4">
              <Field>
                {isLoading ? (
                  // Loading skeleton, this will only happen if the app started recently
@@ -344,6 +413,57 @@ export default function Settings() {
          {/* Local Configuration */}
          <div className="relative overflow-hidden rounded-xl bg-white dark:bg-neutral-800">
            <div className="space-y-4 p-4">
+              <Field>
+                <div className="flex items-start justify-between gap-4">
+                  <div className="flex items-start space-x-3 flex-1">
+                    <CloudIcon className="mt-1 h-5 w-5 flex-shrink-0 text-black dark:text-neutral-100" />
+                    <div>
+                      <Label>Cloud</Label>
+                      <Description>
+                        {cloudOverriddenByEnv
+                          ? "The OLLAMA_NO_CLOUD environment variable is currently forcing cloud off."
+                          : "Enable cloud models and web search."}
+                      </Description>
+                    </div>
+                  </div>
+                  <div className="flex-shrink-0">
+                    <Switch
+                      checked={!cloudDisabled}
+                      disabled={cloudToggleDisabled}
+                      onChange={(checked) => {
+                        if (cloudOverriddenByEnv) {
+                          return;
+                        }
+                        updateCloudMutation.mutate(checked);
+                      }}
+                    />
+                  </div>
+                </div>
+              </Field>
+
+              {/* Auto Update */}
+              <Field>
+                <div className="flex items-start justify-between gap-4">
+                  <div className="flex items-start space-x-3 flex-1">
+                    <ArrowDownTrayIcon className="mt-1 h-5 w-5 flex-shrink-0 text-black dark:text-neutral-100" />
+                    <div>
+                      <Label>Auto-download updates</Label>
+                      <Description>
+                        {settings.AutoUpdateEnabled
+                          ? "Automatically download updates when available."
+                          : "Updates will not be downloaded automatically."}
+                      </Description>
+                    </div>
+                  </div>
+                  <div className="flex-shrink-0">
+                    <Switch
+                      checked={settings.AutoUpdateEnabled}
+                      onChange={(checked) => handleChange("AutoUpdateEnabled", checked)}
+                    />
+                  </div>
+                </div>
+              </Field>
+
              {/* Expose Ollama */}
              <Field>
                <div className="flex items-start justify-between gap-4">
@@ -419,13 +539,11 @@ export default function Settings() {
                    </Description>
                    <div className="mt-3">
                      <Slider
-                        value={(() => {
-                          // Otherwise use the settings value
-                          return settings.ContextLength || 4096;
-                        })()}
+                        value={settings.ContextLength || defaultContextLength || 0}
                        onChange={(value) => {
                          handleChange("ContextLength", value);
                        }}
+                        disabled={!defaultContextLength}
                        options={[
                          { value: 4096, label: "4k" },
                          { value: 8192, label: "8k" },
@@ -440,35 +558,6 @@ export default function Settings() {
                  </div>
                </div>
              </Field>
-              {/* Airplane Mode */}
-              <Field>
-                <div className="flex items-start justify-between gap-4">
-                  <div className="flex items-start space-x-3 flex-1">
-                    <svg
-                      className="mt-1 h-5 w-5 flex-shrink-0 text-black dark:text-neutral-100"
-                      viewBox="0 0 21.5508 17.9033"
-                      fill="currentColor"
-                    >
-                      <path d="M21.5508 8.94727C21.542 7.91895 20.1445 7.17188 18.4658 7.17188L14.9238 7.17188C14.4316 7.17188 14.2471 7.09277 13.957 6.75879L8.05078 0.316406C7.86621 0.105469 7.6377 0 7.37402 0L6.35449 0C6.12598 0 5.99414 0.202148 6.1084 0.448242L9.14941 7.17188L4.68457 7.68164L3.09375 4.76367C2.97949 4.54395 2.78613 4.44727 2.49609 4.44727L2.11816 4.44727C1.88965 4.44727 1.74023 4.59668 1.74023 4.8252L1.74023 13.0693C1.74023 13.2979 1.88965 13.4385 2.11816 13.4385L2.49609 13.4385C2.78613 13.4385 2.97949 13.3418 3.09375 13.1309L4.68457 10.2129L9.14941 10.7227L6.1084 17.4463C5.99414 17.6836 6.12598 17.8945 6.35449 17.8945L7.37402 17.8945C7.6377 17.8945 7.86621 17.7803 8.05078 17.5781L13.957 11.127C14.2471 10.8018 14.4316 10.7227 14.9238 10.7227L18.4658 10.7227C20.1445 10.7227 21.542 9.9668 21.5508 8.94727Z" />
-                    </svg>
-                    <div>
-                      <Label>Airplane mode</Label>
-                      <Description>
-                        Airplane mode keeps data local, disabling cloud models
-                        and web search.
-                      </Description>
-                    </div>
-                  </div>
-                  <div className="flex-shrink-0">
-                    <Switch
-                      checked={settings.AirplaneMode}
-                      onChange={(checked) =>
-                        handleChange("AirplaneMode", checked)
-                      }
-                    />
-                  </div>
-                </div>
-              </Field>
            </div>
          </div>

--- a/app/ui/app/src/components/ui/slider.tsx
+++ b/app/ui/app/src/components/ui/slider.tsx
@@ -6,10 +6,11 @@ export interface SliderProps {
  value?: number;
  onChange?: (value: number) => void;
  className?: string;
+  disabled?: boolean;
 }

 const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
-  ({ label, options, value = 0, onChange }, ref) => {
+  ({ label, options, value = 0, onChange, disabled = false }, ref) => {
    const [selectedValue, setSelectedValue] = React.useState(value);
    const [isDragging, setIsDragging] = React.useState(false);
    const containerRef = React.useRef<HTMLDivElement>(null);
@@ -20,6 +21,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
    }, [value]);

    const handleClick = (optionValue: number) => {
+      if (disabled) return;
      setSelectedValue(optionValue);
      onChange?.(optionValue);
    };
@@ -39,6 +41,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
    };

    const handleMouseDown = (e: React.MouseEvent) => {
+      if (disabled) return;
      setIsDragging(true);
      e.preventDefault();
    };
@@ -77,7 +80,7 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
    }

    return (
-      <div className="space-y-2" ref={ref}>
+      <div className={`space-y-2 ${disabled ? "opacity-50" : ""}`} ref={ref}>
        {label && <label className="text-sm font-medium">{label}</label>}
        <div className="relative">
          <div className="absolute top-[9px] left-2 right-2 h-1 bg-neutral-200 dark:bg-neutral-700 pointer-events-none rounded-full" />
@@ -88,10 +91,11 @@ const Slider = React.forwardRef<HTMLDivElement, SliderProps>(
                <button
                  onClick={() => handleClick(option.value)}
                  onMouseDown={handleMouseDown}
-                  className="relative px-3 py-6 -mx-3 -my-6 z-10 cursor-pointer"
+                  disabled={disabled}
+                  className={`relative px-3 py-6 -mx-3 -my-6 z-10 ${disabled ? "cursor-not-allowed" : "cursor-pointer"}`}
                >
                  <div className="relative w-5 h-5 flex items-center justify-center">
-                    {selectedValue === option.value && (
+                    {selectedValue === option.value && !disabled && (
                      <div className="w-4 h-4 bg-white dark:bg-white border border-neutral-400 dark:border-neutral-500 rounded-full cursor-grab active:cursor-grabbing" />
                    )}
                  </div>
--- a/app/ui/app/src/hooks/useChats.ts
+++ b/app/ui/app/src/hooks/useChats.ts
@@ -6,8 +6,8 @@ import { useSelectedModel } from "./useSelectedModel";
 import { createQueryBatcher } from "./useQueryBatcher";
 import { useRefetchModels } from "./useModels";
 import { useStreamingContext } from "@/contexts/StreamingContext";
-import { useSettings } from "./useSettings";
 import { getModelCapabilities } from "@/api";
+import { useCloudStatus } from "./useCloudStatus";

 export const useChats = () => {
  return useQuery({
@@ -116,11 +116,9 @@ export const useIsModelStale = (modelName: string) => {
 export const useShouldShowStaleDisplay = (model: Model | null) => {
  const isStale = useIsModelStale(model?.model || "");
  const { data: dismissedModels } = useDismissedStaleModels();
-  const {
-    settings: { airplaneMode },
-  } = useSettings();
+  const { cloudDisabled } = useCloudStatus();

-  if (model?.isCloud() && !airplaneMode) {
+  if (model?.isCloud() && !cloudDisabled) {
    return false;
  }

--- a/app/ui/app/src/hooks/useCloudStatus.ts
+++ b/app/ui/app/src/hooks/useCloudStatus.ts
@@ -0,0 +1,29 @@
+import { useQuery } from "@tanstack/react-query";
+import { getCloudStatus, type CloudStatusResponse } from "@/api";
+
+/**
+ * Subscribes to the shared ["cloudStatus"] query, fetched with getCloudStatus.
+ * A failed fetch is not retried and a result stays fresh for 60 seconds.
+ *
+ * Returns the raw `cloudStatus` payload, the derived `cloudDisabled` flag,
+ * `isKnown` (true once the payload is neither null nor undefined), and the
+ * query's `isLoading`, `isError` and `error` fields.
+ */
+export function useCloudStatus() {
+  const cloudQuery = useQuery<CloudStatusResponse | null>({
+    queryKey: ["cloudStatus"],
+    queryFn: getCloudStatus,
+    retry: false,
+    staleTime: 60 * 1000,
+  });
+
+  return {
+    cloudStatus: cloudQuery.data,
+    // With no status payload, cloud is treated as enabled.
+    cloudDisabled: cloudQuery.data?.disabled ?? false,
+    isKnown: cloudQuery.data !== null && cloudQuery.data !== undefined,
+    isLoading: cloudQuery.isLoading,
+    isError: cloudQuery.isError,
+    error: cloudQuery.error,
+  };
+}
--- a/app/ui/app/src/hooks/useModelCapabilities.ts
+++ b/app/ui/app/src/hooks/useModelCapabilities.ts
@@ -20,3 +20,12 @@ export function useHasVisionCapability(modelName: string | undefined) {
  const { data: capabilitiesResponse } = useModelCapabilities(modelName);
  return capabilitiesResponse?.capabilities?.includes("vision") ?? false;
 }
+
+/**
+ * Reports whether the capabilities fetched for `modelName` include "tools".
+ * Resolves to `false` while no capabilities response is available.
+ */
+export function useHasToolsCapability(modelName: string | undefined) {
+  const { data: capabilitiesResponse } = useModelCapabilities(modelName);
+  return capabilitiesResponse?.capabilities?.includes("tools") ?? false;
+}
--- a/app/ui/app/src/hooks/useModels.ts
+++ b/app/ui/app/src/hooks/useModels.ts
@@ -2,11 +2,11 @@ import { useQuery } from "@tanstack/react-query";
 import { Model } from "@/gotypes";
 import { getModels } from "@/api";
 import { mergeModels } from "@/utils/mergeModels";
-import { useSettings } from "./useSettings";
 import { useMemo } from "react";
+import { useCloudStatus } from "./useCloudStatus";

 export function useModels(searchQuery = "") {
-  const { settings } = useSettings();
+  const { cloudDisabled } = useCloudStatus();
  const localQuery = useQuery<Model[], Error>({
    queryKey: ["models", searchQuery],
    queryFn: () => getModels(searchQuery),
@@ -20,7 +20,7 @@ export function useModels(searchQuery = "") {
  });

  const allModels = useMemo(() => {
-    const models = mergeModels(localQuery.data || [], settings.airplaneMode);
+    const models = mergeModels(localQuery.data || [], cloudDisabled);

    if (searchQuery && searchQuery.trim()) {
      const query = searchQuery.toLowerCase().trim();
@@ -40,7 +40,7 @@ export function useModels(searchQuery = "") {
    }

    return models;
-  }, [localQuery.data, searchQuery, settings.airplaneMode]);
+  }, [localQuery.data, searchQuery, cloudDisabled]);

  return {
    ...localQuery,
--- a/app/ui/app/src/hooks/useSelectedModel.ts
+++ b/app/ui/app/src/hooks/useSelectedModel.ts
@@ -7,6 +7,7 @@ import { Model } from "@/gotypes";
 import { FEATURED_MODELS } from "@/utils/mergeModels";
 import { getTotalVRAM } from "@/utils/vram.ts";
 import { getInferenceCompute } from "@/api";
+import { useCloudStatus } from "./useCloudStatus";

 export function recommendDefaultModel(totalVRAM: number): string {
  const vram = Math.max(0, Number(totalVRAM) || 0);
@@ -22,16 +23,19 @@ export function recommendDefaultModel(totalVRAM: number): string {
 export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
  const { settings, setSettings } = useSettings();
  const { data: models = [], isLoading } = useModels(searchQuery || "");
+  const { cloudDisabled } = useCloudStatus();
  const { data: chatData, isLoading: isChatLoading } = useChat(
    currentChatId && currentChatId !== "new" ? currentChatId : "",
  );

-  const { data: inferenceComputes = [] } = useQuery({
-    queryKey: ["inference-compute"],
+  const { data: inferenceComputeResponse } = useQuery({
+    queryKey: ["inferenceCompute"],
    queryFn: getInferenceCompute,
    enabled: !settings.selectedModel, // Only fetch if no model is selected
  });

+  const inferenceComputes = inferenceComputeResponse?.inferenceComputes || [];
+
  const totalVRAM = useMemo(
    () => getTotalVRAM(inferenceComputes),
    [inferenceComputes],
@@ -46,12 +50,11 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
  const restoredChatRef = useRef<string | null>(null);

  const selectedModel: Model | null = useMemo(() => {
-    // if airplane mode is on and selected model ends with cloud,
-    // switch to recommended default model
-    if (settings.airplaneMode && settings.selectedModel?.endsWith("cloud")) {
+    // If cloud is disabled and selected model ends with cloud, switch to a local default.
+    if (cloudDisabled && settings.selectedModel?.endsWith("cloud")) {
      return (
        models.find((m) => m.model === recommendedModel) ||
-        models.find((m) => m.isCloud) ||
+        models.find((m) => !m.isCloud()) ||
        models.find((m) => m.digest === undefined || m.digest === "") ||
        models[0] ||
        null
@@ -68,7 +71,7 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
      "qwen3-coder:480b",
    ];
    const shouldMigrate =
-      !settings.airplaneMode &&
+      !cloudDisabled &&
      settings.turboEnabled &&
      baseModelsToMigrate.includes(settings.selectedModel);

@@ -96,13 +99,18 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
        })) ||
      null
    );
-  }, [models, settings.selectedModel, settings.airplaneMode, recommendedModel]);
+  }, [
+    models,
+    settings.selectedModel,
+    cloudDisabled,
+    recommendedModel,
+  ]);

  useEffect(() => {
    if (!selectedModel) return;

    if (
-      settings.airplaneMode &&
+      cloudDisabled &&
      settings.selectedModel?.endsWith("cloud") &&
      selectedModel.model !== settings.selectedModel
    ) {
@@ -110,13 +118,17 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
    }

    if (
-      !settings.airplaneMode &&
+      !cloudDisabled &&
      settings.turboEnabled &&
      selectedModel.model !== settings.selectedModel
    ) {
      setSettings({ SelectedModel: selectedModel.model, TurboEnabled: false });
    }
-  }, [selectedModel, settings.airplaneMode, settings.selectedModel]);
+  }, [
+    selectedModel,
+    cloudDisabled,
+    settings.selectedModel,
+  ]);

  // Set model from chat history when chat data loads
  useEffect(() => {
@@ -169,7 +181,9 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {

    const defaultModel =
      models.find((m) => m.model === recommendedModel) ||
-      models.find((m) => m.isCloud()) ||
+      (cloudDisabled
+        ? models.find((m) => !m.isCloud())
+        : models.find((m) => m.isCloud())) ||
      models.find((m) => m.digest === undefined || m.digest === "") ||
      models[0];

@@ -181,6 +195,7 @@ export function useSelectedModel(currentChatId?: string, searchQuery?: string) {
    inferenceComputes.length,
    models.length,
    settings.selectedModel,
+    cloudDisabled,
  ]);

  // Add the selected model to the models list if it's not already there
--- a/app/ui/app/src/hooks/useSettings.ts
+++ b/app/ui/app/src/hooks/useSettings.ts
@@ -9,7 +9,6 @@ interface SettingsState {
  webSearchEnabled: boolean;
  selectedModel: string;
  sidebarOpen: boolean;
-  airplaneMode: boolean;
  thinkEnabled: boolean;
  thinkLevel: string;
 }
@@ -51,7 +50,6 @@ export function useSettings() {
      thinkLevel: settingsData?.settings?.ThinkLevel ?? "none",
      selectedModel: settingsData?.settings?.SelectedModel ?? "",
      sidebarOpen: settingsData?.settings?.SidebarOpen ?? false,
-      airplaneMode: settingsData?.settings?.AirplaneMode ?? false,
    }),
    [settingsData?.settings],
  );
--- a/app/ui/app/src/routes/__root.tsx
+++ b/app/ui/app/src/routes/__root.tsx
@@ -2,6 +2,7 @@ import type { QueryClient } from "@tanstack/react-query";
 import { createRootRouteWithContext, Outlet } from "@tanstack/react-router";
 import { getSettings } from "@/api";
 import { useQuery } from "@tanstack/react-query";
+import { useCloudStatus } from "@/hooks/useCloudStatus";

 function RootComponent() {
  // This hook ensures settings are fetched on app startup
@@ -9,6 +10,8 @@ function RootComponent() {
    queryKey: ["settings"],
    queryFn: getSettings,
  });
+  // Fetch cloud status on startup (best-effort)
+  useCloudStatus();

  return (
    <div>
--- a/app/ui/app/src/utils/mergeModels.test.ts
+++ b/app/ui/app/src/utils/mergeModels.test.ts
@@ -41,14 +41,14 @@ describe("Model merging logic", () => {
    expect(merged.length).toBe(FEATURED_MODELS.length + 2);
  });

-  it("should hide cloud models in airplane mode", () => {
+  it("should hide cloud models when cloud is disabled", () => {
    const localModels: Model[] = [
      new Model({ model: "gpt-oss:120b-cloud" }),
      new Model({ model: "llama3:latest" }),
      new Model({ model: "mistral:latest" }),
    ];

-    const merged = mergeModels(localModels, true); // airplane mode = true
+    const merged = mergeModels(localModels, true); // cloud disabled = true

    // No cloud models should be present
    const cloudModels = merged.filter((m) => m.isCloud());
--- a/app/ui/app/src/utils/mergeModels.ts
+++ b/app/ui/app/src/utils/mergeModels.ts
@@ -32,7 +32,7 @@ function alphabeticalSort(a: Model, b: Model): number {
 //Merges models, sorting cloud models first, then other models
 export function mergeModels(
  localModels: Model[],
-  airplaneMode: boolean = false,
+  hideCloudModels: boolean = false,
 ): Model[] {
  const allModels = (localModels || []).map((model) => model);

@@ -95,7 +95,7 @@ export function mergeModels(

  remainingModels.sort(alphabeticalSort);

-  return airplaneMode
+  return hideCloudModels
    ? [...featuredModels, ...remainingModels]
    : [...cloudModels, ...featuredModels, ...remainingModels];
 }
--- a/app/ui/responses/types.go
+++ b/app/ui/responses/types.go
@@ -45,7 +45,8 @@ type InferenceCompute struct {
 }

 type InferenceComputeResponse struct {
-	InferenceComputes []InferenceCompute `json:"inferenceComputes"`
+	InferenceComputes    []InferenceCompute `json:"inferenceComputes"`
+	DefaultContextLength int                `json:"defaultContextLength"`
 }

 type ModelCapabilitiesResponse struct {
--- a/app/ui/ui.go
+++ b/app/ui/ui.go
@@ -28,6 +28,7 @@ import (
 	"github.com/ollama/ollama/app/tools"
 	"github.com/ollama/ollama/app/types/not"
 	"github.com/ollama/ollama/app/ui/responses"
+	"github.com/ollama/ollama/app/updater"
 	"github.com/ollama/ollama/app/version"
 	ollamaAuth "github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
@@ -106,6 +107,10 @@ type Server struct {

 	// Dev is true if the server is running in development mode
 	Dev bool
+
+	// Updater for checking and downloading updates
+	Updater             *updater.Updater
+	UpdateAvailableFunc func()
 }

 func (s *Server) log() *slog.Logger {
@@ -284,12 +289,15 @@ func (s *Server) Handler() http.Handler {
 	mux.Handle("POST /api/v1/model/upstream", handle(s.modelUpstream))
 	mux.Handle("GET /api/v1/settings", handle(s.getSettings))
 	mux.Handle("POST /api/v1/settings", handle(s.settings))
+	mux.Handle("GET /api/v1/cloud", handle(s.getCloudSetting))
+	mux.Handle("POST /api/v1/cloud", handle(s.cloudSetting))

 	// Ollama proxy endpoints
 	ollamaProxy := s.ollamaProxy()
 	mux.Handle("GET /api/tags", ollamaProxy)
 	mux.Handle("POST /api/show", ollamaProxy)
 	mux.Handle("GET /api/version", ollamaProxy)
+	mux.Handle("GET /api/status", ollamaProxy)
 	mux.Handle("HEAD /api/version", ollamaProxy)
 	mux.Handle("POST /api/me", ollamaProxy)
 	mux.Handle("POST /api/signout", ollamaProxy)
@@ -826,8 +834,9 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {

 	if !hasAttachments {
 		WebSearchEnabled := req.WebSearch != nil && *req.WebSearch
+		hasToolsCapability := slices.Contains(details.Capabilities, model.CapabilityTools)

-		if WebSearchEnabled {
+		if WebSearchEnabled && hasToolsCapability {
 			if supportsBrowserTools(req.Model) {
 				browserState, ok := s.browserState(chat)
 				if !ok {
@@ -837,7 +846,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
 				registry.Register(tools.NewBrowserSearch(browser))
 				registry.Register(tools.NewBrowserOpen(browser))
 				registry.Register(tools.NewBrowserFind(browser))
-			} else if supportsWebSearchTools(req.Model) {
+			} else {
 				registry.Register(&tools.WebSearch{})
 				registry.Register(&tools.WebFetch{})
 			}
@@ -1417,11 +1426,6 @@ func (s *Server) getSettings(w http.ResponseWriter, r *http.Request) error {
 		settings.Models = envconfig.Models()
 	}

-	// set default context length if not set
-	if settings.ContextLength == 0 {
-		settings.ContextLength = 4096
-	}
-
 	// Include current runtime settings
 	settings.Agent = s.Agent
 	settings.Tools = s.Tools
@@ -1448,6 +1452,24 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
 		return fmt.Errorf("failed to save settings: %w", err)
 	}

+	// Handle auto-update toggle changes
+	if old.AutoUpdateEnabled != settings.AutoUpdateEnabled {
+		if !settings.AutoUpdateEnabled {
+			// Auto-update disabled: cancel any ongoing download
+			if s.Updater != nil {
+				s.Updater.CancelOngoingDownload()
+			}
+		} else {
+			// Auto-update re-enabled: show notification if update is already staged, or trigger immediate check
+			if (updater.IsUpdatePending() || updater.UpdateDownloaded) && s.UpdateAvailableFunc != nil {
+				s.UpdateAvailableFunc()
+			} else if s.Updater != nil {
+				// Trigger the background checker to run immediately
+				s.Updater.TriggerImmediateCheck()
+			}
+		}
+	}
+
 	if old.ContextLength != settings.ContextLength ||
 		old.Models != settings.Models ||
 		old.Expose != settings.Expose {
@@ -1460,17 +1482,62 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
 	})
 }

+// cloudSetting handles POST /api/v1/cloud: it stores the requested cloud
+// on/off value, calls s.Restart, and replies with the resulting cloud status.
+// A body that omits "enabled" is rejected instead of being read as false.
+func (s *Server) cloudSetting(w http.ResponseWriter, r *http.Request) error {
+	var req struct {
+		// Pointer, so an omitted or null "enabled" is distinguishable from false.
+		Enabled *bool `json:"enabled"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		return fmt.Errorf("invalid request body: %w", err)
+	}
+	if req.Enabled == nil {
+		return fmt.Errorf("invalid request body: missing %q", "enabled")
+	}
+
+	if err := s.Store.SetCloudEnabled(*req.Enabled); err != nil {
+		return fmt.Errorf("failed to persist cloud setting: %w", err)
+	}
+
+	s.Restart()
+
+	return s.writeCloudStatus(w)
+}
+
+// getCloudSetting handles GET /api/v1/cloud and replies with the current
+// cloud status.
+func (s *Server) getCloudSetting(w http.ResponseWriter, r *http.Request) error {
+	return s.writeCloudStatus(w)
+}
+
+// writeCloudStatus loads the cloud status from the store and writes it to w
+// as a JSON object with "disabled" and "source" keys.
+func (s *Server) writeCloudStatus(w http.ResponseWriter) error {
+	disabled, source, err := s.Store.CloudStatus()
+	if err != nil {
+		return fmt.Errorf("failed to load cloud status: %w", err)
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	return json.NewEncoder(w).Encode(map[string]any{
+		"disabled": disabled,
+		"source":   source,
+	})
+}
+
 func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) error {
 	ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
 	defer cancel()
-	serverInferenceComputes, err := server.GetInferenceComputer(ctx)
+	info, err := server.GetInferenceInfo(ctx)
 	if err != nil {
-		s.log().Error("failed to get inference compute", "error", err)
-		return fmt.Errorf("failed to get inference compute: %w", err)
+		s.log().Error("failed to get inference info", "error", err)
+		return fmt.Errorf("failed to get inference info: %w", err)
 	}

-	inferenceComputes := make([]responses.InferenceCompute, len(serverInferenceComputes))
-	for i, ic := range serverInferenceComputes {
+	inferenceComputes := make([]responses.InferenceCompute, len(info.Computes))
+	for i, ic := range info.Computes {
 		inferenceComputes[i] = responses.InferenceCompute{
 			Library: ic.Library,
 			Variant: ic.Variant,
@@ -1482,7 +1538,8 @@ func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) err
 	}

 	response := responses.InferenceComputeResponse{
-		InferenceComputes: inferenceComputes,
+		InferenceComputes:    inferenceComputes,
+		DefaultContextLength: info.DefaultContextLength,
 	}

 	w.Header().Set("Content-Type", "application/json")
@@ -1615,17 +1672,5 @@ func supportsBrowserTools(model string) bool {
 	return strings.HasPrefix(strings.ToLower(model), "gpt-oss")
 }

-// Web search tools are simpler, providing only basic web search and fetch capabilities (e.g., "web_search", "web_fetch") without simulating a browser. Currently only qwen3 and deepseek-v3 support web search tools.
-func supportsWebSearchTools(model string) bool {
-	model = strings.ToLower(model)
-	prefixes := []string{"qwen3", "deepseek-v3"}
-	for _, p := range prefixes {
-		if strings.HasPrefix(model, p) {
-			return true
-		}
-	}
-	return false
-}
-
 // buildChatRequest converts store.Chat to api.ChatRequest
 func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) {
--- a/app/ui/ui_test.go
+++ b/app/ui/ui_test.go
@@ -4,6 +4,7 @@ package ui

 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"io"
 	"net/http"
@@ -11,9 +12,11 @@ import (
 	"path/filepath"
 	"runtime"
 	"strings"
+	"sync/atomic"
 	"testing"

 	"github.com/ollama/ollama/app/store"
+	"github.com/ollama/ollama/app/updater"
 )

 func TestHandlePostApiSettings(t *testing.T) {
@@ -115,6 +118,112 @@ func TestHandlePostApiSettings(t *testing.T) {
 	}
 }

+// TestHandlePostApiCloudSetting posts a disable and then an enable request to
+// the cloud handler and checks the response, the stored value, and that
+// Restart ran once per request.
+func TestHandlePostApiCloudSetting(t *testing.T) {
+	tmpHome := t.TempDir()
+	t.Setenv("HOME", tmpHome)
+	t.Setenv("OLLAMA_NO_CLOUD", "")
+
+	testStore := &store.Store{
+		DBPath: filepath.Join(t.TempDir(), "db.sqlite"),
+	}
+	defer testStore.Close()
+
+	restartCount := 0
+	server := &Server{
+		Store: testStore,
+		Restart: func() {
+			restartCount++
+		},
+	}
+
+	for _, tc := range []struct {
+		name        string
+		body        string
+		wantEnabled bool
+	}{
+		{name: "disable cloud", body: `{"enabled": false}`, wantEnabled: false},
+		{name: "enable cloud", body: `{"enabled": true}`, wantEnabled: true},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			req := httptest.NewRequest("POST", "/api/v1/cloud", bytes.NewBufferString(tc.body))
+			req.Header.Set("Content-Type", "application/json")
+			rr := httptest.NewRecorder()
+
+			if err := server.cloudSetting(rr, req); err != nil {
+				t.Fatalf("cloudSetting() error = %v", err)
+			}
+			if rr.Code != http.StatusOK {
+				t.Fatalf("cloudSetting() status = %d, want %d", rr.Code, http.StatusOK)
+			}
+
+			var got map[string]any
+			if err := json.Unmarshal(rr.Body.Bytes(), &got); err != nil {
+				t.Fatalf("cloudSetting() invalid response JSON: %v", err)
+			}
+			if got["disabled"] != !tc.wantEnabled {
+				t.Fatalf("response disabled = %v, want %v", got["disabled"], !tc.wantEnabled)
+			}
+
+			disabled, err := testStore.CloudDisabled()
+			if err != nil {
+				t.Fatalf("CloudDisabled() error = %v", err)
+			}
+			if gotEnabled := !disabled; gotEnabled != tc.wantEnabled {
+				t.Fatalf("cloud enabled = %v, want %v", gotEnabled, tc.wantEnabled)
+			}
+		})
+	}
+
+	if restartCount != 2 {
+		t.Fatalf("Restart called %d times, want 2", restartCount)
+	}
+}
+
+// TestHandleGetApiCloudSetting disables cloud in the store and checks that
+// the GET handler reports disabled=true with source "config".
+func TestHandleGetApiCloudSetting(t *testing.T) {
+	tmpHome := t.TempDir()
+	t.Setenv("HOME", tmpHome)
+	t.Setenv("OLLAMA_NO_CLOUD", "")
+
+	testStore := &store.Store{
+		DBPath: filepath.Join(t.TempDir(), "db.sqlite"),
+	}
+	defer testStore.Close()
+
+	if err := testStore.SetCloudEnabled(false); err != nil {
+		t.Fatalf("SetCloudEnabled(false) error = %v", err)
+	}
+
+	server := &Server{
+		Store:   testStore,
+		Restart: func() {},
+	}
+
+	req := httptest.NewRequest("GET", "/api/v1/cloud", nil)
+	rr := httptest.NewRecorder()
+	if err := server.getCloudSetting(rr, req); err != nil {
+		t.Fatalf("getCloudSetting() error = %v", err)
+	}
+	if rr.Code != http.StatusOK {
+		t.Fatalf("getCloudSetting() status = %d, want %d", rr.Code, http.StatusOK)
+	}
+
+	var got map[string]any
+	if err := json.Unmarshal(rr.Body.Bytes(), &got); err != nil {
+		t.Fatalf("getCloudSetting() invalid response JSON: %v", err)
+	}
+	if got["disabled"] != true {
+		t.Fatalf("response disabled = %v, want true", got["disabled"])
+	}
+	if got["source"] != "config" {
+		t.Fatalf("response source = %v, want config", got["source"])
+	}
+}
+
 func TestAuthenticationMiddleware(t *testing.T) {
 	tests := []struct {
 		name         string
@@ -421,3 +525,290 @@ func TestUserAgentTransport(t *testing.T) {

 	t.Logf("User-Agent transport successfully set: %s", receivedUA)
 }
+
+// TestSupportsBrowserTools checks supportsBrowserTools against a table of
+// model names, including mixed-case "gpt-oss" variants and the empty string.
+func TestSupportsBrowserTools(t *testing.T) {
+	cases := []struct {
+		model string
+		want  bool
+	}{
+		{model: "gpt-oss", want: true},
+		{model: "gpt-oss-latest", want: true},
+		{model: "GPT-OSS", want: true},
+		{model: "Gpt-Oss-v2", want: true},
+		{model: "qwen3", want: false},
+		{model: "deepseek-v3", want: false},
+		{model: "llama3.3", want: false},
+		{model: "", want: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.model, func(t *testing.T) {
+			got := supportsBrowserTools(tc.model)
+			if got == tc.want {
+				return
+			}
+			t.Errorf("supportsBrowserTools(%q) = %v, want %v", tc.model, got, tc.want)
+		})
+	}
+}
+
+// TestWebSearchToolRegistration exercises the capability gate that decides
+// which web tools get registered. It mirrors the condition used by the chat()
+// handler in the test body rather than invoking the handler itself.
+func TestWebSearchToolRegistration(t *testing.T) {
+	type scenario struct {
+		name             string
+		webSearchEnabled bool
+		hasToolsCap      bool
+		model            string
+		wantBrowser      bool // expects browser tools (gpt-oss)
+		wantWebSearch    bool // expects basic web search/fetch tools
+		wantNone         bool // expects no tools registered
+	}
+
+	scenarios := []scenario{
+		{
+			name:             "web search enabled with tools capability - browser model",
+			webSearchEnabled: true,
+			hasToolsCap:      true,
+			model:            "gpt-oss-latest",
+			wantBrowser:      true,
+		},
+		{
+			name:             "web search enabled with tools capability - non-browser model",
+			webSearchEnabled: true,
+			hasToolsCap:      true,
+			model:            "qwen3",
+			wantWebSearch:    true,
+		},
+		{
+			name:             "web search enabled without tools capability",
+			webSearchEnabled: true,
+			hasToolsCap:      false,
+			model:            "llama3.3",
+			wantNone:         true,
+		},
+		{
+			name:             "web search disabled with tools capability",
+			webSearchEnabled: false,
+			hasToolsCap:      true,
+			model:            "qwen3",
+			wantNone:         true,
+		},
+		{
+			name:             "web search disabled without tools capability",
+			webSearchEnabled: false,
+			hasToolsCap:      false,
+			model:            "llama3.3",
+			wantNone:         true,
+		},
+	}
+
+	for _, sc := range scenarios {
+		t.Run(sc.name, func(t *testing.T) {
+			// Same decision as the chat() handler: tools are only considered
+			// when web search is on and the model has the tools capability;
+			// browser-capable models get browser tools, all others web search.
+			var gotBrowser, gotWebSearch bool
+			if sc.webSearchEnabled && sc.hasToolsCap {
+				gotBrowser = supportsBrowserTools(sc.model)
+				gotWebSearch = !gotBrowser
+			}
+			anyRegistered := gotBrowser || gotWebSearch
+
+			if sc.wantBrowser && !gotBrowser {
+				t.Error("expected browser tools to be registered")
+			}
+			if sc.wantWebSearch && !gotWebSearch {
+				t.Error("expected web search tools to be registered")
+			}
+			if sc.wantNone && anyRegistered {
+				t.Error("expected no tools to be registered")
+			}
+			if gotBrowser && !sc.wantBrowser {
+				t.Error("unexpected browser tools registered")
+			}
+			if gotWebSearch && !sc.wantWebSearch {
+				t.Error("unexpected web search tools registered")
+			}
+		})
+	}
+}
+
+// TestSettingsToggleAutoUpdateOff_CancelsDownload posts settings with
+// auto-update switched off to a Server that has an Updater attached, and
+// checks that the handler succeeds and the new value is persisted. It does not
+// observe the download cancellation itself.
+func TestSettingsToggleAutoUpdateOff_CancelsDownload(t *testing.T) {
+	st := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
+	defer st.Close()
+
+	// Start with auto-update enabled
+	cfg, err := st.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	cfg.AutoUpdateEnabled = true
+	if err := st.SetSettings(cfg); err != nil {
+		t.Fatal(err)
+	}
+
+	updaterStore := &store.Store{DBPath: filepath.Join(t.TempDir(), "db2.sqlite")}
+	upd := &updater.Updater{Store: updaterStore}
+	defer upd.Store.Close()
+
+	// We can't easily mock CancelOngoingDownload, but we can verify
+	// the full settings handler flow works without error
+	srv := &Server{Store: st, Restart: func() {}, Updater: upd}
+
+	// Disable auto-update via settings API
+	cfg.AutoUpdateEnabled = false
+	payload, err := json.Marshal(cfg)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest("POST", "/api/v1/settings", bytes.NewReader(payload))
+	req.Header.Set("Content-Type", "application/json")
+
+	if err := srv.settings(rec, req); err != nil {
+		t.Fatalf("settings() error = %v", err)
+	}
+	if status := rec.Code; status != http.StatusOK {
+		t.Fatalf("settings() status = %d, want %d", status, http.StatusOK)
+	}
+
+	// Verify settings were saved with auto-update disabled
+	persisted, err := st.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if persisted.AutoUpdateEnabled {
+		t.Fatal("expected AutoUpdateEnabled to be false after toggle off")
+	}
+}
+
+// TestSettingsToggleAutoUpdateOn_WithPendingUpdate_ShowsNotification verifies
+// that turning auto-update back on through the settings handler invokes
+// UpdateAvailableFunc when an update has already been downloaded.
+func TestSettingsToggleAutoUpdateOn_WithPendingUpdate_ShowsNotification(t *testing.T) {
+	st := &store.Store{DBPath: filepath.Join(t.TempDir(), "db.sqlite")}
+	defer st.Close()
+
+	// Start with auto-update disabled
+	cfg, err := st.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	cfg.AutoUpdateEnabled = false
+	if err := st.SetSettings(cfg); err != nil {
+		t.Fatal(err)
+	}
+
+	// Simulate that an update was previously downloaded; the package-level
+	// flag is restored when the test returns.
+	prevDownloaded := updater.UpdateDownloaded
+	updater.UpdateDownloaded = true
+	defer func() { updater.UpdateDownloaded = prevDownloaded }()
+
+	var notified atomic.Bool
+	srv := &Server{
+		Store:               st,
+		Restart:             func() {},
+		UpdateAvailableFunc: func() { notified.Store(true) },
+	}
+
+	// Re-enable auto-update via settings API
+	cfg.AutoUpdateEnabled = true
+	payload, err := json.Marshal(cfg)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest("POST", "/api/v1/settings", bytes.NewReader(payload))
+	req.Header.Set("Content-Type", "application/json")
+
+	if err := srv.settings(rec, req); err != nil {
+		t.Fatalf("settings() error = %v", err)
+	}
+	if status := rec.Code; status != http.StatusOK {
+		t.Fatalf("settings() status = %d, want %d", status, http.StatusOK)
+	}
+
+	if !notified.Load() {
+		t.Fatal("expected UpdateAvailableFunc to be called when re-enabling with a downloaded update")
+	}
+}
+
+// TestSettingsToggleAutoUpdateOn_NoPendingUpdate_TriggersCheck verifies that
+// re-enabling auto-update through the settings handler, when no update is
+// pending, succeeds and does not invoke UpdateAvailableFunc.
+// NOTE(review): despite the name, nothing here asserts that an update check
+// was actually triggered — only that no notification fired.
+func TestSettingsToggleAutoUpdateOn_NoPendingUpdate_TriggersCheck(t *testing.T) {
+	testStore := &store.Store{
+		DBPath: filepath.Join(t.TempDir(), "db.sqlite"),
+	}
+	defer testStore.Close()
+
+	// Start with auto-update disabled
+	settings, err := testStore.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	settings.AutoUpdateEnabled = false
+	if err := testStore.SetSettings(settings); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure no pending update - clear both the downloaded flag and the stage dir.
+	// Both are package-level variables and are restored when the test returns.
+	oldVal := updater.UpdateDownloaded
+	updater.UpdateDownloaded = false
+	defer func() { updater.UpdateDownloaded = oldVal }()
+
+	oldStageDir := updater.UpdateStageDir
+	updater.UpdateStageDir = t.TempDir() // empty dir means IsUpdatePending() returns false
+	defer func() { updater.UpdateStageDir = oldStageDir }()
+
+	upd := &updater.Updater{Store: &store.Store{
+		DBPath: filepath.Join(t.TempDir(), "db2.sqlite"),
+	}}
+	defer upd.Store.Close()
+
+	// Start the background checker so the Updater's checkNow channel exists;
+	// until then TriggerImmediateCheck is a silent no-op. The checker keeps
+	// running until the deferred cancel fires at the end of the test.
+	ctx, cancel := context.WithCancel(t.Context())
+	upd.StartBackgroundUpdaterChecker(ctx, func(string) error { return nil })
+	defer cancel()
+
+	var notificationCalled atomic.Bool
+	server := &Server{
+		Store:   testStore,
+		Restart: func() {},
+		Updater: upd,
+		UpdateAvailableFunc: func() {
+			notificationCalled.Store(true)
+		},
+	}
+
+	// Re-enable auto-update via settings API
+	settings.AutoUpdateEnabled = true
+	body, err := json.Marshal(settings)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	req := httptest.NewRequest("POST", "/api/v1/settings", bytes.NewReader(body))
+	req.Header.Set("Content-Type", "application/json")
+	rr := httptest.NewRecorder()
+
+	if err := server.settings(rr, req); err != nil {
+		t.Fatalf("settings() error = %v", err)
+	}
+	if rr.Code != http.StatusOK {
+		t.Fatalf("settings() status = %d, want %d", rr.Code, http.StatusOK)
+	}
+
+	// UpdateAvailableFunc should NOT be called since there's no pending update
+	if notificationCalled.Load() {
+		t.Fatal("UpdateAvailableFunc should not be called when there is no pending update")
+	}
+}
--- a/app/updater/updater.go
+++ b/app/updater/updater.go
@@ -19,6 +19,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"sync"
 	"time"

 	"github.com/ollama/ollama/app/store"
@@ -58,7 +59,8 @@ func (u *Updater) checkForUpdate(ctx context.Context) (bool, UpdateResponse) {
 	query := requestURL.Query()
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
-	query.Add("version", version.Version)
+	currentVersion := version.Version
+	query.Add("version", currentVersion)
 	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))

 	// The original macOS app used to use the device ID
@@ -131,15 +133,27 @@ func (u *Updater) checkForUpdate(ctx context.Context) (bool, UpdateResponse) {
 }

 func (u *Updater) DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
+	// Create a cancellable context for this download
+	downloadCtx, cancel := context.WithCancel(ctx)
+	u.cancelDownloadLock.Lock()
+	u.cancelDownload = cancel
+	u.cancelDownloadLock.Unlock()
+	defer func() {
+		u.cancelDownloadLock.Lock()
+		u.cancelDownload = nil
+		u.cancelDownloadLock.Unlock()
+		cancel()
+	}()
+
 	// Do a head first to check etag info
-	req, err := http.NewRequestWithContext(ctx, http.MethodHead, updateResp.UpdateURL, nil)
+	req, err := http.NewRequestWithContext(downloadCtx, http.MethodHead, updateResp.UpdateURL, nil)
 	if err != nil {
 		return err
 	}

 	// In case of slow downloads, continue the update check in the background
-	bgctx, cancel := context.WithCancel(ctx)
-	defer cancel()
+	bgctx, bgcancel := context.WithCancel(downloadCtx)
+	defer bgcancel()
 	go func() {
 		for {
 			select {
@@ -176,6 +190,7 @@ func (u *Updater) DownloadNewRelease(ctx context.Context, updateResp UpdateRespo
 	_, err = os.Stat(stageFilename)
 	if err == nil {
 		slog.Info("update already downloaded", "bundle", stageFilename)
+		UpdateDownloaded = true
 		return nil
 	}

@@ -244,33 +259,85 @@ func cleanupOldDownloads(stageDir string) {
 }

+// Updater checks for new releases and downloads them; a download in progress
+// can be cancelled and an immediate check requested from other goroutines.
 type Updater struct {
-	Store *store.Store
+	Store              *store.Store       // settings store; AutoUpdateEnabled is read from it before downloading
+	cancelDownload     context.CancelFunc // cancels the in-flight DownloadNewRelease; nil when none is running
+	cancelDownloadLock sync.Mutex         // guards cancelDownload
+	checkNow           chan struct{}      // created by StartBackgroundUpdaterChecker; a send requests an immediate check
+}
+
+// CancelOngoingDownload aborts the release download currently in progress.
+// It is a no-op when no download is running.
+func (u *Updater) CancelOngoingDownload() {
+	u.cancelDownloadLock.Lock()
+	defer u.cancelDownloadLock.Unlock()
+
+	if u.cancelDownload == nil {
+		return
+	}
+	slog.Info("cancelling ongoing update download")
+	u.cancelDownload()
+	// Clear the handle so a second call does not cancel again.
+	u.cancelDownload = nil
+}
+
+// TriggerImmediateCheck asks the background checker to look for updates right
+// away. It does nothing until StartBackgroundUpdaterChecker has created the
+// signalling channel, and it never blocks: when a check is already queued the
+// extra request is dropped.
+func (u *Updater) TriggerImmediateCheck() {
+	if u.checkNow == nil {
+		return
+	}
+	select {
+	case u.checkNow <- struct{}{}:
+	default:
+		// Check already pending, no need to queue another
+	}
 }

 func (u *Updater) StartBackgroundUpdaterChecker(ctx context.Context, cb func(string) error) {
+	u.checkNow = make(chan struct{}, 1)
+	u.checkNow <- struct{}{} // Trigger first check after initial delay
 	go func() {
 		// Don't blast an update message immediately after startup
 		time.Sleep(UpdateCheckInitialDelay)
 		slog.Info("beginning update checker", "interval", UpdateCheckInterval)
+		ticker := time.NewTicker(UpdateCheckInterval)
+		defer ticker.Stop()
+
 		for {
-			available, resp := u.checkForUpdate(ctx)
-			if available {
-				err := u.DownloadNewRelease(ctx, resp)
-				if err != nil {
-					slog.Error(fmt.Sprintf("failed to download new release: %s", err))
-				} else {
-					err = cb(resp.UpdateVersion)
-					if err != nil {
-						slog.Warn(fmt.Sprintf("failed to register update available with tray: %s", err))
-					}
-				}
-			}
 			select {
 			case <-ctx.Done():
 				slog.Debug("stopping background update checker")
 				return
-			default:
-				time.Sleep(UpdateCheckInterval)
+			case <-u.checkNow:
+				// Immediate check triggered
+			case <-ticker.C:
+				// Regular interval check
+			}
+
+			// Always check for updates
+			available, resp := u.checkForUpdate(ctx)
+			if !available {
+				continue
+			}
+
+			// Update is available - check if auto-update is enabled for downloading
+			settings, err := u.Store.Settings()
+			if err != nil {
+				slog.Error("failed to load settings", "error", err)
+				continue
+			}
+
+			if !settings.AutoUpdateEnabled {
+				// Auto-update disabled - don't download, just log
+				slog.Debug("update available but auto-update disabled", "version", resp.UpdateVersion)
+				continue
+			}
+
+			// Auto-update is enabled - download
+			err = u.DownloadNewRelease(ctx, resp)
+			if err != nil {
+				slog.Error("failed to download new release", "error", err)
+				continue
+			}
+
+			// Download successful - show tray notification
+			err = cb(resp.UpdateVersion)
+			if err != nil {
+				slog.Warn("failed to register update available with tray", "error", err)
 			}
 		}
 	}()
--- a/app/updater/updater_test.go
+++ b/app/updater/updater_test.go
@@ -11,6 +11,8 @@ import (
 	"log/slog"
 	"net/http"
 	"net/http/httptest"
+	"path/filepath"
+	"sync/atomic"
 	"testing"
 	"time"

@@ -33,7 +35,7 @@ func TestIsNewReleaseAvailable(t *testing.T) {
 	defer server.Close()
 	slog.Debug("server", "url", server.URL)

-	updater := &Updater{Store: &store.Store{}}
+	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
 	defer updater.Store.Close() // Ensure database is closed
 	UpdateCheckURLBase = server.URL + "/update.json"
 	updatePresent, resp := updater.checkForUpdate(t.Context())
@@ -84,8 +86,18 @@ func TestBackgoundChecker(t *testing.T) {
 	defer server.Close()
 	UpdateCheckURLBase = server.URL + "/update.json"

-	updater := &Updater{Store: &store.Store{}}
-	defer updater.Store.Close() // Ensure database is closed
+	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
+	defer updater.Store.Close()
+
+	settings, err := updater.Store.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	settings.AutoUpdateEnabled = true
+	if err := updater.Store.SetSettings(settings); err != nil {
+		t.Fatal(err)
+	}
+
 	updater.StartBackgroundUpdaterChecker(ctx, cb)
 	select {
 	case <-stallTimer.C:
@@ -99,3 +111,267 @@ func TestBackgoundChecker(t *testing.T) {
 		}
 	}
 }
+
+// TestAutoUpdateDisabledSkipsDownload verifies that the background checker
+// neither downloads a release nor invokes the callback while auto-update is
+// disabled in settings, even though the server advertises a new version.
+func TestAutoUpdateDisabledSkipsDownload(t *testing.T) {
+	UpdateStageDir = t.TempDir()
+	var downloadAttempted atomic.Bool
+
+	ctx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	// Short delays so several check cycles fit inside the wait below.
+	UpdateCheckInitialDelay = 5 * time.Millisecond
+	UpdateCheckInterval = 5 * time.Millisecond
+	VerifyDownload = func() error {
+		return nil
+	}
+
+	var server *httptest.Server
+	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/update.json" {
+			w.Write([]byte(
+				fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
+					server.URL+"/9.9.9/"+Installer)))
+		} else if r.URL.Path == "/9.9.9/"+Installer {
+			// Any request for the installer means a download was attempted.
+			downloadAttempted.Store(true)
+			buf := &bytes.Buffer{}
+			zw := zip.NewWriter(buf)
+			zw.Close()
+			io.Copy(w, buf)
+		}
+	}))
+	defer server.Close()
+	UpdateCheckURLBase = server.URL + "/update.json"
+
+	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
+	defer updater.Store.Close()
+
+	// Ensure auto-update is disabled
+	settings, err := updater.Store.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	settings.AutoUpdateEnabled = false
+	if err := updater.Store.SetSettings(settings); err != nil {
+		t.Fatal(err)
+	}
+
+	// cb runs on the checker's goroutine, so it must report with t.Error:
+	// t.Fatal (FailNow) may only be called from the goroutine running the test.
+	cb := func(ver string) error {
+		t.Error("callback should not be called when auto-update is disabled")
+		return nil
+	}
+
+	updater.StartBackgroundUpdaterChecker(ctx, cb)
+
+	// Wait enough time for multiple check cycles
+	time.Sleep(50 * time.Millisecond)
+
+	if downloadAttempted.Load() {
+		t.Fatal("download should not be attempted when auto-update is disabled")
+	}
+}
+
+// TestAutoUpdateReenabledDownloadsUpdate verifies that the background checker
+// skips the download while auto-update is disabled and then downloads the
+// release and invokes the callback once the setting is switched back on.
+func TestAutoUpdateReenabledDownloadsUpdate(t *testing.T) {
+	UpdateStageDir = t.TempDir()
+	var downloadAttempted atomic.Bool
+	// Buffered so the callback's non-blocking send is not lost before the test waits on it.
+	callbackCalled := make(chan struct{}, 1)
+
+	ctx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	// Short delays so several check cycles fit inside the 50ms wait below.
+	UpdateCheckInitialDelay = 5 * time.Millisecond
+	UpdateCheckInterval = 5 * time.Millisecond
+	VerifyDownload = func() error {
+		return nil
+	}
+
+	var server *httptest.Server
+	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/update.json" {
+			w.Write([]byte(
+				fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
+					server.URL+"/9.9.9/"+Installer)))
+		} else if r.URL.Path == "/9.9.9/"+Installer {
+			// Any request for the installer means a download was attempted.
+			downloadAttempted.Store(true)
+			buf := &bytes.Buffer{}
+			zw := zip.NewWriter(buf)
+			zw.Close()
+			io.Copy(w, buf)
+		}
+	}))
+	defer server.Close()
+	UpdateCheckURLBase = server.URL + "/update.json"
+
+	upd := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
+	defer upd.Store.Close()
+
+	// Start with auto-update disabled
+	settings, err := upd.Store.Settings()
+	if err != nil {
+		t.Fatal(err)
+	}
+	settings.AutoUpdateEnabled = false
+	if err := upd.Store.SetSettings(settings); err != nil {
+		t.Fatal(err)
+	}
+
+	// The callback runs on the checker's goroutine; it only signals the test.
+	cb := func(ver string) error {
+		select {
+		case callbackCalled <- struct{}{}:
+		default:
+		}
+		return nil
+	}
+
+	upd.StartBackgroundUpdaterChecker(ctx, cb)
+
+	// Wait for a few cycles with auto-update disabled - no download should happen
+	time.Sleep(50 * time.Millisecond)
+	if downloadAttempted.Load() {
+		t.Fatal("download should not happen while auto-update is disabled")
+	}
+
+	// Re-enable auto-update
+	settings.AutoUpdateEnabled = true
+	if err := upd.Store.SetSettings(settings); err != nil {
+		t.Fatal(err)
+	}
+
+	// Wait for the checker to pick it up and download
+	select {
+	case <-callbackCalled:
+		// Success: download happened and callback was called after re-enabling
+		if !downloadAttempted.Load() {
+			t.Fatal("expected download to be attempted after re-enabling")
+		}
+	case <-time.After(5 * time.Second):
+		t.Fatal("expected download and callback after re-enabling auto-update")
+	}
+}
+
+// TestCancelOngoingDownload verifies that CancelOngoingDownload aborts an
+// in-flight DownloadNewRelease: the server must observe the request context
+// being cancelled, and the download call must return.
+func TestCancelOngoingDownload(t *testing.T) {
+	UpdateStageDir = t.TempDir()
+	downloadStarted := make(chan struct{})
+	downloadCancelled := make(chan struct{})
+	downloadReturned := make(chan struct{})
+
+	ctx := t.Context()
+	VerifyDownload = func() error {
+		return nil
+	}
+
+	var server *httptest.Server
+	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/update.json" {
+			w.Write([]byte(
+				fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
+					server.URL+"/9.9.9/"+Installer)))
+		} else if r.URL.Path == "/9.9.9/"+Installer {
+			// Answer the preliminary HEAD request right away; only the body
+			// request is held open.
+			if r.Method == http.MethodHead {
+				w.Header().Set("Content-Length", "1000000")
+				w.WriteHeader(http.StatusOK)
+				return
+			}
+			// Signal that download has started
+			close(downloadStarted)
+			// Wait for cancellation or timeout
+			select {
+			case <-r.Context().Done():
+				close(downloadCancelled)
+				return
+			case <-time.After(5 * time.Second):
+				t.Error("download was not cancelled in time")
+			}
+		}
+	}))
+	defer server.Close()
+	UpdateCheckURLBase = server.URL + "/update.json"
+
+	updater := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
+	defer updater.Store.Close()
+
+	// Fail fast if the check itself did not work; otherwise the download below
+	// would run against an empty response and time out with a misleading message.
+	available, resp := updater.checkForUpdate(ctx)
+	if !available {
+		t.Fatal("expected an update to be available")
+	}
+
+	// Start download in goroutine. Its error is deliberately ignored: the test
+	// cancels the download and asserts on what the server observes.
+	go func() {
+		defer close(downloadReturned)
+		_ = updater.DownloadNewRelease(ctx, resp)
+	}()
+
+	// Wait for download to start
+	select {
+	case <-downloadStarted:
+	case <-time.After(2 * time.Second):
+		t.Fatal("download did not start in time")
+	}
+
+	// Cancel the download
+	updater.CancelOngoingDownload()
+
+	// Verify cancellation was received
+	select {
+	case <-downloadCancelled:
+		// Success
+	case <-time.After(2 * time.Second):
+		t.Fatal("download cancellation was not received by server")
+	}
+
+	// Don't let the download goroutine outlive the test.
+	select {
+	case <-downloadReturned:
+	case <-time.After(2 * time.Second):
+		t.Fatal("DownloadNewRelease did not return after cancellation")
+	}
+}
+
+// TestTriggerImmediateCheck verifies that TriggerImmediateCheck makes the
+// background checker hit the update endpoint again without waiting for the
+// regular interval.
+func TestTriggerImmediateCheck(t *testing.T) {
+	UpdateStageDir = t.TempDir()
+	var checks atomic.Int32
+	checked := make(chan struct{}, 10)
+
+	ctx, stop := context.WithCancel(t.Context())
+	defer stop()
+	// Set a very long interval so only TriggerImmediateCheck causes checks
+	UpdateCheckInitialDelay = 1 * time.Millisecond
+	UpdateCheckInterval = 1 * time.Hour
+	VerifyDownload = func() error { return nil }
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/update.json" {
+			return
+		}
+		checks.Add(1)
+		// Non-blocking notify so the handler never stalls on a full channel.
+		select {
+		case checked <- struct{}{}:
+		default:
+		}
+		// Return no update available
+		w.WriteHeader(http.StatusNoContent)
+	}))
+	defer srv.Close()
+	UpdateCheckURLBase = srv.URL + "/update.json"
+
+	upd := &Updater{Store: &store.Store{DBPath: filepath.Join(t.TempDir(), "test.db")}}
+	defer upd.Store.Close()
+
+	upd.StartBackgroundUpdaterChecker(ctx, func(ver string) error { return nil })
+
+	// awaitCheck blocks until the server has seen one update check, failing
+	// the test with the given message after two seconds.
+	awaitCheck := func(failure string) {
+		t.Helper()
+		select {
+		case <-checked:
+		case <-time.After(2 * time.Second):
+			t.Fatal(failure)
+		}
+	}
+
+	// Wait for the initial check that fires after the initial delay
+	awaitCheck("initial check did not happen")
+	before := checks.Load()
+
+	// Trigger immediate check
+	upd.TriggerImmediateCheck()
+
+	// Wait for the triggered check
+	awaitCheck("triggered check did not happen")
+
+	if after := checks.Load(); after <= before {
+		t.Fatalf("TriggerImmediateCheck did not cause additional check: initial=%d, final=%d", before, after)
+	}
+}
--- a/app/wintray/tray.go
+++ b/app/wintray/tray.go
@@ -369,25 +369,6 @@ func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
 	return nil
 }

-// func (t *winTray) hideMenuItem(menuItemId, parentId uint32) error {
-// 	const ERROR_SUCCESS syscall.Errno = 0
-
-// 	t.muMenus.RLock()
-// 	menu := uintptr(t.menus[parentId])
-// 	t.muMenus.RUnlock()
-// 	res, _, err := pRemoveMenu.Call(
-// 		menu,
-// 		uintptr(menuItemId),
-// 		MF_BYCOMMAND,
-// 	)
-// 	if res == 0 && err.(syscall.Errno) != ERROR_SUCCESS {
-// 		return err
-// 	}
-// 	t.delFromVisibleItems(parentId, menuItemId)
-
-// 	return nil
-// }
-
 func (t *winTray) showMenu() error {
 	p := point{}
 	boolRet, _, err := pGetCursorPos.Call(uintptr(unsafe.Pointer(&p)))
--- a/app/wintray/w32api.go
+++ b/app/wintray/w32api.go
@@ -51,7 +51,6 @@ const (
 	IMAGE_ICON          = 1          // Loads an icon
 	LR_DEFAULTSIZE      = 0x00000040 // Loads default-size icon for windows(SM_CXICON x SM_CYICON) if cx, cy are set to zero
 	LR_LOADFROMFILE     = 0x00000010 // Loads the stand-alone image from the file
-	MF_BYCOMMAND        = 0x00000000
 	MFS_DISABLED        = 0x00000003
 	MFT_SEPARATOR       = 0x00000800
 	MFT_STRING          = 0x00000000
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -57,9 +57,9 @@ import (

 func init() {
 	// Override default selectors to use Bubbletea TUI instead of raw terminal I/O.
-	config.DefaultSingleSelector = func(title string, items []config.ModelItem) (string, error) {
+	config.DefaultSingleSelector = func(title string, items []config.ModelItem, current string) (string, error) {
 		tuiItems := tui.ReorderItems(tui.ConvertItems(items))
-		result, err := tui.SelectSingle(title, tuiItems)
+		result, err := tui.SelectSingle(title, tuiItems, current)
 		if errors.Is(err, tui.ErrCancelled) {
 			return "", config.ErrCancelled
 		}
@@ -182,6 +182,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				mfConfig.System = cmd.Args
 			case "license":
 				mfConfig.License = cmd.Args
+			case "parser":
+				mfConfig.Parser = cmd.Args
+			case "renderer":
+				mfConfig.Renderer = cmd.Args
 			}
 		}

@@ -581,6 +585,17 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	opts.WordWrap = !nowrap

+	useImagegen := false
+	if cmd.Flags().Lookup("imagegen") != nil {
+		useImagegen, err = cmd.Flags().GetBool("imagegen")
+		if err != nil {
+			return err
+		}
+	}
+	if useImagegen {
+		opts.Options["use_imagegen_runner"] = true
+	}
+
 	// Fill out the rest of the options based on information about the
 	// model.
 	client, err := api.ClientFromEnvironment()
@@ -1886,12 +1901,9 @@ func runInteractiveTUI(cmd *cobra.Command) {
 	}

 	// Selector adapters for tui
-	singleSelector := func(title string, items []config.ModelItem) (string, error) {
-		tuiItems := make([]tui.SelectItem, len(items))
-		for i, item := range items {
-			tuiItems[i] = tui.SelectItem{Name: item.Name, Description: item.Description, Recommended: item.Recommended}
-		}
-		result, err := tui.SelectSingle(title, tuiItems)
+	singleSelector := func(title string, items []config.ModelItem, current string) (string, error) {
+		tuiItems := tui.ReorderItems(tui.ConvertItems(items))
+		result, err := tui.SelectSingle(title, tuiItems, current)
 		if errors.Is(err, tui.ErrCancelled) {
 			return "", config.ErrCancelled
 		}
@@ -1899,10 +1911,7 @@ func runInteractiveTUI(cmd *cobra.Command) {
 	}

 	multiSelector := func(title string, items []config.ModelItem, preChecked []string) ([]string, error) {
-		tuiItems := make([]tui.SelectItem, len(items))
-		for i, item := range items {
-			tuiItems[i] = tui.SelectItem{Name: item.Name, Description: item.Description, Recommended: item.Recommended}
-		}
+		tuiItems := tui.ReorderItems(tui.ConvertItems(items))
 		result, err := tui.SelectMultiple(title, tuiItems, preChecked)
 		if errors.Is(err, tui.ErrCancelled) {
 			return nil, config.ErrCancelled
@@ -1947,9 +1956,13 @@ func runInteractiveTUI(cmd *cobra.Command) {
 		}

 		launchIntegration := func(name string) bool {
+			if err := config.EnsureInstalled(name); err != nil {
+				fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+				return true
+			}
 			// If not configured or model no longer exists, prompt for model selection
 			configuredModel := config.IntegrationModel(name)
-			if configuredModel == "" || !config.ModelExists(cmd.Context(), configuredModel) {
+			if configuredModel == "" || !config.ModelExists(cmd.Context(), configuredModel) || config.IsCloudModelDisabled(cmd.Context(), configuredModel) {
 				err := config.ConfigureIntegrationWithSelectors(cmd.Context(), name, singleSelector, multiSelector)
 				if errors.Is(err, config.ErrCancelled) {
 					return false // Return to main menu
@@ -1971,7 +1984,7 @@ func runInteractiveTUI(cmd *cobra.Command) {
 			return
 		case tui.SelectionRunModel:
 			_ = config.SetLastSelection("run")
-			if modelName := config.LastModel(); modelName != "" {
+			if modelName := config.LastModel(); modelName != "" && !config.IsCloudModelDisabled(cmd.Context(), modelName) {
 				runModel(modelName)
 			} else {
 				modelName, err := config.SelectModelWithSelector(cmd.Context(), singleSelector)
@@ -1999,6 +2012,9 @@ func runInteractiveTUI(cmd *cobra.Command) {
 					continue
 				}
 			}
+			if config.IsCloudModelDisabled(cmd.Context(), modelName) {
+				continue // Return to main menu
+			}
 			runModel(modelName)
 		case tui.SelectionIntegration:
 			_ = config.SetLastSelection(result.Integration)
@@ -2008,6 +2024,17 @@ func runInteractiveTUI(cmd *cobra.Command) {
 		case tui.SelectionChangeIntegration:
 			_ = config.SetLastSelection(result.Integration)
 			if len(result.Models) > 0 {
+				// Filter out cloud-disabled models
+				var filtered []string
+				for _, m := range result.Models {
+					if !config.IsCloudModelDisabled(cmd.Context(), m) {
+						filtered = append(filtered, m)
+					}
+				}
+				if len(filtered) == 0 {
+					continue
+				}
+				result.Models = filtered
 				// Multi-select from modal (Editor integrations)
 				if err := config.SaveAndEditIntegration(result.Integration, result.Models); err != nil {
 					fmt.Fprintf(os.Stderr, "Error configuring %s: %v\n", result.Integration, err)
@@ -2017,8 +2044,11 @@ func runInteractiveTUI(cmd *cobra.Command) {
 					fmt.Fprintf(os.Stderr, "Error launching %s: %v\n", result.Integration, err)
 				}
 			} else if result.Model != "" {
+				if config.IsCloudModelDisabled(cmd.Context(), result.Model) {
+					continue
+				}
 				// Single-select from modal - save and launch
-				if err := config.SaveIntegrationModel(result.Integration, result.Model); err != nil {
+				if err := config.SaveIntegration(result.Integration, []string{result.Model}); err != nil {
 					fmt.Fprintf(os.Stderr, "Error saving config: %v\n", err)
 					continue
 				}
@@ -2130,6 +2160,9 @@ func NewCLI() *cobra.Command {
 	// Image generation flags (width, height, steps, seed, etc.)
 	imagegen.RegisterFlags(runCmd)

+	runCmd.Flags().Bool("imagegen", false, "Use the imagegen runner for LLM inference")
+	runCmd.Flags().MarkHidden("imagegen")
+
 	stopCmd := &cobra.Command{
 		Use:     "stop MODEL",
 		Short:   "Stop a running model",
@@ -2273,6 +2306,7 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_MAX_QUEUE"],
 				envVars["OLLAMA_MODELS"],
 				envVars["OLLAMA_NUM_PARALLEL"],
+				envVars["OLLAMA_NO_CLOUD"],
 				envVars["OLLAMA_NOPRUNE"],
 				envVars["OLLAMA_ORIGINS"],
 				envVars["OLLAMA_SCHED_SPREAD"],
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -126,7 +126,7 @@ func (c *Claude) ConfigureAliases(ctx context.Context, model string, existingAli
 	fmt.Fprintf(os.Stderr, "\n%sModel Configuration%s\n\n", ansiBold, ansiReset)

 	if aliases["primary"] == "" || force {
-		primary, err := DefaultSingleSelector("Select model:", items)
+		primary, err := DefaultSingleSelector("Select model:", items, aliases["primary"])
 		if err != nil {
 			return nil, false, err
 		}
--- a/cmd/config/claude_test.go
+++ b/cmd/config/claude_test.go
@@ -140,7 +140,7 @@ func TestClaudeModelEnvVars(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)

-		saveIntegration("claude", []string{"qwen3:8b"})
+		SaveIntegration("claude", []string{"qwen3:8b"})
 		saveAliases("claude", map[string]string{"primary": "qwen3:8b"})

 		got := envMap(c.modelEnvVars("qwen3:8b"))
@@ -162,7 +162,7 @@ func TestClaudeModelEnvVars(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)

-		saveIntegration("claude", []string{"llama3.2:70b"})
+		SaveIntegration("claude", []string{"llama3.2:70b"})
 		saveAliases("claude", map[string]string{
 			"primary": "llama3.2:70b",
 			"fast":    "llama3.2:8b",
@@ -187,7 +187,7 @@ func TestClaudeModelEnvVars(t *testing.T) {
 		tmpDir := t.TempDir()
 		setTestHome(t, tmpDir)

-		saveIntegration("claude", []string{"saved-model"})
+		SaveIntegration("claude", []string{"saved-model"})
 		saveAliases("claude", map[string]string{"primary": "saved-model"})

 		got := envMap(c.modelEnvVars("different-model"))
--- a/cmd/config/cline.go
+++ b/cmd/config/cline.go
@@ -0,0 +1,152 @@
+package config
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+// Cline implements Runner and Editor for the Cline CLI integration.
+type Cline struct{}
+
+// String returns the integration's display name.
+func (c *Cline) String() string { return "Cline" }
+
+// clineConfigPath returns the path of Cline's globalState.json under the
+// current user's home directory.
+func clineConfigPath() (string, error) {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(home, ".cline", "data", "globalState.json"), nil
+}
+
+// Run writes the Ollama settings into Cline's config and then executes the
+// cline binary with args, attached to this process's stdin/stdout/stderr.
+//
+// A saved model list for "cline" takes precedence over model. The list is
+// passed through resolveEditorModels first; if that reports errCancelled,
+// Run returns nil without launching anything.
+func (c *Cline) Run(model string, args []string) error {
+	if _, err := exec.LookPath("cline"); err != nil {
+		return errors.New("cline is not installed, install with: npm install -g cline")
+	}
+
+	models := []string{model}
+	if saved, err := loadIntegration("cline"); err == nil && len(saved.Models) > 0 {
+		models = saved.Models
+	}
+	models, err := resolveEditorModels("cline", models, func() ([]string, error) {
+		return selectModels(context.Background(), "cline", "")
+	})
+	if errors.Is(err, errCancelled) {
+		return nil
+	}
+	if err != nil {
+		return err
+	}
+	if err := c.Edit(models); err != nil {
+		return fmt.Errorf("setup failed: %w", err)
+	}
+
+	cmd := exec.Command("cline", args...)
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	return cmd.Run()
+}
+
+// Paths returns the Cline config file path when that file exists, and nil
+// when it does not or the home directory cannot be determined.
+func (c *Cline) Paths() []string {
+	p, err := clineConfigPath()
+	if err != nil {
+		return nil
+	}
+	if _, err := os.Stat(p); err != nil {
+		return nil
+	}
+	return []string{p}
+}
+
+// Edit points Cline's act and plan modes at Ollama using models[0], keeping
+// every other key already present in globalState.json. It is a no-op when
+// models is empty.
+func (c *Cline) Edit(models []string) error {
+	if len(models) == 0 {
+		return nil
+	}
+
+	configPath, err := clineConfigPath()
+	if err != nil {
+		return err
+	}
+	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+		return err
+	}
+
+	var config map[string]any
+	data, err := os.ReadFile(configPath)
+	switch {
+	case err == nil:
+		if err := json.Unmarshal(data, &config); err != nil {
+			return fmt.Errorf("failed to parse config: %w, at: %s", err, configPath)
+		}
+	case !errors.Is(err, os.ErrNotExist):
+		// Refuse to replace a config that exists but could not be read.
+		return fmt.Errorf("failed to read config: %w, at: %s", err, configPath)
+	}
+	if config == nil {
+		// Either there is no file yet, or it held a literal JSON null, which
+		// unmarshals to a nil map that would panic on the writes below.
+		config = make(map[string]any)
+	}
+
+	// Set Ollama as the provider for both act and plan modes
+	baseURL := envconfig.Host().String()
+	config["ollamaBaseUrl"] = baseURL
+	config["actModeApiProvider"] = "ollama"
+	config["actModeOllamaModelId"] = models[0]
+	config["actModeOllamaBaseUrl"] = baseURL
+	config["planModeApiProvider"] = "ollama"
+	config["planModeOllamaModelId"] = models[0]
+	config["planModeOllamaBaseUrl"] = baseURL
+
+	config["welcomeViewCompleted"] = true
+
+	data, err = json.MarshalIndent(config, "", "  ")
+	if err != nil {
+		return err
+	}
+	return writeWithBackup(configPath, data)
+}
+
+// Models returns the model ID configured for Cline's act mode as a
+// one-element slice. It returns nil when the config cannot be loaded, the
+// act-mode provider is not "ollama", or no model ID is set.
+func (c *Cline) Models() []string {
+	p, err := clineConfigPath()
+	if err != nil {
+		return nil
+	}
+	config, err := readJSONFile(p)
+	if err != nil {
+		return nil
+	}
+	if config["actModeApiProvider"] != "ollama" {
+		return nil
+	}
+
+	modelID, _ := config["actModeOllamaModelId"].(string)
+	if modelID == "" {
+		return nil
+	}
+	return []string{modelID}
+}
--- a/cmd/config/cline_test.go
+++ b/cmd/config/cline_test.go
@@ -0,0 +1,251 @@
+package config
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestClineIntegration checks Cline's display name and that *Cline satisfies
+// the Runner and Editor interfaces.
+func TestClineIntegration(t *testing.T) {
+	c := &Cline{}
+
+	t.Run("String", func(t *testing.T) {
+		if got := c.String(); got != "Cline" {
+			t.Errorf("String() = %q, want %q", got, "Cline")
+		}
+	})
+
+	t.Run("implements Runner", func(t *testing.T) {
+		var _ Runner = c
+	})
+
+	t.Run("implements Editor", func(t *testing.T) {
+		var _ Editor = c
+	})
+}
+
+// TestClineEdit exercises Cline.Edit against a globalState.json under a
+// temporary home directory. Every subtest starts from a removed ~/.cline tree.
+func TestClineEdit(t *testing.T) {
+	c := &Cline{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	configDir := filepath.Join(tmpDir, ".cline", "data")
+	configPath := filepath.Join(configDir, "globalState.json")
+
+	// reset removes whatever config a previous subtest left behind.
+	reset := func(t *testing.T) {
+		t.Helper()
+		if err := os.RemoveAll(filepath.Join(tmpDir, ".cline")); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// readConfig decodes the written config. It fails the test on a missing
+	// or malformed file instead of handing back a nil map, so assertions
+	// never run against data that was not actually read.
+	readConfig := func(t *testing.T) map[string]any {
+		t.Helper()
+		data, err := os.ReadFile(configPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		var config map[string]any
+		if err := json.Unmarshal(data, &config); err != nil {
+			t.Fatal(err)
+		}
+		return config
+	}
+
+	t.Run("creates config from scratch", func(t *testing.T) {
+		reset(t)
+
+		if err := c.Edit([]string{"kimi-k2.5:cloud"}); err != nil {
+			t.Fatal(err)
+		}
+
+		config := readConfig(t)
+		if config["actModeApiProvider"] != "ollama" {
+			t.Errorf("actModeApiProvider = %v, want ollama", config["actModeApiProvider"])
+		}
+		if config["actModeOllamaModelId"] != "kimi-k2.5:cloud" {
+			t.Errorf("actModeOllamaModelId = %v, want kimi-k2.5:cloud", config["actModeOllamaModelId"])
+		}
+		if config["planModeApiProvider"] != "ollama" {
+			t.Errorf("planModeApiProvider = %v, want ollama", config["planModeApiProvider"])
+		}
+		if config["planModeOllamaModelId"] != "kimi-k2.5:cloud" {
+			t.Errorf("planModeOllamaModelId = %v, want kimi-k2.5:cloud", config["planModeOllamaModelId"])
+		}
+		if config["welcomeViewCompleted"] != true {
+			t.Errorf("welcomeViewCompleted = %v, want true", config["welcomeViewCompleted"])
+		}
+	})
+
+	t.Run("preserves existing fields", func(t *testing.T) {
+		reset(t)
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+
+		existing := map[string]any{
+			"remoteRulesToggles":    map[string]any{},
+			"remoteWorkflowToggles": map[string]any{},
+			"customSetting":         "keep-me",
+		}
+		data, err := json.Marshal(existing)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(configPath, data, 0o644); err != nil {
+			t.Fatal(err)
+		}
+
+		if err := c.Edit([]string{"glm-5:cloud"}); err != nil {
+			t.Fatal(err)
+		}
+
+		config := readConfig(t)
+		if config["customSetting"] != "keep-me" {
+			t.Errorf("customSetting was not preserved")
+		}
+		if config["actModeOllamaModelId"] != "glm-5:cloud" {
+			t.Errorf("actModeOllamaModelId = %v, want glm-5:cloud", config["actModeOllamaModelId"])
+		}
+	})
+
+	t.Run("updates model on re-edit", func(t *testing.T) {
+		reset(t)
+
+		if err := c.Edit([]string{"kimi-k2.5:cloud"}); err != nil {
+			t.Fatal(err)
+		}
+		if err := c.Edit([]string{"glm-5:cloud"}); err != nil {
+			t.Fatal(err)
+		}
+
+		config := readConfig(t)
+		if config["actModeOllamaModelId"] != "glm-5:cloud" {
+			t.Errorf("actModeOllamaModelId = %v, want glm-5:cloud", config["actModeOllamaModelId"])
+		}
+		if config["planModeOllamaModelId"] != "glm-5:cloud" {
+			t.Errorf("planModeOllamaModelId = %v, want glm-5:cloud", config["planModeOllamaModelId"])
+		}
+	})
+
+	t.Run("empty models is no-op", func(t *testing.T) {
+		reset(t)
+
+		if err := c.Edit(nil); err != nil {
+			t.Fatal(err)
+		}
+
+		if _, err := os.Stat(configPath); !os.IsNotExist(err) {
+			t.Error("expected no config file to be created for empty models")
+		}
+	})
+
+	t.Run("uses first model as primary", func(t *testing.T) {
+		reset(t)
+
+		if err := c.Edit([]string{"kimi-k2.5:cloud", "glm-5:cloud"}); err != nil {
+			t.Fatal(err)
+		}
+
+		config := readConfig(t)
+		if config["actModeOllamaModelId"] != "kimi-k2.5:cloud" {
+			t.Errorf("actModeOllamaModelId = %v, want kimi-k2.5:cloud (first model)", config["actModeOllamaModelId"])
+		}
+	})
+}
+
+// TestClineModels checks what Cline.Models reports for a missing config, a
+// non-Ollama provider, and an Ollama provider with a model set. The subtests
+// share one home directory and run in order.
+func TestClineModels(t *testing.T) {
+	c := &Cline{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	configDir := filepath.Join(tmpDir, ".cline", "data")
+	configPath := filepath.Join(configDir, "globalState.json")
+
+	// writeConfig replaces globalState.json with config, failing the test if
+	// the fixture cannot be written.
+	writeConfig := func(t *testing.T, config map[string]any) {
+		t.Helper()
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		data, err := json.Marshal(config)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(configPath, data, 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	t.Run("returns nil when no config", func(t *testing.T) {
+		if models := c.Models(); models != nil {
+			t.Errorf("Models() = %v, want nil", models)
+		}
+	})
+
+	t.Run("returns nil when provider is not ollama", func(t *testing.T) {
+		writeConfig(t, map[string]any{
+			"actModeApiProvider":   "anthropic",
+			"actModeOllamaModelId": "some-model",
+		})
+
+		if models := c.Models(); models != nil {
+			t.Errorf("Models() = %v, want nil", models)
+		}
+	})
+
+	t.Run("returns model when ollama is configured", func(t *testing.T) {
+		writeConfig(t, map[string]any{
+			"actModeApiProvider":   "ollama",
+			"actModeOllamaModelId": "kimi-k2.5:cloud",
+		})
+
+		models := c.Models()
+		if len(models) != 1 || models[0] != "kimi-k2.5:cloud" {
+			t.Errorf("Models() = %v, want [kimi-k2.5:cloud]", models)
+		}
+	})
+}
+
+// TestClinePaths checks that Cline.Paths reports the config file only once
+// it exists on disk.
+func TestClinePaths(t *testing.T) {
+	c := &Cline{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	t.Run("returns nil when no config exists", func(t *testing.T) {
+		if paths := c.Paths(); paths != nil {
+			t.Errorf("Paths() = %v, want nil", paths)
+		}
+	})
+
+	t.Run("returns path when config exists", func(t *testing.T) {
+		configDir := filepath.Join(tmpDir, ".cline", "data")
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		configPath := filepath.Join(configDir, "globalState.json")
+		if err := os.WriteFile(configPath, []byte("{}"), 0o644); err != nil {
+			t.Fatal(err)
+		}
+
+		paths := c.Paths()
+		if len(paths) != 1 || paths[0] != configPath {
+			t.Errorf("Paths() = %v, want [%s]", paths, configPath)
+		}
+	})
+}
--- a/cmd/config/codex.go
+++ b/cmd/config/codex.go
@@ -6,6 +6,7 @@ import (
 	"os/exec"
 	"strings"

+	"github.com/ollama/ollama/envconfig"
 	"golang.org/x/mod/semver"
 )

@@ -32,6 +33,10 @@ func (c *Codex) Run(model string, args []string) error {
 	cmd.Stdin = os.Stdin
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
+	cmd.Env = append(os.Environ(),
+		"OPENAI_BASE_URL="+envconfig.Host().String()+"/v1/",
+		"OPENAI_API_KEY=ollama",
+	)
 	return cmd.Run()
 }

--- a/cmd/config/config.go
+++ b/cmd/config/config.go
@@ -15,8 +15,9 @@ import (
 )

 type integration struct {
-	Models  []string          `json:"models"`
-	Aliases map[string]string `json:"aliases,omitempty"`
+	Models    []string          `json:"models"`
+	Aliases   map[string]string `json:"aliases,omitempty"`
+	Onboarded bool              `json:"onboarded,omitempty"`
 }

 type config struct {
@@ -56,8 +57,8 @@ func migrateConfig() (bool, error) {
 		return false, err
 	}

-	var js json.RawMessage
-	if err := json.Unmarshal(oldData, &js); err != nil {
+	// Ignore legacy files with invalid JSON and continue startup.
+	if !json.Valid(oldData) {
 		return false, nil
 	}

@@ -126,7 +127,7 @@ func save(cfg *config) error {
 	return writeWithBackup(path, data)
 }

-func saveIntegration(appName string, models []string) error {
+func SaveIntegration(appName string, models []string) error {
 	if appName == "" {
 		return errors.New("app name cannot be empty")
 	}
@@ -139,34 +140,54 @@ func saveIntegration(appName string, models []string) error {
 	key := strings.ToLower(appName)
 	existing := cfg.Integrations[key]
 	var aliases map[string]string
-	if existing != nil && existing.Aliases != nil {
+	var onboarded bool
+	if existing != nil {
 		aliases = existing.Aliases
+		onboarded = existing.Onboarded
 	}

 	cfg.Integrations[key] = &integration{
-		Models:  models,
-		Aliases: aliases,
+		Models:    models,
+		Aliases:   aliases,
+		Onboarded: onboarded,
 	}

 	return save(cfg)
 }

+// integrationOnboarded sets the Onboarded flag on appName's entry in ollama's
+// config (creating the entry if absent) and writes the config back.
+func integrationOnboarded(appName string) error {
+	cfg, err := load()
+	if err != nil {
+		return err
+	}
+	name := strings.ToLower(appName)
+	entry := cfg.Integrations[name]
+	if entry == nil {
+		entry = new(integration)
+		cfg.Integrations[name] = entry
+	}
+	entry.Onboarded = true
+	return save(cfg)
+}
+
 // IntegrationModel returns the first configured model for an integration, or empty string if not configured.
 func IntegrationModel(appName string) string {
-	ic, err := loadIntegration(appName)
-	if err != nil || len(ic.Models) == 0 {
+	integrationConfig, err := loadIntegration(appName)
+	if err != nil || len(integrationConfig.Models) == 0 {
 		return ""
 	}
-	return ic.Models[0]
+	return integrationConfig.Models[0]
 }

 // IntegrationModels returns all configured models for an integration, or nil.
 func IntegrationModels(appName string) []string {
-	ic, err := loadIntegration(appName)
-	if err != nil || len(ic.Models) == 0 {
+	integrationConfig, err := loadIntegration(appName)
+	if err != nil || len(integrationConfig.Models) == 0 {
 		return nil
 	}
-	return ic.Models
+	return integrationConfig.Models
 }

 // LastModel returns the last model that was run, or empty string if none.
@@ -234,12 +255,12 @@ func loadIntegration(appName string) (*integration, error) {
 		return nil, err
 	}

-	ic, ok := cfg.Integrations[strings.ToLower(appName)]
+	integrationConfig, ok := cfg.Integrations[strings.ToLower(appName)]
 	if !ok {
 		return nil, os.ErrNotExist
 	}

-	return ic, nil
+	return integrationConfig, nil
 }

 func saveAliases(appName string, aliases map[string]string) error {
@@ -272,8 +293,8 @@ func listIntegrations() ([]integration, error) {
 	}

 	result := make([]integration, 0, len(cfg.Integrations))
-	for _, ic := range cfg.Integrations {
-		result = append(result, *ic)
+	for _, integrationConfig := range cfg.Integrations {
+		result = append(result, *integrationConfig)
 	}

 	return result, nil
--- a/cmd/config/config_cloud_test.go
+++ b/cmd/config/config_cloud_test.go
@@ -85,7 +85,7 @@ func TestSaveAliases_PreservesModels(t *testing.T) {
 	setTestHome(t, tmpDir)

 	// First save integration with models
-	if err := saveIntegration("claude", []string{"model1", "model2"}); err != nil {
+	if err := SaveIntegration("claude", []string{"model1", "model2"}); err != nil {
 		t.Fatalf("failed to save integration: %v", err)
 	}

@@ -604,7 +604,7 @@ func TestModelsAndAliasesMustStayInSync(t *testing.T) {
 		}

 		// Save integration with same model (this is the pattern we use)
-		if err := saveIntegration("claude", []string{"model-a"}); err != nil {
+		if err := SaveIntegration("claude", []string{"model-a"}); err != nil {
 			t.Fatal(err)
 		}

@@ -619,7 +619,7 @@ func TestModelsAndAliasesMustStayInSync(t *testing.T) {
 		setTestHome(t, tmpDir)

 		// Simulate out-of-sync state (like manual edit or bug)
-		if err := saveIntegration("claude", []string{"old-model"}); err != nil {
+		if err := SaveIntegration("claude", []string{"old-model"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := saveAliases("claude", map[string]string{"primary": "new-model"}); err != nil {
@@ -634,7 +634,7 @@ func TestModelsAndAliasesMustStayInSync(t *testing.T) {
 		}

 		// The fix: when updating aliases, also update models
-		if err := saveIntegration("claude", []string{loaded.Aliases["primary"]}); err != nil {
+		if err := SaveIntegration("claude", []string{loaded.Aliases["primary"]}); err != nil {
 			t.Fatal(err)
 		}

@@ -650,7 +650,7 @@ func TestModelsAndAliasesMustStayInSync(t *testing.T) {
 		setTestHome(t, tmpDir)

 		// Initial state
-		if err := saveIntegration("claude", []string{"initial-model"}); err != nil {
+		if err := SaveIntegration("claude", []string{"initial-model"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := saveAliases("claude", map[string]string{"primary": "initial-model"}); err != nil {
@@ -662,7 +662,7 @@ func TestModelsAndAliasesMustStayInSync(t *testing.T) {
 		if err := saveAliases("claude", newAliases); err != nil {
 			t.Fatal(err)
 		}
-		if err := saveIntegration("claude", []string{newAliases["primary"]}); err != nil {
+		if err := SaveIntegration("claude", []string{newAliases["primary"]}); err != nil {
 			t.Fatal(err)
 		}

--- a/cmd/config/config_test.go
+++ b/cmd/config/config_test.go
@@ -27,7 +27,7 @@ func TestIntegrationConfig(t *testing.T) {

 	t.Run("save and load round-trip", func(t *testing.T) {
 		models := []string{"llama3.2", "mistral", "qwen2.5"}
-		if err := saveIntegration("claude", models); err != nil {
+		if err := SaveIntegration("claude", models); err != nil {
 			t.Fatal(err)
 		}

@@ -48,7 +48,7 @@ func TestIntegrationConfig(t *testing.T) {

 	t.Run("save and load aliases", func(t *testing.T) {
 		models := []string{"llama3.2"}
-		if err := saveIntegration("claude", models); err != nil {
+		if err := SaveIntegration("claude", models); err != nil {
 			t.Fatal(err)
 		}
 		aliases := map[string]string{
@@ -74,14 +74,14 @@ func TestIntegrationConfig(t *testing.T) {
 	})

 	t.Run("saveIntegration preserves aliases", func(t *testing.T) {
-		if err := saveIntegration("claude", []string{"model-a"}); err != nil {
+		if err := SaveIntegration("claude", []string{"model-a"}); err != nil {
 			t.Fatal(err)
 		}
 		if err := saveAliases("claude", map[string]string{"primary": "model-a", "fast": "model-small"}); err != nil {
 			t.Fatal(err)
 		}

-		if err := saveIntegration("claude", []string{"model-b"}); err != nil {
+		if err := SaveIntegration("claude", []string{"model-b"}); err != nil {
 			t.Fatal(err)
 		}
 		config, err := loadIntegration("claude")
@@ -94,7 +94,7 @@ func TestIntegrationConfig(t *testing.T) {
 	})

 	t.Run("defaultModel returns first model", func(t *testing.T) {
-		saveIntegration("codex", []string{"model-a", "model-b"})
+		SaveIntegration("codex", []string{"model-a", "model-b"})

 		config, _ := loadIntegration("codex")
 		defaultModel := ""
@@ -118,7 +118,7 @@ func TestIntegrationConfig(t *testing.T) {
 	})

 	t.Run("app name is case-insensitive", func(t *testing.T) {
-		saveIntegration("Claude", []string{"model-x"})
+		SaveIntegration("Claude", []string{"model-x"})

 		config, err := loadIntegration("claude")
 		if err != nil {
@@ -134,8 +134,8 @@ func TestIntegrationConfig(t *testing.T) {
 	})

 	t.Run("multiple integrations in single file", func(t *testing.T) {
-		saveIntegration("app1", []string{"model-1"})
-		saveIntegration("app2", []string{"model-2"})
+		SaveIntegration("app1", []string{"model-1"})
+		SaveIntegration("app2", []string{"model-2"})

 		config1, _ := loadIntegration("app1")
 		config2, _ := loadIntegration("app2")
@@ -172,8 +172,8 @@ func TestListIntegrations(t *testing.T) {
 	})

 	t.Run("returns all saved integrations", func(t *testing.T) {
-		saveIntegration("claude", []string{"model-1"})
-		saveIntegration("droid", []string{"model-2"})
+		SaveIntegration("claude", []string{"model-1"})
+		SaveIntegration("droid", []string{"model-2"})

 		configs, err := listIntegrations()
 		if err != nil {
@@ -261,7 +261,7 @@ func TestSaveIntegration_NilModels(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)

-	if err := saveIntegration("test", nil); err != nil {
+	if err := SaveIntegration("test", nil); err != nil {
 		t.Fatalf("saveIntegration with nil models failed: %v", err)
 	}

@@ -281,7 +281,7 @@ func TestSaveIntegration_EmptyAppName(t *testing.T) {
 	tmpDir := t.TempDir()
 	setTestHome(t, tmpDir)

-	err := saveIntegration("", []string{"model"})
+	err := SaveIntegration("", []string{"model"})
 	if err == nil {
 		t.Error("expected error for empty app name, got nil")
 	}
@@ -511,7 +511,7 @@ func TestMigrateConfig(t *testing.T) {
 		os.WriteFile(filepath.Join(legacyDir, "config.json"), []byte(`{"integrations":{"claude":{"models":["llama3.2"]}}}`), 0o644)

 		// load triggers migration, then save should write to new path
-		if err := saveIntegration("codex", []string{"qwen2.5"}); err != nil {
+		if err := SaveIntegration("codex", []string{"qwen2.5"}); err != nil {
 			t.Fatal(err)
 		}

--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -3,6 +3,7 @@ package config
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"os/exec"
@@ -51,6 +52,16 @@ func (d *Droid) Run(model string, args []string) error {
 	if config, err := loadIntegration("droid"); err == nil && len(config.Models) > 0 {
 		models = config.Models
 	}
+	var err error
+	models, err = resolveEditorModels("droid", models, func() ([]string, error) {
+		return selectModels(context.Background(), "droid", "")
+	})
+	if errors.Is(err, errCancelled) {
+		return nil
+	}
+	if err != nil {
+		return err
+	}
 	if err := d.Edit(models); err != nil {
 		return fmt.Errorf("setup failed: %w", err)
 	}
--- a/cmd/config/integrations.go
+++ b/cmd/config/integrations.go
@@ -4,7 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"maps"
+	"net/http"
 	"os"
 	"os/exec"
 	"runtime"
@@ -13,6 +13,7 @@ import (
 	"time"

 	"github.com/ollama/ollama/api"
+	internalcloud "github.com/ollama/ollama/internal/cloud"
 	"github.com/ollama/ollama/progress"
 	"github.com/spf13/cobra"
 )
@@ -52,6 +53,7 @@ type AliasConfigurer interface {
 var integrations = map[string]Runner{
 	"claude":   &Claude{},
 	"clawdbot": &Openclaw{},
+	"cline":    &Cline{},
 	"codex":    &Codex{},
 	"moltbot":  &Openclaw{},
 	"droid":    &Droid{},
@@ -100,16 +102,17 @@ var recommendedVRAM = map[string]string{
 var integrationAliases = map[string]bool{
 	"clawdbot": true,
 	"moltbot":  true,
-	"pi":       true,
 }

 // integrationInstallHints maps integration names to install URLs.
 var integrationInstallHints = map[string]string{
 	"claude":   "https://code.claude.com/docs/en/quickstart",
+	"cline":    "https://cline.bot/cli",
 	"openclaw": "https://docs.openclaw.ai",
 	"codex":    "https://developers.openai.com/codex/cli/",
 	"droid":    "https://docs.factory.ai/cli/getting-started/quickstart",
 	"opencode": "https://opencode.ai",
+	"pi":       "https://github.com/badlogic/pi-mono",
 }

 // hyperlink wraps text in an OSC 8 terminal hyperlink so it is cmd+clickable.
@@ -127,13 +130,21 @@ type IntegrationInfo struct {
 // integrationDescriptions maps integration names to short descriptions.
 var integrationDescriptions = map[string]string{
 	"claude":   "Anthropic's coding tool with subagents",
+	"cline":    "Autonomous coding agent with parallel execution",
 	"codex":    "OpenAI's open-source coding agent",
 	"openclaw": "Personal AI with 100+ skills",
 	"droid":    "Factory's coding agent across terminal and IDEs",
 	"opencode": "Anomaly's open-source coding agent",
+	"pi":       "Minimal AI agent toolkit with plugin support",
 }

-// ListIntegrationInfos returns all non-alias registered integrations, sorted by name.
+// integrationOrder defines a custom display order for integrations.
+// Integrations listed here are placed at the end in the given order;
+// all others appear first, sorted alphabetically.
+var integrationOrder = []string{"opencode", "droid", "pi", "cline"}
+
+// ListIntegrationInfos returns all non-alias registered integrations, sorted by name
+// with integrationOrder entries placed at the end.
 func ListIntegrationInfos() []IntegrationInfo {
 	var result []IntegrationInfo
 	for name, r := range integrations {
@@ -146,7 +157,26 @@ func ListIntegrationInfos() []IntegrationInfo {
 			Description: integrationDescriptions[name],
 		})
 	}
+
+	orderRank := make(map[string]int, len(integrationOrder))
+	for i, name := range integrationOrder {
+		orderRank[name] = i + 1 // 1-indexed so 0 means "not in the list"
+	}
+
 	slices.SortFunc(result, func(a, b IntegrationInfo) int {
+		aRank, bRank := orderRank[a.Name], orderRank[b.Name]
+		// Both have custom order: sort by their rank
+		if aRank > 0 && bRank > 0 {
+			return aRank - bRank
+		}
+		// Only one has custom order: it goes last
+		if aRank > 0 {
+			return 1
+		}
+		if bRank > 0 {
+			return -1
+		}
+		// Neither has custom order: alphabetical
 		return strings.Compare(a.Name, b.Name)
 	})
 	return result
@@ -184,14 +214,45 @@ func IsIntegrationInstalled(name string) bool {
 	case "droid":
 		_, err := exec.LookPath("droid")
 		return err == nil
+	case "cline":
+		_, err := exec.LookPath("cline")
+		return err == nil
 	case "opencode":
 		_, err := exec.LookPath("opencode")
 		return err == nil
+	case "pi":
+		_, err := exec.LookPath("pi")
+		return err == nil
 	default:
 		return true // Assume installed for unknown integrations
 	}
 }

+// AutoInstallable reports whether the named integration (case-insensitive)
+// is one that can be installed automatically when missing (e.g. via npm).
+func AutoInstallable(name string) bool {
+	// OpenClaw and its aliases are currently the only such integrations.
+	switch lowered := strings.ToLower(name); lowered {
+	case "openclaw", "clawdbot", "moltbot":
+		return true
+	}
+	return false
+}
+
+// EnsureInstalled is a no-op unless name is auto-installable and
+// IsIntegrationInstalled reports it missing; in that case it delegates to
+// ensureOpenclawInstalled and returns that call's error.
+func EnsureInstalled(name string) error {
+	if !AutoInstallable(name) || IsIntegrationInstalled(name) {
+		return nil
+	}
+	// Every auto-installable name today is OpenClaw or one of its aliases.
+	if _, err := ensureOpenclawInstalled(); err != nil {
+		return err
+	}
+	return nil
+}
+
 // IsEditorIntegration returns true if the named integration uses multi-model
 // selection (implements the Editor interface).
 func IsEditorIntegration(name string) bool {
@@ -212,7 +273,8 @@ type ModelItem struct {
 }

 // SingleSelector is a function type for single item selection.
-type SingleSelector func(title string, items []ModelItem) (string, error)
+// current is the name of the previously selected item to highlight; empty means no pre-selection.
+type SingleSelector func(title string, items []ModelItem, current string) (string, error)

 // MultiSelector is a function type for multi item selection.
 type MultiSelector func(title string, items []ModelItem, preChecked []string) ([]string, error)
@@ -234,6 +296,11 @@ func SelectModelWithSelector(ctx context.Context, selector SingleSelector) (stri
 		existing = append(existing, modelInfo{Name: m.Name, Remote: m.RemoteModel != ""})
 	}

+	cloudDisabled, _ := cloudStatusDisabled(ctx, client)
+	if cloudDisabled {
+		existing = filterCloudModels(existing)
+	}
+
 	lastModel := LastModel()
 	var preChecked []string
 	if lastModel != "" {
@@ -242,11 +309,15 @@ func SelectModelWithSelector(ctx context.Context, selector SingleSelector) (stri

 	items, _, existingModels, cloudModels := buildModelList(existing, preChecked, lastModel)

+	if cloudDisabled {
+		items = filterCloudItems(items)
+	}
+
 	if len(items) == 0 {
 		return "", fmt.Errorf("no models available, run 'ollama pull <model>' first")
 	}

-	selected, err := selector("Select model to run:", items)
+	selected, err := selector("Select model to run:", items, "")
 	if err != nil {
 		return "", err
 	}
@@ -356,13 +427,11 @@ func selectIntegration() (string, error) {
 		return "", fmt.Errorf("no integrations available")
 	}

-	names := slices.Sorted(maps.Keys(integrations))
 	var items []ModelItem
-	for _, name := range names {
+	for name, r := range integrations {
 		if integrationAliases[name] {
 			continue
 		}
-		r := integrations[name]
 		description := r.String()
 		if conn, err := loadIntegration(name); err == nil && len(conn.Models) > 0 {
 			description = fmt.Sprintf("%s (%s)", r.String(), conn.Models[0])
@@ -370,7 +439,25 @@ func selectIntegration() (string, error) {
 		items = append(items, ModelItem{Name: name, Description: description})
 	}

-	return DefaultSingleSelector("Select integration:", items)
+	orderRank := make(map[string]int, len(integrationOrder))
+	for i, name := range integrationOrder {
+		orderRank[name] = i + 1
+	}
+	slices.SortFunc(items, func(a, b ModelItem) int {
+		aRank, bRank := orderRank[a.Name], orderRank[b.Name]
+		if aRank > 0 && bRank > 0 {
+			return aRank - bRank
+		}
+		if aRank > 0 {
+			return 1
+		}
+		if bRank > 0 {
+			return -1
+		}
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	return DefaultSingleSelector("Select integration:", items, "")
 }

 // selectModelsWithSelectors lets the user select models for an integration using provided selectors.
@@ -395,6 +482,11 @@ func selectModelsWithSelectors(ctx context.Context, name, current string, single
 		existing = append(existing, modelInfo{Name: m.Name, Remote: m.RemoteModel != ""})
 	}

+	cloudDisabled, _ := cloudStatusDisabled(ctx, client)
+	if cloudDisabled {
+		existing = filterCloudModels(existing)
+	}
+
 	var preChecked []string
 	if saved, err := loadIntegration(name); err == nil {
 		preChecked = saved.Models
@@ -404,6 +496,10 @@ func selectModelsWithSelectors(ctx context.Context, name, current string, single

 	items, preChecked, existingModels, cloudModels := buildModelList(existing, preChecked, current)

+	if cloudDisabled {
+		items = filterCloudItems(items)
+	}
+
 	if len(items) == 0 {
 		return nil, fmt.Errorf("no models available")
 	}
@@ -419,7 +515,7 @@ func selectModelsWithSelectors(ctx context.Context, name, current string, single
 		if _, ok := r.(AliasConfigurer); ok {
 			prompt = fmt.Sprintf("Select Primary model for %s:", r)
 		}
-		model, err := single(prompt, items)
+		model, err := single(prompt, items, current)
 		if err != nil {
 			return nil, err
 		}
@@ -510,8 +606,17 @@ func listModels(ctx context.Context) ([]ModelItem, map[string]bool, map[string]b
 		})
 	}

+	cloudDisabled, _ := cloudStatusDisabled(ctx, client)
+	if cloudDisabled {
+		existing = filterCloudModels(existing)
+	}
+
 	items, _, existingModels, cloudModels := buildModelList(existing, nil, "")

+	if cloudDisabled {
+		items = filterCloudItems(items)
+	}
+
 	if len(items) == 0 {
 		return nil, nil, nil, nil, fmt.Errorf("no models available, run 'ollama pull <model>' first")
 	}
@@ -540,6 +645,9 @@ func ensureAuth(ctx context.Context, client *api.Client, cloudModels map[string]
 	if len(selectedCloudModels) == 0 {
 		return nil
 	}
+	if disabled, known := cloudStatusDisabled(ctx, client); known && disabled {
+		return errors.New(internalcloud.DisabledError("remote inference is unavailable"))
+	}

 	user, err := client.Whoami(ctx)
 	if err == nil && user != nil && user.Name != "" {
@@ -672,25 +780,6 @@ func LaunchIntegrationWithModel(name, modelName string) error {
 	return runIntegration(name, modelName, nil)
 }

-// SaveIntegrationModel saves the model for an integration.
-func SaveIntegrationModel(name, modelName string) error {
-	// Load existing models and prepend the new one
-	var models []string
-	if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
-		models = existing.Models
-		// Remove the model if it already exists
-		for i, m := range models {
-			if m == modelName {
-				models = append(models[:i], models[i+1:]...)
-				break
-			}
-		}
-	}
-	// Prepend the new model
-	models = append([]string{modelName}, models...)
-	return saveIntegration(name, models)
-}
-
 // SaveAndEditIntegration saves the models for an Editor integration and runs its Edit method
 // to write the integration's config files.
 func SaveAndEditIntegration(name string, models []string) error {
@@ -698,7 +787,7 @@ func SaveAndEditIntegration(name string, models []string) error {
 	if !ok {
 		return fmt.Errorf("unknown integration: %s", name)
 	}
-	if err := saveIntegration(name, models); err != nil {
+	if err := SaveIntegration(name, models); err != nil {
 		return fmt.Errorf("failed to save: %w", err)
 	}
 	if editor, isEditor := r.(Editor); isEditor {
@@ -709,6 +798,29 @@ func SaveAndEditIntegration(name string, models []string) error {
 	return nil
 }

+// resolveEditorModels returns the entries of models kept by
+// filterDisabledCloudModels, persisting the shortened list when any were
+// dropped. When nothing is left, picker's selection is saved and returned.
+func resolveEditorModels(name string, models []string, picker func() ([]string, error)) ([]string, error) {
+	usable := filterDisabledCloudModels(models)
+	if dropped := len(models) - len(usable); dropped != 0 {
+		if err := SaveIntegration(name, usable); err != nil {
+			return nil, fmt.Errorf("failed to save: %w", err)
+		}
+	}
+	if len(usable) == 0 {
+		chosen, err := picker()
+		if err != nil {
+			return nil, err
+		}
+		if err := SaveIntegration(name, chosen); err != nil {
+			return nil, fmt.Errorf("failed to save: %w", err)
+		}
+		return chosen, nil
+	}
+	return usable, nil
+}
+
 // ConfigureIntegrationWithSelectors allows the user to select/change the model for an integration using custom selectors.
 func ConfigureIntegrationWithSelectors(ctx context.Context, name string, single SingleSelector, multi MultiSelector) error {
 	r, ok := integrations[name]
@@ -743,7 +855,7 @@ func ConfigureIntegrationWithSelectors(ctx context.Context, name string, single
 		}
 	}

-	if err := saveIntegration(name, models); err != nil {
+	if err := SaveIntegration(name, models); err != nil {
 		return fmt.Errorf("failed to save: %w", err)
 	}

@@ -776,10 +888,12 @@ Without arguments, this is equivalent to running 'ollama' directly.

 Supported integrations:
  claude    Claude Code
+  cline     Cline
  codex     Codex
  droid     Droid
  opencode  OpenCode
  openclaw  OpenClaw (aliases: clawdbot, moltbot)
+  pi        Pi

 Examples:
  ollama launch
@@ -837,6 +951,14 @@ Examples:
 				return fmt.Errorf("unknown integration: %s", name)
 			}

+			if err := EnsureInstalled(name); err != nil {
+				return err
+			}
+
+			if modelFlag != "" && IsCloudModelDisabled(cmd.Context(), modelFlag) {
+				modelFlag = ""
+			}
+
 			// Handle AliasConfigurer integrations (claude, codex)
 			if ac, ok := r.(AliasConfigurer); ok {
 				client, err := api.ClientFromEnvironment()
@@ -864,7 +986,7 @@ Examples:
 						model = cfg.Models[0]
 						// AliasConfigurer integrations use single model; sanitize if multiple
 						if len(cfg.Models) > 1 {
-							_ = saveIntegration(name, []string{model})
+							_ = SaveIntegration(name, []string{model})
 						}
 					}
 				}
@@ -876,7 +998,9 @@ Examples:

 				// Validate saved model still exists
 				if model != "" && modelFlag == "" {
-					if _, err := client.Show(cmd.Context(), &api.ShowRequest{Model: model}); err != nil {
+					if disabled, _ := cloudStatusDisabled(cmd.Context(), client); disabled && isCloudModelName(model) {
+						model = ""
+					} else if _, err := client.Show(cmd.Context(), &api.ShowRequest{Model: model}); err != nil {
 						fmt.Fprintf(os.Stderr, "%sConfigured model %q not found%s\n\n", ansiGray, model, ansiReset)
 						if err := ShowOrPull(cmd.Context(), client, model); err != nil {
 							model = ""
@@ -884,18 +1008,16 @@ Examples:
 					}
 				}

-				// If no valid model or --config flag, show picker
-				if model == "" || configFlag {
-					aliases, _, err := ac.ConfigureAliases(cmd.Context(), model, existingAliases, configFlag)
-					if errors.Is(err, errCancelled) {
-						return nil
-					}
-					if err != nil {
-						return err
-					}
-					model = aliases["primary"]
-					existingAliases = aliases
+				// Show picker so user can change model (skip when --model flag provided)
+				aliases, _, err := ac.ConfigureAliases(cmd.Context(), model, existingAliases, modelFlag == "")
+				if errors.Is(err, errCancelled) {
+					return nil
 				}
+				if err != nil {
+					return err
+				}
+				model = aliases["primary"]
+				existingAliases = aliases

 				// Ensure cloud models are authenticated
 				if isCloudModel(cmd.Context(), client, model) {
@@ -908,7 +1030,7 @@ Examples:
 				if err := syncAliases(cmd.Context(), client, ac, name, model, existingAliases); err != nil {
 					fmt.Fprintf(os.Stderr, "%sWarning: Could not sync aliases: %v%s\n", ansiGray, err, ansiReset)
 				}
-				if err := saveIntegration(name, []string{model}); err != nil {
+				if err := SaveIntegration(name, []string{model}); err != nil {
 					return fmt.Errorf("failed to save: %w", err)
 				}

@@ -946,11 +1068,24 @@ Examples:
 						}
 					}
 				}
-			} else if saved, err := loadIntegration(name); err == nil && len(saved.Models) > 0 && !configFlag {
-				return runIntegration(name, saved.Models[0], passArgs)
+				models = filterDisabledCloudModels(models)
+				if len(models) == 0 {
+					var err error
+					models, err = selectModels(cmd.Context(), name, "")
+					if errors.Is(err, errCancelled) {
+						return nil
+					}
+					if err != nil {
+						return err
+					}
+				}
 			} else {
+				current := ""
+				if saved, err := loadIntegration(name); err == nil && len(saved.Models) > 0 {
+					current = saved.Models[0]
+				}
 				var err error
-				models, err = selectModels(cmd.Context(), name, "")
+				models, err = selectModels(cmd.Context(), name, current)
 				if errors.Is(err, errCancelled) {
 					return nil
 				}
@@ -974,7 +1109,7 @@ Examples:
 				}
 			}

-			if err := saveIntegration(name, models); err != nil {
+			if err := SaveIntegration(name, models); err != nil {
 				return fmt.Errorf("failed to save: %w", err)
 			}

@@ -1048,7 +1183,7 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
 			continue
 		}
 		items = append(items, rec)
-		if strings.HasSuffix(rec.Name, ":cloud") {
+		if isCloudModelName(rec.Name) {
 			cloudModels[rec.Name] = true
 		}
 	}
@@ -1153,7 +1288,55 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
 	return items, preChecked, existingModels, cloudModels
 }

-// isCloudModel checks if a model is a cloud model using the Show API.
+// IsCloudModelDisabled reports whether name carries a cloud-model suffix while
+// the server reports cloud features as disabled. It returns false when the
+// name is not a cloud name, when no API client can be built from the
+// environment, or when cloudStatusDisabled does not report a disabled state.
+func IsCloudModelDisabled(ctx context.Context, name string) bool {
+	if isCloudModelName(name) {
+		if client, err := api.ClientFromEnvironment(); err == nil {
+			off, _ := cloudStatusDisabled(ctx, client)
+			return off
+		}
+	}
+	return false
+}
+
+// isCloudModelName reports whether name ends in ":cloud" or "-cloud".
+func isCloudModelName(name string) bool {
+	for _, suffix := range []string{":cloud", "-cloud"} {
+		if strings.HasSuffix(name, suffix) {
+			return true
+		}
+	}
+	return false
+}
+
+// filterCloudModels returns the entries of existing whose Remote flag is not
+// set. The result reuses existing's backing array, so the caller's slice
+// contents are overwritten in place.
+func filterCloudModels(existing []modelInfo) []modelInfo {
+	local := existing[:0]
+	for _, info := range existing {
+		if info.Remote {
+			continue
+		}
+		local = append(local, info)
+	}
+	return local
+}
+
+// filterDisabledCloudModels removes cloud models from a list when cloud is disabled.
+//
+// Whether cloud is disabled is reported by the server rather than derived from
+// an individual model, so the status is looked up at most once (on the first
+// cloud-named model) instead of once per cloud model. Lists without any
+// cloud-named model trigger no lookup at all. The result is a new slice; it is
+// nil when no model is kept.
+func filterDisabledCloudModels(models []string) []string {
+	var (
+		filtered []string
+		checked  bool
+		disabled bool
+	)
+	for _, m := range models {
+		if isCloudModelName(m) {
+			if !checked {
+				// IsCloudModelDisabled depends on m only through the name
+				// check that already passed, so its answer holds for every
+				// other cloud-named model in the list.
+				disabled = IsCloudModelDisabled(context.Background(), m)
+				checked = true
+			}
+			if disabled {
+				continue
+			}
+		}
+		filtered = append(filtered, m)
+	}
+	return filtered
+}
+
+// filterCloudItems returns the items whose Name does not look like a cloud
+// model name (see isCloudModelName). The result reuses items' backing array,
+// so the caller's slice contents are overwritten in place.
+func filterCloudItems(items []ModelItem) []ModelItem {
+	kept := items[:0]
+	for i := range items {
+		if isCloudModelName(items[i].Name) {
+			continue
+		}
+		kept = append(kept, items[i])
+	}
+	return kept
+}
+
 func isCloudModel(ctx context.Context, client *api.Client, name string) bool {
 	if client == nil {
 		return false
@@ -1183,6 +1366,11 @@ func GetModelItems(ctx context.Context) ([]ModelItem, map[string]bool) {
 		existing = append(existing, modelInfo{Name: m.Name, Remote: m.RemoteModel != ""})
 	}

+	cloudDisabled, _ := cloudStatusDisabled(ctx, client)
+	if cloudDisabled {
+		existing = filterCloudModels(existing)
+	}
+
 	lastModel := LastModel()
 	var preChecked []string
 	if lastModel != "" {
@@ -1191,9 +1379,25 @@ func GetModelItems(ctx context.Context) ([]ModelItem, map[string]bool) {

 	items, _, existingModels, _ := buildModelList(existing, preChecked, lastModel)

+	if cloudDisabled {
+		items = filterCloudItems(items)
+	}
+
 	return items, existingModels
 }

+// cloudStatusDisabled asks the server for its cloud status through
+// client.CloudStatusExperimental. disabled is the reported Cloud.Disabled
+// value; known is true only when the status call succeeded. Any error,
+// including a 404 response, yields (false, false).
+func cloudStatusDisabled(ctx context.Context, client *api.Client) (disabled bool, known bool) {
+	status, err := client.CloudStatusExperimental(ctx)
+	if err != nil {
+		var statusErr api.StatusError
+		// NOTE(review): the 404 branch and the generic-error fallthrough return
+		// the same values, so this check currently does not change the result.
+		if errors.As(err, &statusErr) && statusErr.StatusCode == http.StatusNotFound {
+			return false, false
+		}
+		return false, false
+	}
+	return status.Cloud.Disabled, true
+}
+
 func pullModel(ctx context.Context, client *api.Client, model string) error {
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()
--- a/cmd/config/integrations_test.go
+++ b/cmd/config/integrations_test.go
@@ -16,6 +16,28 @@ import (
 	"github.com/spf13/cobra"
 )

+// stubEditorRunner is a test double that records how it was used: every model
+// list passed to Edit and the last model passed to Run.
+type stubEditorRunner struct {
+	edited   [][]string
+	ranModel string
+}
+
+// Run remembers the model it was asked to run and always succeeds.
+func (s *stubEditorRunner) Run(model string, args []string) error {
+	s.ranModel = model
+	return nil
+}
+
+// String returns the fixed display name "StubEditor".
+func (s *stubEditorRunner) String() string { return "StubEditor" }
+
+// Paths reports no config paths.
+func (s *stubEditorRunner) Paths() []string { return nil }
+
+// Edit appends a private copy of models to the recorded edits, so later
+// changes to the caller's slice do not affect what was recorded.
+func (s *stubEditorRunner) Edit(models []string) error {
+	s.edited = append(s.edited, append([]string(nil), models...))
+	return nil
+}
+
+// Models reports no configured models.
+func (s *stubEditorRunner) Models() []string { return nil }
+
 func TestIntegrationLookup(t *testing.T) {
 	tests := []struct {
 		name      string
@@ -149,6 +171,10 @@ func TestLaunchCmd_TUICallback(t *testing.T) {
 	})

 	t.Run("integration arg bypasses TUI", func(t *testing.T) {
+		srv := httptest.NewServer(http.NotFoundHandler())
+		defer srv.Close()
+		t.Setenv("OLLAMA_HOST", srv.URL)
+
 		tuiCalled := false
 		mockTUI := func(cmd *cobra.Command) {
 			tuiCalled = true
@@ -680,7 +706,7 @@ func TestEditorIntegration_SavedConfigSkipsSelection(t *testing.T) {
 	setTestHome(t, tmpDir)

 	// Save a config for opencode so it looks like a previous launch
-	if err := saveIntegration("opencode", []string{"llama3.2"}); err != nil {
+	if err := SaveIntegration("opencode", []string{"llama3.2"}); err != nil {
 		t.Fatal(err)
 	}

@@ -697,6 +723,137 @@ func TestEditorIntegration_SavedConfigSkipsSelection(t *testing.T) {
 	}
 }

+// TestResolveEditorLaunchModels_PicksWhenAllFiltered checks that, with the
+// server reporting cloud as disabled, a saved list made up only of cloud
+// models triggers the picker and that the picker's choice is both returned
+// and saved.
+func TestResolveEditorLaunchModels_PicksWhenAllFiltered(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// Stub server: /api/status reports cloud disabled, everything else is 404.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/status":
+			fmt.Fprintf(w, `{"cloud":{"disabled":true,"source":"config"}}`)
+		default:
+			w.WriteHeader(http.StatusNotFound)
+		}
+	}))
+	defer srv.Close()
+	t.Setenv("OLLAMA_HOST", srv.URL)
+
+	pickerCalled := false
+	models, err := resolveEditorModels("opencode", []string{"glm-5:cloud"}, func() ([]string, error) {
+		pickerCalled = true
+		return []string{"llama3.2"}, nil
+	})
+	if err != nil {
+		t.Fatalf("resolveEditorModels returned error: %v", err)
+	}
+	if !pickerCalled {
+		t.Fatal("expected model picker to be called when all models are filtered")
+	}
+	if diff := cmp.Diff([]string{"llama3.2"}, models); diff != "" {
+		t.Fatalf("resolved models mismatch (-want +got):\n%s", diff)
+	}
+
+	// The picker's selection must also have been written to the saved config.
+	saved, err := loadIntegration("opencode")
+	if err != nil {
+		t.Fatalf("failed to reload integration config: %v", err)
+	}
+	if diff := cmp.Diff([]string{"llama3.2"}, saved.Models); diff != "" {
+		t.Fatalf("saved models mismatch (-want +got):\n%s", diff)
+	}
+}
+
+// TestResolveEditorLaunchModels_FiltersAndSkipsPickerWhenLocalRemains checks
+// that, with the server reporting cloud as disabled, the cloud model is
+// dropped from a mixed list, the picker is not consulted because a local
+// model remains, and the trimmed list is saved.
+func TestResolveEditorLaunchModels_FiltersAndSkipsPickerWhenLocalRemains(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// Stub server: /api/status reports cloud disabled, everything else is 404.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/status":
+			fmt.Fprintf(w, `{"cloud":{"disabled":true,"source":"config"}}`)
+		default:
+			w.WriteHeader(http.StatusNotFound)
+		}
+	}))
+	defer srv.Close()
+	t.Setenv("OLLAMA_HOST", srv.URL)
+
+	pickerCalled := false
+	models, err := resolveEditorModels("droid", []string{"llama3.2", "glm-5:cloud"}, func() ([]string, error) {
+		pickerCalled = true
+		return []string{"qwen3:8b"}, nil
+	})
+	if err != nil {
+		t.Fatalf("resolveEditorModels returned error: %v", err)
+	}
+	if pickerCalled {
+		t.Fatal("picker should not be called when a local model remains")
+	}
+	if diff := cmp.Diff([]string{"llama3.2"}, models); diff != "" {
+		t.Fatalf("resolved models mismatch (-want +got):\n%s", diff)
+	}
+
+	// The trimmed list must also have been written to the saved config.
+	saved, err := loadIntegration("droid")
+	if err != nil {
+		t.Fatalf("failed to reload integration config: %v", err)
+	}
+	if diff := cmp.Diff([]string{"llama3.2"}, saved.Models); diff != "" {
+		t.Fatalf("saved models mismatch (-want +got):\n%s", diff)
+	}
+}
+
+// TestLaunchCmd_ModelFlagFiltersDisabledCloudFromSavedConfig runs the launch
+// command with --model llama3.2 for an editor integration whose saved config
+// holds only a cloud model, against a server that reports cloud as disabled.
+// It expects the saved config, the list handed to Edit, and the model passed
+// to Run to all be just llama3.2.
+func TestLaunchCmd_ModelFlagFiltersDisabledCloudFromSavedConfig(t *testing.T) {
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	// Seed a previous launch that selected a cloud model.
+	if err := SaveIntegration("stubeditor", []string{"glm-5:cloud"}); err != nil {
+		t.Fatalf("failed to seed saved config: %v", err)
+	}
+
+	// Stub server: cloud disabled, /api/show answers for llama3.2, rest is 404.
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/api/status":
+			fmt.Fprintf(w, `{"cloud":{"disabled":true,"source":"config"}}`)
+		case "/api/show":
+			fmt.Fprintf(w, `{"model":"llama3.2"}`)
+		default:
+			w.WriteHeader(http.StatusNotFound)
+		}
+	}))
+	defer srv.Close()
+	t.Setenv("OLLAMA_HOST", srv.URL)
+
+	// Register the stub in the package-level integrations map and restore the
+	// previous state (entry or absence) when the test finishes.
+	stub := &stubEditorRunner{}
+	old, existed := integrations["stubeditor"]
+	integrations["stubeditor"] = stub
+	defer func() {
+		if existed {
+			integrations["stubeditor"] = old
+		} else {
+			delete(integrations, "stubeditor")
+		}
+	}()
+
+	cmd := LaunchCmd(func(cmd *cobra.Command, args []string) error { return nil }, func(cmd *cobra.Command) {})
+	cmd.SetArgs([]string{"stubeditor", "--model", "llama3.2"})
+	if err := cmd.Execute(); err != nil {
+		t.Fatalf("launch command failed: %v", err)
+	}
+
+	saved, err := loadIntegration("stubeditor")
+	if err != nil {
+		t.Fatalf("failed to reload integration config: %v", err)
+	}
+	if diff := cmp.Diff([]string{"llama3.2"}, saved.Models); diff != "" {
+		t.Fatalf("saved models mismatch (-want +got):\n%s", diff)
+	}
+	// Edit must have been called exactly once, without the cloud model.
+	if diff := cmp.Diff([][]string{{"llama3.2"}}, stub.edited); diff != "" {
+		t.Fatalf("editor models mismatch (-want +got):\n%s", diff)
+	}
+	if stub.ranModel != "llama3.2" {
+		t.Fatalf("expected launch to run with llama3.2, got %q", stub.ranModel)
+	}
+}
+
 func TestAliasConfigurerInterface(t *testing.T) {
 	t.Run("claude implements AliasConfigurer", func(t *testing.T) {
 		claude := &Claude{}
@@ -1091,10 +1248,26 @@ func TestListIntegrationInfos(t *testing.T) {
 		}
 	})

-	t.Run("sorted by name", func(t *testing.T) {
+	t.Run("sorted with custom order at end", func(t *testing.T) {
+		// integrationOrder entries (cline, opencode) should appear last, in that order.
+		// All other entries should be sorted alphabetically before them.
+		orderRank := make(map[string]int)
+		for i, name := range integrationOrder {
+			orderRank[name] = i + 1
+		}
 		for i := 1; i < len(infos); i++ {
-			if infos[i-1].Name >= infos[i].Name {
-				t.Errorf("not sorted: %q >= %q", infos[i-1].Name, infos[i].Name)
+			aRank, bRank := orderRank[infos[i-1].Name], orderRank[infos[i].Name]
+			switch {
+			case aRank == 0 && bRank == 0:
+				if infos[i-1].Name >= infos[i].Name {
+					t.Errorf("non-ordered items not sorted: %q >= %q", infos[i-1].Name, infos[i].Name)
+				}
+			case aRank > 0 && bRank == 0:
+				t.Errorf("ordered item %q should come after non-ordered %q", infos[i-1].Name, infos[i].Name)
+			case aRank > 0 && bRank > 0:
+				if aRank >= bRank {
+					t.Errorf("ordered items wrong: %q (rank %d) before %q (rank %d)", infos[i-1].Name, aRank, infos[i].Name, bRank)
+				}
 			}
 		}
 	})
@@ -1234,7 +1407,7 @@ func TestIntegrationModels(t *testing.T) {
 	})

 	t.Run("returns all saved models", func(t *testing.T) {
-		if err := saveIntegration("droid", []string{"llama3.2", "qwen3:8b"}); err != nil {
+		if err := SaveIntegration("droid", []string{"llama3.2", "qwen3:8b"}); err != nil {
 			t.Fatal(err)
 		}
 		got := IntegrationModels("droid")
--- a/cmd/config/openclaw.go
+++ b/cmd/config/openclaw.go
@@ -1,69 +1,287 @@
 package config

 import (
-	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
-	"io"
+	"net"
+	"net/url"
 	"os"
 	"os/exec"
 	"path/filepath"
+	"runtime"
+	"slices"
 	"strings"
+	"time"

+	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/types/model"
 )

+const defaultGatewayPort = 18789
+
+// Bound model capability probing so launch/config cannot hang on slow/unreachable API calls.
+var openclawModelShowTimeout = 5 * time.Second
+
 type Openclaw struct{}

 func (c *Openclaw) String() string { return "OpenClaw" }

 func (c *Openclaw) Run(model string, args []string) error {
-	bin := "openclaw"
-	if _, err := exec.LookPath(bin); err != nil {
-		bin = "clawdbot"
-		if _, err := exec.LookPath(bin); err != nil {
-			return fmt.Errorf("openclaw is not installed, install from https://docs.openclaw.ai")
+	bin, err := ensureOpenclawInstalled()
+	if err != nil {
+		return err
+	}
+
+	firstLaunch := true
+	if integrationConfig, err := loadIntegration("openclaw"); err == nil {
+		firstLaunch = !integrationConfig.Onboarded
+	}
+
+	if firstLaunch {
+		fmt.Fprintf(os.Stderr, "\n%sSecurity%s\n\n", ansiBold, ansiReset)
+		fmt.Fprintf(os.Stderr, "  OpenClaw can read files and run actions when tools are enabled.\n")
+		fmt.Fprintf(os.Stderr, "  A bad prompt can trick it into doing unsafe things.\n\n")
+		fmt.Fprintf(os.Stderr, "%s  Learn more: https://docs.openclaw.ai/gateway/security%s\n\n", ansiGray, ansiReset)
+
+		ok, err := confirmPrompt("I understand the risks. Continue?")
+		if err != nil {
+			return err
+		}
+		if !ok {
+			return nil
 		}
 	}

-	models := []string{model}
-	if config, err := loadIntegration("openclaw"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	} else if config, err := loadIntegration("clawdbot"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := c.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
 	if !c.onboarded() {
-		// Onboarding not completed: run it (model already set via Edit)
-		// Use "ollama" as gateway token for simple local access
+		fmt.Fprintf(os.Stderr, "\n%sSetting up OpenClaw with Ollama...%s\n", ansiGreen, ansiReset)
+		fmt.Fprintf(os.Stderr, "%s  Model: %s%s\n\n", ansiGray, model, ansiReset)
+
 		cmd := exec.Command(bin, "onboard",
+			"--non-interactive",
+			"--accept-risk",
 			"--auth-choice", "skip",
 			"--gateway-token", "ollama",
+			"--install-daemon",
+			"--skip-channels",
+			"--skip-skills",
 		)
 		cmd.Stdin = os.Stdin
 		cmd.Stdout = os.Stdout
 		cmd.Stderr = os.Stderr
-		return cmd.Run()
+		if err := cmd.Run(); err != nil {
+			return windowsHint(fmt.Errorf("openclaw onboarding failed: %w\n\nTry running: openclaw onboard", err))
+		}
+
+		patchDeviceScopes()
+
+		// Onboarding overwrites openclaw.json, so re-apply the model config
+		// that Edit() wrote before Run() was called.
+		if err := c.Edit([]string{model}); err != nil {
+			fmt.Fprintf(os.Stderr, "%s  Warning: could not re-apply model config: %v%s\n", ansiYellow, err, ansiReset)
+		}
 	}

-	// Onboarding completed: run gateway
-	cmd := exec.Command(bin, append([]string{"gateway"}, args...)...)
-	cmd.Stdin = os.Stdin
+	if strings.HasSuffix(model, ":cloud") || strings.HasSuffix(model, "-cloud") {
+		if ensureWebSearchPlugin() {
+			registerWebSearchPlugin()
+		}
+	}

-	// Capture output to detect "already running" message
-	var outputBuf bytes.Buffer
-	cmd.Stdout = io.MultiWriter(os.Stdout, &outputBuf)
-	cmd.Stderr = io.MultiWriter(os.Stderr, &outputBuf)
+	if firstLaunch {
+		fmt.Fprintf(os.Stderr, "\n%sPreparing your assistant — this may take a moment...%s\n\n", ansiGray, ansiReset)
+	} else {
+		fmt.Fprintf(os.Stderr, "\n%sStarting your assistant — this may take a moment...%s\n\n", ansiGray, ansiReset)
+	}

-	err := cmd.Run()
-	if err != nil && strings.Contains(outputBuf.String(), "Gateway already running") {
-		fmt.Fprintf(os.Stderr, "%sOpenClaw has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
+	// When extra args are passed through, run exactly what the user asked for
+	// after setup and skip the built-in gateway+TUI convenience flow.
+	if len(args) > 0 {
+		cmd := exec.Command(bin, args...)
+		cmd.Env = openclawEnv()
+		cmd.Stdin = os.Stdin
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		if err := cmd.Run(); err != nil {
+			return windowsHint(err)
+		}
+		if firstLaunch {
+			if err := integrationOnboarded("openclaw"); err != nil {
+				return fmt.Errorf("failed to save onboarding state: %w", err)
+			}
+		}
 		return nil
 	}
-	return err
+
+	token, port := c.gatewayInfo()
+	addr := fmt.Sprintf("localhost:%d", port)
+
+	// If the gateway is already running (e.g. via the daemon), restart it
+	// so it picks up any config changes from Edit() above (model, provider, etc.).
+	if portOpen(addr) {
+		restart := exec.Command(bin, "daemon", "restart")
+		restart.Env = openclawEnv()
+		if err := restart.Run(); err != nil {
+			fmt.Fprintf(os.Stderr, "%s  Warning: daemon restart failed: %v%s\n", ansiYellow, err, ansiReset)
+		}
+		if !waitForPort(addr, 10*time.Second) {
+			fmt.Fprintf(os.Stderr, "%s  Warning: gateway did not come back after restart%s\n", ansiYellow, ansiReset)
+		}
+	}
+
+	// If the gateway isn't running, start it as a background child process.
+	if !portOpen(addr) {
+		gw := exec.Command(bin, "gateway", "run", "--force")
+		gw.Env = openclawEnv()
+		if err := gw.Start(); err != nil {
+			return windowsHint(fmt.Errorf("failed to start gateway: %w", err))
+		}
+		defer func() {
+			if gw.Process != nil {
+				_ = gw.Process.Kill()
+				_ = gw.Wait()
+			}
+		}()
+	}
+
+	fmt.Fprintf(os.Stderr, "%sStarting gateway...%s\n", ansiGray, ansiReset)
+	if !waitForPort(addr, 30*time.Second) {
+		return windowsHint(fmt.Errorf("gateway did not start on %s", addr))
+	}
+
+	printOpenclawReady(bin, token, port, firstLaunch)
+
+	tuiArgs := []string{"tui"}
+	if firstLaunch {
+		tuiArgs = append(tuiArgs, "--message", "Wake up, my friend!")
+	}
+	tui := exec.Command(bin, tuiArgs...)
+	tui.Env = openclawEnv()
+	tui.Stdin = os.Stdin
+	tui.Stdout = os.Stdout
+	tui.Stderr = os.Stderr
+	if err := tui.Run(); err != nil {
+		return windowsHint(err)
+	}
+
+	if firstLaunch {
+		if err := integrationOnboarded("openclaw"); err != nil {
+			return fmt.Errorf("failed to save onboarding state: %w", err)
+		}
+	}
+	return nil
+}
+
+// gatewayInfo returns the gateway auth token and port recorded in the first
+// OpenClaw config file that can be read and parsed (~/.openclaw/openclaw.json,
+// then ~/.clawdbot/clawdbot.json). The port falls back to defaultGatewayPort
+// and the token to "" when the value is absent or no config is usable.
+func (c *Openclaw) gatewayInfo() (token string, port int) {
+	port = defaultGatewayPort
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return "", port
+	}
+
+	candidates := [...]string{
+		filepath.Join(home, ".openclaw", "openclaw.json"),
+		filepath.Join(home, ".clawdbot", "clawdbot.json"),
+	}
+	for _, cfgPath := range candidates {
+		raw, readErr := os.ReadFile(cfgPath)
+		if readErr != nil {
+			continue
+		}
+		var cfg map[string]any
+		if jsonErr := json.Unmarshal(raw, &cfg); jsonErr != nil {
+			continue
+		}
+
+		// Indexing a nil map is safe, so a missing "gateway" or "auth" object
+		// simply leaves the defaults in place.
+		gateway, _ := cfg["gateway"].(map[string]any)
+		if n, isNum := gateway["port"].(float64); isNum && n > 0 {
+			port = int(n)
+		}
+		auth, _ := gateway["auth"].(map[string]any)
+		if s, isStr := auth["token"].(string); isStr && s != "" {
+			token = s
+		}
+		// The first parseable config wins, even if it set neither value.
+		return token, port
+	}
+	return "", port
+}
+
+// printOpenclawReady writes the "OpenClaw is running" banner to stderr: the
+// Web UI link for the given port (with the token appended as a URL fragment
+// when one is set), followed by quick-start help on first launch or a short
+// tip otherwise. bin is the executable name shown in the suggested commands.
+func printOpenclawReady(bin, token string, port int, firstLaunch bool) {
+	w := os.Stderr
+
+	link := fmt.Sprintf("http://localhost:%d", port)
+	if token != "" {
+		link = link + "/#token=" + url.QueryEscape(token)
+	}
+
+	fmt.Fprintf(w, "\n%s✓ OpenClaw is running%s\n\n", ansiGreen, ansiReset)
+	fmt.Fprintf(w, "  Open the Web UI:\n")
+	fmt.Fprintf(w, "    %s\n\n", hyperlink(link, link))
+
+	if !firstLaunch {
+		fmt.Fprintf(w, "%sTip: connect WhatsApp, Telegram, and more with: %s configure --section channels%s\n", ansiGray, bin, ansiReset)
+		return
+	}
+
+	fmt.Fprintf(w, "%s  Quick start:%s\n", ansiBold, ansiReset)
+	fmt.Fprintf(w, "%s    /help             see all commands%s\n", ansiGray, ansiReset)
+	fmt.Fprintf(w, "%s    %s configure --section channels   connect WhatsApp, Telegram, etc.%s\n", ansiGray, bin, ansiReset)
+	fmt.Fprintf(w, "%s    %s skills                         browse and install skills%s\n\n", ansiGray, bin, ansiReset)
+	fmt.Fprintf(w, "%s  The OpenClaw gateway is running in the background.%s\n", ansiYellow, ansiReset)
+	fmt.Fprintf(w, "%s  Stop it with: %s gateway stop%s\n\n", ansiYellow, bin, ansiReset)
+}
+
+// openclawEnv returns a copy of the process environment without the provider
+// API key variables listed below, so child openclaw processes do not pick up
+// keys from the user's shell. The result is nil when nothing is kept.
+func openclawEnv() []string {
+	var kept []string
+	for _, entry := range os.Environ() {
+		name, _, _ := strings.Cut(entry, "=")
+		switch name {
+		case "ANTHROPIC_API_KEY",
+			"ANTHROPIC_OAUTH_TOKEN",
+			"OPENAI_API_KEY",
+			"GEMINI_API_KEY",
+			"MISTRAL_API_KEY",
+			"GROQ_API_KEY",
+			"XAI_API_KEY",
+			"OPENROUTER_API_KEY":
+			// Provider credential: leave it out of the child environment.
+			continue
+		}
+		kept = append(kept, entry)
+	}
+	return kept
+}
+
+// portOpen reports whether a TCP connection to addr can be established within
+// 500ms. The probe connection is closed immediately.
+func portOpen(addr string) bool {
+	if conn, err := net.DialTimeout("tcp", addr, 500*time.Millisecond); err == nil {
+		conn.Close()
+		return true
+	}
+	return false
+}
+
+// waitForPort polls addr until a TCP connection succeeds or timeout elapses.
+// Each attempt dials with a 500ms timeout and failed attempts are followed by
+// a 250ms pause. It returns true as soon as a connection is established and
+// false once the deadline has passed.
+func waitForPort(addr string, timeout time.Duration) bool {
+	const (
+		dialTimeout = 500 * time.Millisecond
+		retryDelay  = 250 * time.Millisecond
+	)
+	for deadline := time.Now().Add(timeout); time.Now().Before(deadline); time.Sleep(retryDelay) {
+		conn, err := net.DialTimeout("tcp", addr, dialTimeout)
+		if err != nil {
+			continue
+		}
+		conn.Close()
+		return true
+	}
+	return false
+}
+
+// windowsHint returns err unchanged on non-Windows systems. On Windows it
+// wraps err with a note recommending WSL2 and a link to the setup guide.
+func windowsHint(err error) error {
+	if runtime.GOOS == "windows" {
+		return fmt.Errorf("%w\n\n"+
+			"OpenClaw runs best on WSL2.\n"+
+			"Quick setup: wsl --install\n"+
+			"Guide: https://docs.openclaw.ai/windows", err)
+	}
+	return err
 }

 // onboarded checks if OpenClaw onboarding wizard was completed
@@ -95,6 +313,144 @@ func (c *Openclaw) onboarded() bool {
 	return lastRunAt != ""
 }

+// patchDeviceScopes makes sure the local device's entry in
+// ~/.openclaw/devices/paired.json, and each of its token entries, lists the
+// operator.read, operator.admin, operator.approvals and operator.pairing
+// scopes. Only the entry whose key matches the local device ID is touched.
+// Best-effort: it returns silently on any error and rewrites the file only
+// when a scope was actually added.
+func patchDeviceScopes() {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return
+	}
+	localID := readLocalDeviceID(home)
+	if localID == "" {
+		return
+	}
+
+	pairedPath := filepath.Join(home, ".openclaw", "devices", "paired.json")
+	raw, err := os.ReadFile(pairedPath)
+	if err != nil {
+		return
+	}
+	var paired map[string]map[string]any
+	if json.Unmarshal(raw, &paired) != nil {
+		return
+	}
+	entry, found := paired[localID]
+	if !found {
+		return
+	}
+
+	wanted := []string{
+		"operator.read",
+		"operator.admin",
+		"operator.approvals",
+		"operator.pairing",
+	}
+
+	dirty := patchScopes(entry, "scopes", wanted)
+	// Ranging over a nil map is a no-op, so a missing "tokens" object is fine.
+	tokens, _ := entry["tokens"].(map[string]any)
+	for _, item := range tokens {
+		tokenMap, isMap := item.(map[string]any)
+		if !isMap {
+			continue
+		}
+		if patchScopes(tokenMap, "scopes", wanted) {
+			dirty = true
+		}
+	}
+	if !dirty {
+		return
+	}
+
+	encoded, err := json.MarshalIndent(paired, "", "  ")
+	if err != nil {
+		return
+	}
+	_ = os.WriteFile(pairedPath, encoded, 0o600)
+}
+
+// readLocalDeviceID returns the "deviceId" string stored in
+// <home>/.openclaw/identity/device-auth.json. It returns "" when the file
+// cannot be read, is not a JSON object, or has no string "deviceId" field.
+func readLocalDeviceID(home string) string {
+	identityPath := filepath.Join(home, ".openclaw", "identity", "device-auth.json")
+	raw, err := os.ReadFile(identityPath)
+	if err != nil {
+		return ""
+	}
+
+	var fields map[string]any
+	if json.Unmarshal(raw, &fields) != nil {
+		return ""
+	}
+	if id, isString := fields["deviceId"].(string); isString {
+		return id
+	}
+	return ""
+}
+
+// patchScopes ensures obj[key] contains all required scopes. Returns true if
+// any scopes were added.
+//
+// obj[key] is read as a JSON-style []any; non-string elements are ignored when
+// checking what is already present, and missing scopes are appended in the
+// order given by required. A nil obj cannot be written to, so it is left alone
+// and false is returned instead of panicking (a JSON null entry decodes to a
+// nil map).
+func patchScopes(obj map[string]any, key string, required []string) bool {
+	if obj == nil {
+		return false
+	}
+	existing, _ := obj[key].([]any)
+	have := make(map[string]bool, len(existing))
+	for _, s := range existing {
+		if str, ok := s.(string); ok {
+			have[str] = true
+		}
+	}
+	added := false
+	for _, s := range required {
+		if !have[s] {
+			existing = append(existing, s)
+			added = true
+		}
+	}
+	if added {
+		obj[key] = existing
+	}
+	return added
+}
+
+// ensureOpenclawInstalled returns the name of the OpenClaw executable to run.
+// It prefers an "openclaw" binary on PATH, then the "clawdbot" one. When
+// neither is present it asks the user for confirmation and installs
+// openclaw@latest globally with npm. It returns an error when npm is missing,
+// the user declines, the install fails, or the binary still cannot be found on
+// PATH afterwards.
+func ensureOpenclawInstalled() (string, error) {
+	if _, err := exec.LookPath("openclaw"); err == nil {
+		return "openclaw", nil
+	}
+	if _, err := exec.LookPath("clawdbot"); err == nil {
+		return "clawdbot", nil
+	}
+
+	// Check for npm before prompting, so the user is not asked to approve an
+	// install that cannot run.
+	if _, err := exec.LookPath("npm"); err != nil {
+		return "", fmt.Errorf("openclaw is not installed and npm was not found\n\n" +
+			"Install Node.js first:\n" +
+			"  https://nodejs.org/\n\n" +
+			"Then rerun:\n" +
+			"  ollama launch\n" +
+			"and select OpenClaw")
+	}
+
+	ok, err := confirmPrompt("OpenClaw is not installed. Install with npm?")
+	if err != nil {
+		return "", err
+	}
+	if !ok {
+		return "", fmt.Errorf("openclaw installation cancelled")
+	}
+
+	fmt.Fprintf(os.Stderr, "\nInstalling OpenClaw...\n")
+	cmd := exec.Command("npm", "install", "-g", "openclaw@latest")
+	// Attach npm to this process's stdio so its output and prompts are visible.
+	cmd.Stdin = os.Stdin
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		return "", fmt.Errorf("failed to install openclaw: %w", err)
+	}
+
+	// A successful npm run does not guarantee the binary is on this process's PATH.
+	if _, err := exec.LookPath("openclaw"); err != nil {
+		return "", fmt.Errorf("openclaw was installed but the binary was not found on PATH\n\nYou may need to restart your shell")
+	}
+
+	fmt.Fprintf(os.Stderr, "%sOpenClaw installed successfully%s\n\n", ansiGreen, ansiReset)
+	return "openclaw", nil
+}
+
 func (c *Openclaw) Paths() []string {
 	home, _ := os.UserHomeDir()
 	p := filepath.Join(home, ".openclaw", "openclaw.json")
@@ -149,8 +505,7 @@ func (c *Openclaw) Edit(models []string) error {
 	ollama["baseUrl"] = envconfig.Host().String() + "/v1"
 	// needed to register provider
 	ollama["apiKey"] = "ollama-local"
-	// TODO(parthsareen): potentially move to responses
-	ollama["api"] = "openai-completions"
+	ollama["api"] = "ollama"

 	// Build map of existing models to preserve user customizations
 	existingModels, _ := ollama["models"].([]any)
@@ -163,25 +518,13 @@ func (c *Openclaw) Edit(models []string) error {
 		}
 	}

+	client, _ := api.ClientFromEnvironment()
+
 	var newModels []any
-	for _, model := range models {
-		entry := map[string]any{
-			"id":        model,
-			"name":      model,
-			"reasoning": false,
-			"input":     []any{"text"},
-			"cost": map[string]any{
-				"input":      0,
-				"output":     0,
-				"cacheRead":  0,
-				"cacheWrite": 0,
-			},
-			// TODO(parthsareen): get these values from API
-			"contextWindow": 131072,
-			"maxTokens":     16384,
-		}
+	for _, m := range models {
+		entry, _ := openclawModelConfig(context.Background(), client, m)
 		// Merge existing fields (user customizations)
-		if existing, ok := existingByID[model]; ok {
+		if existing, ok := existingByID[m]; ok {
 			for k, v := range existing {
 				if _, isNew := entry[k]; !isNew {
 					entry[k] = v
@@ -218,7 +561,213 @@ func (c *Openclaw) Edit(models []string) error {
 	if err != nil {
 		return err
 	}
-	return writeWithBackup(configPath, data)
+	if err := writeWithBackup(configPath, data); err != nil {
+		return err
+	}
+
+	// Clear any per-session model overrides so the new primary takes effect
+	// immediately rather than being shadowed by a cached modelOverride.
+	clearSessionModelOverride(models[0])
+	return nil
+}
+
+// clearSessionModelOverride rewrites the main agent's sessions.json so that no
+// session pins a model other than primary. Best-effort: every error is ignored.
+func clearSessionModelOverride(primary string) {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return
+	}
+	sessionsPath := filepath.Join(homeDir, ".openclaw", "agents", "main", "sessions", "sessions.json")
+	raw, err := os.ReadFile(sessionsPath)
+	if err != nil {
+		return
+	}
+	var bySession map[string]map[string]any
+	if unmarshalErr := json.Unmarshal(raw, &bySession); unmarshalErr != nil {
+		return
+	}
+	rewritten := 0
+	for _, fields := range bySession {
+		pinned, _ := fields["modelOverride"].(string)
+		if pinned == "" || pinned == primary {
+			continue // nothing pinned, or already pinned to the new primary
+		}
+		delete(fields, "modelOverride")
+		delete(fields, "providerOverride")
+		fields["model"] = primary
+		rewritten++
+	}
+	if rewritten == 0 {
+		return // leave the file untouched when no session changed
+	}
+	if out, marshalErr := json.MarshalIndent(bySession, "", "  "); marshalErr == nil {
+		_ = os.WriteFile(sessionsPath, out, 0o600)
+	}
+}
+
+const webSearchNpmPackage = "@ollama/openclaw-web-search"
+
+// ensureWebSearchPlugin makes sure the openclaw-web-search extension exists under
+// ~/.openclaw/extensions/, fetching it with npm and unpacking it with tar when its
+// index.ts is missing. It reports whether the extension is available afterwards.
+func ensureWebSearchPlugin() bool {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return false
+	}
+
+	extDir := filepath.Join(homeDir, ".openclaw", "extensions", "openclaw-web-search")
+	if _, statErr := os.Stat(filepath.Join(extDir, "index.ts")); statErr == nil {
+		return true // index.ts is present: nothing to install
+	}
+
+	npmPath, err := exec.LookPath("npm")
+	if err != nil {
+		return false
+	}
+
+	if mkdirErr := os.MkdirAll(extDir, 0o755); mkdirErr != nil {
+		return false
+	}
+
+	// `npm pack` downloads the tarball into extDir; its stdout is used as the tarball's file name.
+	stdout, err := exec.Command(npmPath, "pack", webSearchNpmPackage, "--pack-destination", extDir).Output()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "%s  Warning: could not download web search plugin: %v%s\n", ansiYellow, err, ansiReset)
+		return false
+	}
+
+	tgzName := strings.TrimSpace(string(stdout))
+	archive := filepath.Join(extDir, tgzName)
+	defer os.Remove(archive)
+
+	// Unpack flat into extDir, dropping the archive's leading path component.
+	untar := exec.Command("tar", "xzf", archive, "--strip-components=1", "-C", extDir)
+	if untarErr := untar.Run(); untarErr != nil {
+		fmt.Fprintf(os.Stderr, "%s  Warning: could not extract web search plugin: %v%s\n", ansiYellow, untarErr, ansiReset)
+		return false
+	}
+
+	fmt.Fprintf(os.Stderr, "%s  ✓ Installed web search plugin%s\n", ansiGreen, ansiReset)
+	return true
+}
+
+// registerWebSearchPlugin adds plugins.entries.openclaw-web-search to the OpenClaw
+// config so the gateway activates it on next start. Best-effort; silently returns
+// on any error (including a config that is not a JSON object) or if already registered.
+func registerWebSearchPlugin() {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return
+	}
+	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return
+	}
+	var config map[string]any
+	if json.Unmarshal(data, &config) != nil || config == nil { // JSON null decodes to a nil map; writing to it would panic
+		return
+	}
+
+	plugins, _ := config["plugins"].(map[string]any)
+	if plugins == nil {
+		plugins = make(map[string]any)
+	}
+	entries, _ := plugins["entries"].(map[string]any)
+	if entries == nil {
+		entries = make(map[string]any)
+	}
+	if _, ok := entries["openclaw-web-search"]; ok {
+		return // already registered; the file is left untouched
+	}
+	entries["openclaw-web-search"] = map[string]any{"enabled": true}
+	plugins["entries"] = entries
+	config["plugins"] = plugins
+
+	// Disable the built-in web search since our plugin replaces it.
+	tools, _ := config["tools"].(map[string]any)
+	if tools == nil {
+		tools = make(map[string]any)
+	}
+	web, _ := tools["web"].(map[string]any)
+	if web == nil {
+		web = make(map[string]any)
+	}
+	web["search"] = map[string]any{"enabled": false}
+	tools["web"] = web
+	config["tools"] = tools
+
+	out, err := json.MarshalIndent(config, "", "  ")
+	if err != nil {
+		return
+	}
+	_ = os.WriteFile(configPath, out, 0o600)
+}
+
+// openclawModelConfig builds the OpenClaw config entry for modelID, enriched via client.Show when possible.
+// The second return value reports a cloud (remote) model; it is false when client is nil or Show fails.
+func openclawModelConfig(ctx context.Context, client *api.Client, modelID string) (map[string]any, bool) {
+	entry := map[string]any{
+		"id":    modelID,
+		"name":  modelID,
+		"input": []any{"text"},
+		"cost": map[string]any{
+			"input":      0,
+			"output":     0,
+			"cacheRead":  0,
+			"cacheWrite": 0,
+		},
+	}
+
+	if client == nil {
+		return entry, false
+	}
+
+	showCtx := ctx // only bounded below when the caller supplied no deadline of its own
+	if _, hasDeadline := ctx.Deadline(); !hasDeadline {
+		var cancel context.CancelFunc
+		showCtx, cancel = context.WithTimeout(ctx, openclawModelShowTimeout)
+		defer cancel()
+	}
+
+	resp, err := client.Show(showCtx, &api.ShowRequest{Model: modelID})
+	if err != nil {
+		return entry, false
+	}
+
+	// Set input types based on vision capability
+	if slices.Contains(resp.Capabilities, model.CapabilityVision) {
+		entry["input"] = []any{"text", "image"}
+	}
+
+	// Set reasoning based on thinking capability
+	if slices.Contains(resp.Capabilities, model.CapabilityThinking) {
+		entry["reasoning"] = true
+	}
+
+	// Cloud models: use hardcoded limits for context/output tokens.
+	// Capability detection above still applies (vision, thinking).
+	if resp.RemoteModel != "" {
+		if l, ok := lookupCloudModelLimit(modelID); ok {
+			entry["contextWindow"] = l.Context
+			entry["maxTokens"] = l.Output
+		}
+		return entry, true
+	}
+
+	// Local models only: use the first positive numeric ".context_length" value in ModelInfo.
+	for key, val := range resp.ModelInfo {
+		if strings.HasSuffix(key, ".context_length") {
+			if ctxLen, ok := val.(float64); ok && ctxLen > 0 {
+				entry["contextWindow"] = int(ctxLen)
+				break // stop only once a usable value is stored; map order is random
+			}
+		}
+	}
+
+	return entry, false
 }

 func (c *Openclaw) Models() []string {
--- a/cmd/config/openclaw_test.go
+++ b/cmd/config/openclaw_test.go
@@ -1,11 +1,21 @@
 package config

 import (
+	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
 	"os"
 	"path/filepath"
+	"runtime"
+	"strings"
 	"testing"
+	"time"
+
+	"github.com/ollama/ollama/api"
 )

 func TestOpenclawIntegration(t *testing.T) {
@@ -26,6 +36,124 @@ func TestOpenclawIntegration(t *testing.T) {
 	})
 }

+func TestOpenclawRunPassthroughArgs(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("uses a POSIX shell test binary")
+	}
+
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+	t.Setenv("PATH", tmpDir)
+
+	if err := integrationOnboarded("openclaw"); err != nil {
+		t.Fatal(err)
+	}
+
+	configDir := filepath.Join(tmpDir, ".openclaw")
+	if err := os.MkdirAll(configDir, 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
+		"wizard": {"lastRunAt": "2026-01-01T00:00:00Z"}
+	}`), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	bin := filepath.Join(tmpDir, "openclaw")
+	if err := os.WriteFile(bin, []byte("#!/bin/sh\nprintf '%s\\n' \"$*\" >> \"$HOME/invocations.log\"\n"), 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	c := &Openclaw{}
+	if err := c.Run("llama3.2", []string{"gateway", "--someflag"}); err != nil {
+		t.Fatalf("Run() error = %v", err)
+	}
+
+	data, err := os.ReadFile(filepath.Join(tmpDir, "invocations.log"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	lines := strings.Split(strings.TrimSpace(string(data)), "\n")
+	if len(lines) != 1 {
+		t.Fatalf("expected exactly 1 invocation, got %d: %v", len(lines), lines)
+	}
+	if lines[0] != "gateway --someflag" {
+		t.Fatalf("invocation = %q, want %q", lines[0], "gateway --someflag")
+	}
+}
+
+func TestOpenclawRunFirstLaunchPersistence(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("uses a POSIX shell test binary")
+	}
+
+	oldHook := DefaultConfirmPrompt
+	DefaultConfirmPrompt = func(prompt string) (bool, error) {
+		return true, nil
+	}
+	defer func() { DefaultConfirmPrompt = oldHook }()
+
+	t.Run("success persists onboarding flag", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		t.Setenv("PATH", tmpDir)
+
+		configDir := filepath.Join(tmpDir, ".openclaw")
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		// Mark OpenClaw onboarding complete so Run takes passthrough path directly.
+		if err := os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
+			"wizard": {"lastRunAt": "2026-01-01T00:00:00Z"}
+		}`), 0o644); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(filepath.Join(tmpDir, "openclaw"), []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+			t.Fatal(err)
+		}
+
+		c := &Openclaw{}
+		if err := c.Run("llama3.2", []string{"gateway", "--status"}); err != nil {
+			t.Fatalf("Run() error = %v", err)
+		}
+		integrationConfig, err := loadIntegration("openclaw")
+		if err != nil {
+			t.Fatalf("loadIntegration() error = %v", err)
+		}
+		if !integrationConfig.Onboarded {
+			t.Fatal("expected onboarding flag to be persisted after successful run")
+		}
+	})
+
+	t.Run("failure does not persist onboarding flag", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		t.Setenv("PATH", tmpDir)
+
+		configDir := filepath.Join(tmpDir, ".openclaw")
+		if err := os.MkdirAll(configDir, 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
+			"wizard": {"lastRunAt": "2026-01-01T00:00:00Z"}
+		}`), 0o644); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(filepath.Join(tmpDir, "openclaw"), []byte("#!/bin/sh\nexit 1\n"), 0o755); err != nil {
+			t.Fatal(err)
+		}
+
+		c := &Openclaw{}
+		if err := c.Run("llama3.2", []string{"gateway", "--status"}); err == nil {
+			t.Fatal("expected run failure")
+		}
+		integrationConfig, err := loadIntegration("openclaw")
+		if err == nil && integrationConfig.Onboarded {
+			t.Fatal("expected onboarding flag to remain unset after failed run")
+		}
+	})
+}
+
 func TestOpenclawEdit(t *testing.T) {
 	c := &Openclaw{}
 	tmpDir := t.TempDir()
@@ -359,19 +487,16 @@ func TestOpenclawEditSchemaFields(t *testing.T) {
 	modelList := ollama["models"].([]any)
 	entry := modelList[0].(map[string]any)

-	// Verify required schema fields
-	if entry["reasoning"] != false {
-		t.Error("reasoning should be false")
+	// Verify base schema fields (always set regardless of API availability)
+	if entry["id"] != "llama3.2" {
+		t.Errorf("id = %v, want llama3.2", entry["id"])
+	}
+	if entry["name"] != "llama3.2" {
+		t.Errorf("name = %v, want llama3.2", entry["name"])
 	}
 	if entry["input"] == nil {
 		t.Error("input should be set")
 	}
-	if entry["contextWindow"] == nil {
-		t.Error("contextWindow should be set")
-	}
-	if entry["maxTokens"] == nil {
-		t.Error("maxTokens should be set")
-	}
 	cost := entry["cost"].(map[string]any)
 	if cost["cacheRead"] == nil {
 		t.Error("cost.cacheRead should be set")
@@ -876,3 +1001,589 @@ func TestOpenclawOnboarded(t *testing.T) {
 		}
 	})
 }
+
+func TestOpenclawGatewayInfo(t *testing.T) {
+	c := &Openclaw{}
+
+	t.Run("returns defaults when no config exists", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+
+		token, port := c.gatewayInfo()
+		if token != "" {
+			t.Errorf("expected empty token, got %q", token)
+		}
+		if port != defaultGatewayPort {
+			t.Errorf("expected default port %d, got %d", defaultGatewayPort, port)
+		}
+	})
+
+	t.Run("reads token and port from config", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		configDir := filepath.Join(tmpDir, ".openclaw")
+		os.MkdirAll(configDir, 0o755)
+		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
+			"gateway": {
+				"port": 9999,
+				"auth": {"mode": "token", "token": "my-secret"}
+			}
+		}`), 0o644)
+
+		token, port := c.gatewayInfo()
+		if token != "my-secret" {
+			t.Errorf("expected token %q, got %q", "my-secret", token)
+		}
+		if port != 9999 {
+			t.Errorf("expected port 9999, got %d", port)
+		}
+	})
+
+	t.Run("uses default port when not in config", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		configDir := filepath.Join(tmpDir, ".openclaw")
+		os.MkdirAll(configDir, 0o755)
+		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
+			"gateway": {"auth": {"token": "tok"}}
+		}`), 0o644)
+
+		token, port := c.gatewayInfo()
+		if token != "tok" {
+			t.Errorf("expected token %q, got %q", "tok", token)
+		}
+		if port != defaultGatewayPort {
+			t.Errorf("expected default port %d, got %d", defaultGatewayPort, port)
+		}
+	})
+
+	t.Run("falls back to legacy clawdbot config", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		legacyDir := filepath.Join(tmpDir, ".clawdbot")
+		os.MkdirAll(legacyDir, 0o755)
+		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
+			"gateway": {"port": 12345, "auth": {"token": "legacy-token"}}
+		}`), 0o644)
+
+		token, port := c.gatewayInfo()
+		if token != "legacy-token" {
+			t.Errorf("expected token %q, got %q", "legacy-token", token)
+		}
+		if port != 12345 {
+			t.Errorf("expected port 12345, got %d", port)
+		}
+	})
+
+	t.Run("handles corrupted JSON gracefully", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		configDir := filepath.Join(tmpDir, ".openclaw")
+		os.MkdirAll(configDir, 0o755)
+		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{corrupted`), 0o644)
+
+		token, port := c.gatewayInfo()
+		if token != "" {
+			t.Errorf("expected empty token, got %q", token)
+		}
+		if port != defaultGatewayPort {
+			t.Errorf("expected default port, got %d", port)
+		}
+	})
+
+	t.Run("handles missing gateway section", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		configDir := filepath.Join(tmpDir, ".openclaw")
+		os.MkdirAll(configDir, 0o755)
+		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"theme":"dark"}`), 0o644)
+
+		token, port := c.gatewayInfo()
+		if token != "" {
+			t.Errorf("expected empty token, got %q", token)
+		}
+		if port != defaultGatewayPort {
+			t.Errorf("expected default port, got %d", port)
+		}
+	})
+}
+
+func TestPrintOpenclawReady(t *testing.T) {
+	t.Run("includes port in URL", func(t *testing.T) {
+		var buf bytes.Buffer
+		old := os.Stderr
+		r, w, _ := os.Pipe()
+		os.Stderr = w
+
+		printOpenclawReady("openclaw", "", 9999, false)
+
+		w.Close()
+		os.Stderr = old
+		buf.ReadFrom(r)
+
+		output := buf.String()
+		if !strings.Contains(output, "localhost:9999") {
+			t.Errorf("expected port 9999 in output, got:\n%s", output)
+		}
+		if strings.Contains(output, "#token=") {
+			t.Error("should not include token fragment when token is empty")
+		}
+	})
+
+	t.Run("URL-escapes token", func(t *testing.T) {
+		var buf bytes.Buffer
+		old := os.Stderr
+		r, w, _ := os.Pipe()
+		os.Stderr = w
+
+		printOpenclawReady("openclaw", "my token&special=chars", defaultGatewayPort, false)
+
+		w.Close()
+		os.Stderr = old
+		buf.ReadFrom(r)
+
+		output := buf.String()
+		escaped := url.QueryEscape("my token&special=chars")
+		if !strings.Contains(output, "#token="+escaped) {
+			t.Errorf("expected URL-escaped token %q in output, got:\n%s", escaped, output)
+		}
+	})
+
+	t.Run("simple token is not mangled", func(t *testing.T) {
+		var buf bytes.Buffer
+		old := os.Stderr
+		r, w, _ := os.Pipe()
+		os.Stderr = w
+
+		printOpenclawReady("openclaw", "ollama", defaultGatewayPort, false)
+
+		w.Close()
+		os.Stderr = old
+		buf.ReadFrom(r)
+
+		output := buf.String()
+		if !strings.Contains(output, "#token=ollama") {
+			t.Errorf("expected #token=ollama in output, got:\n%s", output)
+		}
+	})
+
+	t.Run("includes web UI hint", func(t *testing.T) {
+		var buf bytes.Buffer
+		old := os.Stderr
+		r, w, _ := os.Pipe()
+		os.Stderr = w
+
+		printOpenclawReady("openclaw", "", defaultGatewayPort, false)
+
+		w.Close()
+		os.Stderr = old
+		buf.ReadFrom(r)
+
+		output := buf.String()
+		if !strings.Contains(output, "Open the Web UI") {
+			t.Errorf("expected web UI hint in output, got:\n%s", output)
+		}
+	})
+
+	t.Run("first launch shows quick start tips", func(t *testing.T) {
+		var buf bytes.Buffer
+		old := os.Stderr
+		r, w, _ := os.Pipe()
+		os.Stderr = w
+
+		printOpenclawReady("openclaw", "ollama", defaultGatewayPort, true)
+
+		w.Close()
+		os.Stderr = old
+		buf.ReadFrom(r)
+
+		output := buf.String()
+		for _, want := range []string{"/help", "channels", "skills", "gateway"} {
+			if !strings.Contains(output, want) {
+				t.Errorf("expected %q in first-launch output, got:\n%s", want, output)
+			}
+		}
+	})
+
+	t.Run("subsequent launch shows single tip", func(t *testing.T) {
+		var buf bytes.Buffer
+		old := os.Stderr
+		r, w, _ := os.Pipe()
+		os.Stderr = w
+
+		printOpenclawReady("openclaw", "ollama", defaultGatewayPort, false)
+
+		w.Close()
+		os.Stderr = old
+		buf.ReadFrom(r)
+
+		output := buf.String()
+		if !strings.Contains(output, "Tip:") {
+			t.Errorf("expected single tip line, got:\n%s", output)
+		}
+		if strings.Contains(output, "Quick start") {
+			t.Errorf("should not show quick start on subsequent launch")
+		}
+	})
+}
+
+func TestOpenclawModelConfig(t *testing.T) {
+	t.Run("nil client returns base config", func(t *testing.T) {
+		cfg, _ := openclawModelConfig(context.Background(), nil, "llama3.2")
+
+		if cfg["id"] != "llama3.2" {
+			t.Errorf("id = %v, want llama3.2", cfg["id"])
+		}
+		if cfg["name"] != "llama3.2" {
+			t.Errorf("name = %v, want llama3.2", cfg["name"])
+		}
+		if cfg["cost"] == nil {
+			t.Error("cost should be set")
+		}
+		// Should not have capability fields without API
+		if _, ok := cfg["reasoning"]; ok {
+			t.Error("reasoning should not be set without API")
+		}
+		if _, ok := cfg["contextWindow"]; ok {
+			t.Error("contextWindow should not be set without API")
+		}
+	})
+
+	t.Run("sets vision input when model has vision capability", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":["vision"],"model_info":{"llama.context_length":4096}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "llava:7b")
+
+		input, ok := cfg["input"].([]any)
+		if !ok || len(input) != 2 {
+			t.Errorf("input = %v, want [text image]", cfg["input"])
+		}
+	})
+
+	t.Run("sets text-only input when model lacks vision", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":["completion"],"model_info":{}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "llama3.2")
+
+		input, ok := cfg["input"].([]any)
+		if !ok || len(input) != 1 {
+			t.Errorf("input = %v, want [text]", cfg["input"])
+		}
+		if _, ok := cfg["reasoning"]; ok {
+			t.Error("reasoning should not be set for non-thinking model")
+		}
+	})
+
+	t.Run("sets reasoning when model has thinking capability", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":["thinking"],"model_info":{}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "qwq")
+
+		if cfg["reasoning"] != true {
+			t.Error("expected reasoning = true for thinking model")
+		}
+	})
+
+	t.Run("extracts context window from model info", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":[],"model_info":{"llama.context_length":131072}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "llama3.2")
+
+		if cfg["contextWindow"] != 131072 {
+			t.Errorf("contextWindow = %v, want 131072", cfg["contextWindow"])
+		}
+	})
+
+	t.Run("handles all capabilities together", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":["vision","thinking"],"model_info":{"qwen3.context_length":32768}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "qwen3-vision")
+
+		input, ok := cfg["input"].([]any)
+		if !ok || len(input) != 2 {
+			t.Errorf("input = %v, want [text image]", cfg["input"])
+		}
+		if cfg["reasoning"] != true {
+			t.Error("expected reasoning = true")
+		}
+		if cfg["contextWindow"] != 32768 {
+			t.Errorf("contextWindow = %v, want 32768", cfg["contextWindow"])
+		}
+	})
+
+	t.Run("returns base config when show fails", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusNotFound)
+			fmt.Fprintf(w, `{"error":"model not found"}`)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "missing-model")
+
+		if cfg["id"] != "missing-model" {
+			t.Errorf("id = %v, want missing-model", cfg["id"])
+		}
+		// Should still have input (default)
+		if cfg["input"] == nil {
+			t.Error("input should always be set")
+		}
+		if _, ok := cfg["reasoning"]; ok {
+			t.Error("reasoning should not be set when show fails")
+		}
+		if _, ok := cfg["contextWindow"]; ok {
+			t.Error("contextWindow should not be set when show fails")
+		}
+	})
+
+	t.Run("times out slow show and returns base config", func(t *testing.T) {
+		oldTimeout := openclawModelShowTimeout
+		openclawModelShowTimeout = 50 * time.Millisecond
+		t.Cleanup(func() { openclawModelShowTimeout = oldTimeout })
+
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				time.Sleep(300 * time.Millisecond)
+				fmt.Fprintf(w, `{"capabilities":["thinking"],"model_info":{"llama.context_length":4096}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		start := time.Now()
+		cfg, _ := openclawModelConfig(context.Background(), client, "slow-model")
+		elapsed := time.Since(start)
+		if elapsed >= 250*time.Millisecond {
+			t.Fatalf("openclawModelConfig took too long: %v", elapsed)
+		}
+		if cfg["id"] != "slow-model" {
+			t.Errorf("id = %v, want slow-model", cfg["id"])
+		}
+		if _, ok := cfg["reasoning"]; ok {
+			t.Error("reasoning should not be set on timeout")
+		}
+		if _, ok := cfg["contextWindow"]; ok {
+			t.Error("contextWindow should not be set on timeout")
+		}
+	})
+
+	t.Run("skips zero context length", func(t *testing.T) {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":[],"model_info":{"llama.context_length":0}}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, _ := openclawModelConfig(context.Background(), client, "test-model")
+
+		if _, ok := cfg["contextWindow"]; ok {
+			t.Error("contextWindow should not be set for zero value")
+		}
+	})
+
+	t.Run("cloud model uses hardcoded limits", func(t *testing.T) {
+		// Use a model name that's in cloudModelLimits and make the server
+		// report it as a remote/cloud model
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":[],"model_info":{},"remote_model":"minimax-m2.5"}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, isCloud := openclawModelConfig(context.Background(), client, "minimax-m2.5:cloud")
+
+		if !isCloud {
+			t.Error("expected isCloud = true for cloud model")
+		}
+		if cfg["contextWindow"] != 204_800 {
+			t.Errorf("contextWindow = %v, want 204800", cfg["contextWindow"])
+		}
+		if cfg["maxTokens"] != 128_000 {
+			t.Errorf("maxTokens = %v, want 128000", cfg["maxTokens"])
+		}
+	})
+
+	t.Run("cloud model with vision capability gets image input", func(t *testing.T) {
+		// Regression test: cloud models must not skip capability detection.
+		// A cloud model that reports vision capability should have input: [text, image].
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":["vision"],"model_info":{},"remote_model":"qwen3-vl"}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, isCloud := openclawModelConfig(context.Background(), client, "qwen3-vl:235b-cloud")
+
+		if !isCloud {
+			t.Error("expected isCloud = true for cloud vision model")
+		}
+		input, ok := cfg["input"].([]any)
+		if !ok || len(input) != 2 {
+			t.Errorf("input = %v, want [text image] for cloud vision model", cfg["input"])
+		}
+	})
+
+	t.Run("cloud model with thinking capability gets reasoning flag", func(t *testing.T) {
+		// Regression test: cloud models must not skip capability detection.
+		// A cloud model that reports thinking capability should have reasoning: true.
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			if r.URL.Path == "/api/show" {
+				fmt.Fprintf(w, `{"capabilities":["thinking"],"model_info":{},"remote_model":"qwq-cloud"}`)
+				return
+			}
+			w.WriteHeader(http.StatusNotFound)
+		}))
+		defer srv.Close()
+
+		u, _ := url.Parse(srv.URL)
+		client := api.NewClient(u, srv.Client())
+
+		cfg, isCloud := openclawModelConfig(context.Background(), client, "qwq:cloud")
+
+		if !isCloud {
+			t.Error("expected isCloud = true for cloud thinking model")
+		}
+		if cfg["reasoning"] != true {
+			t.Error("expected reasoning = true for cloud thinking model")
+		}
+	})
+}
+
+func TestIntegrationOnboarded(t *testing.T) {
+	t.Run("returns false when not set", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+
+		integrationConfig, err := loadIntegration("openclaw")
+		if err == nil && integrationConfig.Onboarded {
+			t.Error("expected false for fresh config")
+		}
+	})
+
+	t.Run("returns true after integrationOnboarded", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		os.MkdirAll(filepath.Join(tmpDir, ".ollama"), 0o755)
+
+		if err := integrationOnboarded("openclaw"); err != nil {
+			t.Fatal(err)
+		}
+		integrationConfig, err := loadIntegration("openclaw")
+		if err != nil || !integrationConfig.Onboarded {
+			t.Error("expected true after integrationOnboarded")
+		}
+	})
+
+	t.Run("is case insensitive", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		os.MkdirAll(filepath.Join(tmpDir, ".ollama"), 0o755)
+
+		if err := integrationOnboarded("OpenClaw"); err != nil {
+			t.Fatal(err)
+		}
+		integrationConfig, err := loadIntegration("openclaw")
+		if err != nil || !integrationConfig.Onboarded {
+			t.Error("expected true when set with different case")
+		}
+	})
+
+	t.Run("preserves existing integration data", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		os.MkdirAll(filepath.Join(tmpDir, ".ollama"), 0o755)
+
+		if err := SaveIntegration("openclaw", []string{"llama3.2", "mistral"}); err != nil {
+			t.Fatal(err)
+		}
+		if err := integrationOnboarded("openclaw"); err != nil {
+			t.Fatal(err)
+		}
+
+		// Verify onboarded is set
+		integrationConfig, err := loadIntegration("openclaw")
+		if err != nil || !integrationConfig.Onboarded {
+			t.Error("expected true after integrationOnboarded")
+		}
+
+		// Verify models are preserved
+		model := IntegrationModel("openclaw")
+		if model != "llama3.2" {
+			t.Errorf("expected first model llama3.2, got %q", model)
+		}
+	})
+}
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -3,6 +3,7 @@ package config
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"maps"
 	"os"
@@ -51,6 +52,16 @@ func (o *OpenCode) Run(model string, args []string) error {
 	if config, err := loadIntegration("opencode"); err == nil && len(config.Models) > 0 {
 		models = config.Models
 	}
+	var err error
+	models, err = resolveEditorModels("opencode", models, func() ([]string, error) {
+		return selectModels(context.Background(), "opencode", "")
+	})
+	if errors.Is(err, errCancelled) {
+		return nil
+	}
+	if err != nil {
+		return err
+	}
 	if err := o.Edit(models); err != nil {
 		return fmt.Errorf("setup failed: %w", err)
 	}
--- a/cmd/config/selector.go
+++ b/cmd/config/selector.go
@@ -10,10 +10,11 @@ import (

 // ANSI escape sequences for terminal formatting.
 const (
-	ansiBold  = "\033[1m"
-	ansiReset = "\033[0m"
-	ansiGray  = "\033[37m"
-	ansiGreen = "\033[32m"
+	ansiBold   = "\033[1m"
+	ansiReset  = "\033[0m"
+	ansiGray   = "\033[37m"
+	ansiGreen  = "\033[32m"
+	ansiYellow = "\033[33m"
 )

 // ErrCancelled is returned when the user cancels a selection.
--- a/cmd/tui/selector.go
+++ b/cmd/tui/selector.go
@@ -365,14 +365,27 @@ func (m selectorModel) View() string {
 	return s
 }

-func SelectSingle(title string, items []SelectItem) (string, error) {
+// cursorForCurrent returns the index of the item named current, else of the first item differing only by a ":tag", else 0.
+func cursorForCurrent(items []SelectItem, current string) (idx int) {
+	for i := len(items) - 1; i >= 0 && current != ""; i-- { // walk backwards so the earliest tag-only match is the one kept
+		if name := items[i].Name; name == current {
+			return i // an exact match always beats a tag-only match
+		} else if strings.HasPrefix(name, current+":") || strings.HasPrefix(current, name+":") {
+			idx = i
+		}
+	}
+	return idx
+}
+
+func SelectSingle(title string, items []SelectItem, current string) (string, error) {
 	if len(items) == 0 {
 		return "", fmt.Errorf("no items to select from")
 	}

 	m := selectorModel{
-		title: title,
-		items: items,
+		title:  title,
+		items:  items,
+		cursor: cursorForCurrent(items, current),
 	}

 	p := tea.NewProgram(m)
@@ -402,6 +415,12 @@ type multiSelectorModel struct {
 	cancelled    bool
 	confirmed    bool
 	width        int
+
+	// multi enables full multi-select editing mode. The zero value (false)
+	// shows a single-select picker where Enter adds the chosen model to
+	// the existing list. Tab toggles between modes.
+	multi     bool
+	singleAdd string // model picked in single mode
 }

 func newMultiSelectorModel(title string, items []SelectItem, preChecked []string) multiSelectorModel {
@@ -416,13 +435,23 @@ func newMultiSelectorModel(title string, items []SelectItem, preChecked []string
 		m.itemIndex[item.Name] = i
 	}

-	for _, name := range preChecked {
-		if idx, ok := m.itemIndex[name]; ok {
+	// Reverse order so preChecked[0] (the current default) ends up last
+	// in checkOrder, matching the "last checked = default" convention.
+	for i := len(preChecked) - 1; i >= 0; i-- {
+		if idx, ok := m.itemIndex[preChecked[i]]; ok {
 			m.checked[idx] = true
 			m.checkOrder = append(m.checkOrder, idx)
 		}
 	}

+	// Position cursor on the current default model
+	if len(preChecked) > 0 {
+		if idx, ok := m.itemIndex[preChecked[0]]; ok {
+			m.cursor = idx
+			m.updateScroll(m.otherStart())
+		}
+	}
+
 	return m
 }

@@ -533,14 +562,25 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			m.cancelled = true
 			return m, tea.Quit

+		case tea.KeyTab:
+			m.multi = !m.multi
+
 		case tea.KeyEnter:
-			if len(m.checkOrder) > 0 {
+			if !m.multi {
+				if len(filtered) > 0 && m.cursor < len(filtered) {
+					m.singleAdd = filtered[m.cursor].Name
+					m.confirmed = true
+					return m, tea.Quit
+				}
+			} else if len(m.checkOrder) > 0 {
 				m.confirmed = true
 				return m, tea.Quit
 			}

 		case tea.KeySpace:
-			m.toggleItem()
+			if m.multi {
+				m.toggleItem()
+			}

 		case tea.KeyUp:
 			if m.cursor > 0 {
@@ -576,15 +616,36 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			}

 		case tea.KeyRunes:
-			m.filter += string(msg.Runes)
-			m.cursor = 0
-			m.scrollOffset = 0
+			// On some terminals (e.g. Windows PowerShell), space arrives as
+			// KeyRunes instead of KeySpace. Intercept it so toggle still works.
+			if len(msg.Runes) == 1 && msg.Runes[0] == ' ' {
+				if m.multi {
+					m.toggleItem()
+				}
+			} else {
+				m.filter += string(msg.Runes)
+				m.cursor = 0
+				m.scrollOffset = 0
+			}
 		}
 	}

 	return m, nil
 }

+func (m multiSelectorModel) renderSingleItem(s *strings.Builder, item SelectItem, idx int) {
+	if idx == m.cursor {
+		s.WriteString(selectorSelectedItemStyle.Render("▸ " + item.Name))
+	} else {
+		s.WriteString(selectorItemStyle.Render(item.Name))
+	}
+	s.WriteString("\n")
+	if item.Description != "" {
+		s.WriteString(selectorDescLineStyle.Render(item.Description))
+		s.WriteString("\n")
+	}
+}
+
 func (m multiSelectorModel) renderMultiItem(s *strings.Builder, item SelectItem, idx int) {
 	origIdx := m.itemIndex[item.Name]

@@ -596,7 +657,7 @@ func (m multiSelectorModel) renderMultiItem(s *strings.Builder, item SelectItem,
 	}

 	suffix := ""
-	if len(m.checkOrder) > 0 && m.checkOrder[0] == origIdx {
+	if len(m.checkOrder) > 0 && m.checkOrder[len(m.checkOrder)-1] == origIdx {
 		suffix = " " + selectorDefaultTagStyle.Render("(default)")
 	}

@@ -618,6 +679,11 @@ func (m multiSelectorModel) View() string {
 		return ""
 	}

+	renderItem := m.renderSingleItem
+	if m.multi {
+		renderItem = m.renderMultiItem
+	}
+
 	var s strings.Builder

 	s.WriteString(selectorTitleStyle.Render(m.title))
@@ -642,7 +708,7 @@ func (m multiSelectorModel) View() string {
 			if idx >= len(filtered) {
 				break
 			}
-			m.renderMultiItem(&s, filtered[idx], idx)
+			renderItem(&s, filtered[idx], idx)
 		}

 		if remaining := len(filtered) - m.scrollOffset - displayCount; remaining > 0 {
@@ -665,7 +731,7 @@ func (m multiSelectorModel) View() string {
 			s.WriteString(sectionHeaderStyle.Render("Recommended"))
 			s.WriteString("\n")
 			for _, idx := range recItems {
-				m.renderMultiItem(&s, filtered[idx], idx)
+				renderItem(&s, filtered[idx], idx)
 			}
 		}

@@ -685,7 +751,7 @@ func (m multiSelectorModel) View() string {
 				if idx >= len(otherItems) {
 					break
 				}
-				m.renderMultiItem(&s, filtered[otherItems[idx]], otherItems[idx])
+				renderItem(&s, filtered[otherItems[idx]], otherItems[idx])
 			}

 			if remaining := len(otherItems) - m.scrollOffset - displayCount; remaining > 0 {
@@ -697,15 +763,18 @@ func (m multiSelectorModel) View() string {

 	s.WriteString("\n")

-	count := m.selectedCount()
-	if count == 0 {
-		s.WriteString(selectorDescStyle.Render("  Select at least one model."))
+	if !m.multi {
+		s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • esc cancel"))
 	} else {
-		s.WriteString(selectorDescStyle.Render(fmt.Sprintf("  %d selected - press enter to continue", count)))
+		count := m.selectedCount()
+		if count == 0 {
+			s.WriteString(selectorDescStyle.Render("  Select at least one model."))
+		} else {
+			s.WriteString(selectorDescStyle.Render(fmt.Sprintf("  %d selected - press enter to continue", count)))
+		}
+		s.WriteString("\n\n")
+		s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • esc cancel"))
 	}
-	s.WriteString("\n\n")
-
-	s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • enter confirm • esc cancel"))

 	result := s.String()
 	if m.width > 0 {
@@ -728,18 +797,28 @@ func SelectMultiple(title string, items []SelectItem, preChecked []string) ([]st
 	}

 	fm := finalModel.(multiSelectorModel)
-	if fm.cancelled {
+	if fm.cancelled || !fm.confirmed {
 		return nil, ErrCancelled
 	}

-	if !fm.confirmed {
-		return nil, ErrCancelled
+	// Single-add mode: prepend the picked model, keep existing models deduped
+	if fm.singleAdd != "" {
+		result := []string{fm.singleAdd}
+		for _, name := range preChecked {
+			if name != fm.singleAdd {
+				result = append(result, name)
+			}
+		}
+		return result, nil
 	}

-	var result []string
+	// Multi-edit mode: last checked is default (first in result)
+	last := fm.checkOrder[len(fm.checkOrder)-1]
+	result := []string{fm.items[last].Name}
 	for _, idx := range fm.checkOrder {
-		result = append(result, fm.items[idx].Name)
+		if idx != last {
+			result = append(result, fm.items[idx].Name)
+		}
 	}
-
 	return result, nil
 }
--- a/cmd/tui/selector_test.go
+++ b/cmd/tui/selector_test.go
@@ -382,6 +382,42 @@ func TestUpdateNavigation_Backspace(t *testing.T) {
 	}
 }

+// --- cursorForCurrent ---
+
+func TestCursorForCurrent(t *testing.T) {
+	testItems := []SelectItem{
+		{Name: "llama3.2", Recommended: true},
+		{Name: "qwen3:8b", Recommended: true},
+		{Name: "gemma3:latest"},
+		{Name: "deepseek-r1"},
+		{Name: "glm-5:cloud"},
+	}
+
+	tests := []struct {
+		name    string
+		current string
+		want    int
+	}{
+		{"empty current", "", 0},
+		{"exact match", "qwen3:8b", 1},
+		{"no match returns 0", "nonexistent", 0},
+		{"bare name matches with :latest suffix", "gemma3", 2},
+		{"full tag matches bare item", "llama3.2:latest", 0},
+		{"cloud model exact match", "glm-5:cloud", 4},
+		{"cloud model bare name", "glm-5", 4},
+		{"recommended item exact match", "llama3.2", 0},
+		{"recommended item with tag", "qwen3", 1},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := cursorForCurrent(testItems, tt.current); got != tt.want {
+				t.Errorf("cursorForCurrent(%q) = %d, want %d", tt.current, got, tt.want)
+			}
+		})
+	}
+}
+
 // --- ReorderItems ---

 func TestReorderItems(t *testing.T) {
@@ -503,6 +539,7 @@ func TestMultiView_CursorIndicator(t *testing.T) {

 func TestMultiView_CheckedItemShowsX(t *testing.T) {
 	m := newMultiSelectorModel("Pick:", items("a", "b"), []string{"a"})
+	m.multi = true
 	content := m.View()

 	if !strings.Contains(content, "[x]") {
@@ -514,11 +551,18 @@ func TestMultiView_CheckedItemShowsX(t *testing.T) {
 }

 func TestMultiView_DefaultTag(t *testing.T) {
-	m := newMultiSelectorModel("Pick:", items("a", "b"), []string{"a"})
+	m := newMultiSelectorModel("Pick:", items("a", "b", "c"), []string{"a", "b"})
+	m.multi = true
 	content := m.View()

 	if !strings.Contains(content, "(default)") {
-		t.Error("first checked item should have (default) tag")
+		t.Error("should have (default) tag")
+	}
+	// preChecked[0] ("a") is the default: the text before the tag on its line must name "a".
+	before, _, _ := strings.Cut(content, "(default)")
+	line := before[strings.LastIndex(before, "\n")+1:]
+	if !strings.Contains(line, "a") {
+		t.Error("(default) tag should appear on the line for 'a' (the current default)")
 	}
 }

@@ -545,6 +589,200 @@ func TestMultiView_OverflowIndicator(t *testing.T) {
 	}
 }

+// --- Multi-select space toggle (including KeyRunes fallback for Windows PowerShell) ---
+
+func TestMultiUpdate_SpaceTogglesItem(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
+	m.multi = true
+	m.cursor = 1
+
+	// Simulate space delivered as tea.KeySpace
+	updated, _ := m.Update(tea.KeyMsg{Type: tea.KeySpace})
+	m = updated.(multiSelectorModel)
+
+	if !m.checked[1] {
+		t.Error("space (KeySpace) should toggle the item at cursor")
+	}
+	if m.filter != "" {
+		t.Error("space should not modify filter")
+	}
+}
+
+func TestMultiUpdate_SpaceRuneTogglesItem(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
+	m.multi = true
+	m.cursor = 1
+
+	// Simulate space delivered as tea.KeyRunes (Windows PowerShell behavior)
+	updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{' '}})
+	m = updated.(multiSelectorModel)
+
+	if !m.checked[1] {
+		t.Error("space (KeyRunes) should toggle the item at cursor")
+	}
+	if m.filter != "" {
+		t.Error("space rune should not be added to filter")
+	}
+	if m.cursor != 1 {
+		t.Errorf("cursor should stay at 1, got %d", m.cursor)
+	}
+}
+
+// --- Single-add mode ---
+
+func TestMulti_StartsInSingleMode(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
+	if m.multi {
+		t.Error("should start in single mode (multi=false)")
+	}
+}
+
+func TestMulti_SingleModeNoCheckboxes(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
+	content := m.View()
+	if strings.Contains(content, "[x]") || strings.Contains(content, "[ ]") {
+		t.Error("single mode should not show checkboxes")
+	}
+	if !strings.Contains(content, "▸") {
+		t.Error("single mode should show cursor indicator")
+	}
+}
+
+func TestMulti_SingleModeEnterPicksItem(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
+	m.cursor = 1
+
+	updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyEnter})
+	m = updated.(multiSelectorModel)
+
+	if m.singleAdd != "b" {
+		t.Errorf("enter in single mode should pick cursor item, got %q", m.singleAdd)
+	}
+	if !m.confirmed {
+		t.Error("should set confirmed")
+	}
+}
+
+func TestMulti_SingleModeSpaceIsNoop(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
+	m.cursor = 0
+
+	updated, _ := m.Update(tea.KeyMsg{Type: tea.KeySpace})
+	m = updated.(multiSelectorModel)
+
+	if len(m.checked) != 0 {
+		t.Error("space in single mode should not toggle items")
+	}
+}
+
+func TestMulti_SingleModeSpaceRuneIsNoop(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
+	m.cursor = 0
+
+	updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{' '}})
+	m = updated.(multiSelectorModel)
+
+	if len(m.checked) != 0 {
+		t.Error("space rune in single mode should not toggle items")
+	}
+	if m.filter != "" {
+		t.Error("space rune in single mode should not add to filter")
+	}
+}
+
+func TestMulti_TabTogglesMode(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
+
+	if m.multi {
+		t.Fatal("should start in single mode")
+	}
+
+	updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyTab})
+	m = updated.(multiSelectorModel)
+	if !m.multi {
+		t.Error("tab should switch to multi mode")
+	}
+
+	updated, _ = m.Update(tea.KeyMsg{Type: tea.KeyTab})
+	m = updated.(multiSelectorModel)
+	if m.multi {
+		t.Error("tab should switch back to single mode")
+	}
+}
+
+func TestMulti_SingleModeHelpText(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a"), nil)
+	content := m.View()
+	if !strings.Contains(content, "tab add multiple") {
+		t.Error("single mode should show 'tab add multiple' in help")
+	}
+}
+
+func TestMulti_MultiModeHelpText(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("a"), nil)
+	m.multi = true
+	content := m.View()
+	if !strings.Contains(content, "tab select single") {
+		t.Error("multi mode should show 'tab select single' in help")
+	}
+}
+
+// --- preChecked initialization order ---
+
+func TestMulti_PreCheckedDefaultIsLast(t *testing.T) {
+	// preChecked[0] ("a") is the current default and should end up
+	// last in checkOrder so it gets the (default) tag.
+	m := newMultiSelectorModel("Pick:", items("a", "b", "c"), []string{"a", "b", "c"})
+
+	if len(m.checkOrder) != 3 {
+		t.Fatalf("expected 3 in checkOrder, got %d", len(m.checkOrder))
+	}
+	lastIdx := m.checkOrder[len(m.checkOrder)-1]
+	if m.items[lastIdx].Name != "a" {
+		t.Errorf("preChecked[0] should be last in checkOrder, got %q", m.items[lastIdx].Name)
+	}
+}
+
+func TestMulti_CursorOnDefaultModel(t *testing.T) {
+	// preChecked[0] ("b") is the default; cursor should start on it
+	m := newMultiSelectorModel("Pick:", items("a", "b", "c"), []string{"b", "c"})
+
+	if m.cursor != 1 {
+		t.Errorf("cursor should be on preChecked[0] ('b') at index 1, got %d", m.cursor)
+	}
+}
+
+// --- Multi-mode last-checked is default ---
+
+func TestMulti_LastCheckedIsDefault(t *testing.T) {
+	m := newMultiSelectorModel("Pick:", items("alpha", "beta", "gamma"), nil)
+	m.multi = true
+
+	// Check "alpha" then "gamma"
+	m.cursor = 0
+	m.toggleItem()
+	m.cursor = 2
+	m.toggleItem()
+
+	// Last checked ("gamma") should be at the end of checkOrder
+	lastIdx := m.checkOrder[len(m.checkOrder)-1]
+	if m.items[lastIdx].Name != "gamma" {
+		t.Errorf("last checked should be 'gamma', got %q", m.items[lastIdx].Name)
+	}
+
+	// The (default) tag renders based on checkOrder[len-1]
+	content := m.View()
+	if !strings.Contains(content, "(default)") {
+		t.Fatal("should show (default) tag")
+	}
+	// "alpha" line should NOT have the default tag
+	for _, line := range strings.Split(content, "\n") {
+		if strings.Contains(line, "alpha") && strings.Contains(line, "(default)") {
+			t.Error("'alpha' (first checked) should not have (default) tag")
+		}
+	}
+}
+
 // Key message helpers for testing

 type keyType = int
--- a/cmd/tui/tui.go
+++ b/cmd/tui/tui.go
@@ -131,7 +131,7 @@ type model struct {
 	signInURL       string
 	signInModel     string
 	signInSpinner   int
-	signInFromModal bool   // true if sign-in was triggered from modal (not main menu)
+	signInFromModal bool // true if sign-in was triggered from modal (not main menu)

 	width     int    // terminal width from WindowSizeMsg
 	statusMsg string // temporary status message shown near help text
@@ -209,7 +209,26 @@ func (m *model) openMultiModelModal(integration string) {
 }

 func isCloudModel(name string) bool {
-	return strings.HasSuffix(name, ":cloud")
+	return strings.HasSuffix(name, ":cloud") || strings.HasSuffix(name, "-cloud")
+}
+
+func cloudStatusDisabled(client *api.Client) bool {
+	status, err := client.CloudStatusExperimental(context.Background())
+	if err != nil {
+		return false
+	}
+	return status.Cloud.Disabled
+}
+
+func cloudModelDisabled(name string) bool {
+	if !isCloudModel(name) {
+		return false
+	}
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return false
+	}
+	return cloudStatusDisabled(client)
 }

 // checkCloudSignIn checks if a cloud model needs sign-in.
@@ -222,6 +241,9 @@ func (m *model) checkCloudSignIn(modelName string, fromModal bool) tea.Cmd {
 	if err != nil {
 		return nil
 	}
+	if cloudStatusDisabled(client) {
+		return nil
+	}
 	user, err := client.Whoami(context.Background())
 	if err == nil && user != nil && user.Name != "" {
 		return nil
@@ -272,7 +294,11 @@ func (m *model) loadAvailableModels() {
 	if err != nil {
 		return
 	}
+	cloudDisabled := cloudStatusDisabled(client)
 	for _, mdl := range models.Models {
+		if cloudDisabled && mdl.RemoteModel != "" {
+			continue
+		}
 		m.availableModels[mdl.Name] = true
 	}
 }
@@ -403,8 +429,24 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			}
 			if m.multiModalSelector.confirmed {
 				var selected []string
-				for _, idx := range m.multiModalSelector.checkOrder {
-					selected = append(selected, m.multiModalSelector.items[idx].Name)
+				if m.multiModalSelector.singleAdd != "" {
+					// Single-add mode: prepend picked model, keep existing deduped
+					selected = []string{m.multiModalSelector.singleAdd}
+					for _, name := range config.IntegrationModels(m.items[m.cursor].integration) {
+						if name != m.multiModalSelector.singleAdd {
+							selected = append(selected, name)
+						}
+					}
+				} else {
+					// Last checked is default (first in result)
+					co := m.multiModalSelector.checkOrder
+					last := co[len(co)-1]
+					selected = []string{m.multiModalSelector.items[last].Name}
+					for _, idx := range co {
+						if idx != last {
+							selected = append(selected, m.multiModalSelector.items[idx].Name)
+						}
+					}
 				}
 				if len(selected) > 0 {
 					m.changeModels = selected
@@ -482,7 +524,7 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		case "enter", " ":
 			item := m.items[m.cursor]

-			if item.integration != "" && !config.IsIntegrationInstalled(item.integration) {
+			if item.integration != "" && !config.IsIntegrationInstalled(item.integration) && !config.AutoInstallable(item.integration) {
 				return m, nil
 			}

@@ -496,6 +538,15 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 				return m, cmd
 			}

+			if configuredModel != "" && isCloudModel(configuredModel) && cloudModelDisabled(configuredModel) {
+				if item.integration != "" && config.IsEditorIntegration(item.integration) {
+					m.openMultiModelModal(item.integration)
+				} else {
+					m.openModelModal(configuredModel)
+				}
+				return m, nil
+			}
+
 			m.selected = true
 			m.quitting = true
 			return m, tea.Quit
@@ -504,6 +555,12 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			item := m.items[m.cursor]
 			if item.integration != "" || item.isRunModel {
 				if item.integration != "" && !config.IsIntegrationInstalled(item.integration) {
+					if config.AutoInstallable(item.integration) {
+						// Auto-installable: select to trigger install flow
+						m.selected = true
+						m.quitting = true
+						return m, tea.Quit
+					}
 					return m, nil
 				}
 				if item.integration != "" && config.IsEditorIntegration(item.integration) {
@@ -567,7 +624,11 @@ func (m model) View() string {
 		var modelSuffix string
 		if item.integration != "" {
 			if !isInstalled {
-				title += " " + notInstalledStyle.Render("(not installed)")
+				if config.AutoInstallable(item.integration) {
+					title += " " + notInstalledStyle.Render("(install)")
+				} else {
+					title += " " + notInstalledStyle.Render("(not installed)")
+				}
 			} else if m.cursor == i {
 				if mdl := config.IntegrationModel(item.integration); mdl != "" && m.modelExists(mdl) {
 					modelSuffix = " " + modelStyle.Render("("+mdl+")")
@@ -583,7 +644,9 @@ func (m model) View() string {

 		desc := item.description
 		if !isInstalled && item.integration != "" && m.cursor == i {
-			if hint := config.IntegrationInstallHint(item.integration); hint != "" {
+			if config.AutoInstallable(item.integration) {
+				desc = "Press enter to install"
+			} else if hint := config.IntegrationInstallHint(item.integration); hint != "" {
 				desc = hint
 			} else {
 				desc = "not installed"
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -257,10 +257,11 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 	if err != nil {
 		return nil, nil, err
 	}
+	bts = sanitizeNonFiniteJSON(bts)

 	var p ModelParameters
 	if err := json.Unmarshal(bts, &p); err != nil {
-		return nil, nil, err
+		return nil, nil, fmt.Errorf("parse config.json: %w", err)
 	}

 	if len(p.Architectures) < 1 {
@@ -315,16 +316,20 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 		conv = &glm4MoeLiteModel{}
 	case "GlmOcrForConditionalGeneration":
 		conv = &glmOcrModel{}
-	case "Lfm2ForCausalLM":
+	case "Lfm2ForCausalLM", "Lfm2MoeForCausalLM":
 		conv = &lfm2Model{}
-	case "Qwen3NextForCausalLM":
+	case "Lfm2VlForConditionalGeneration":
+		conv = &lfm2VLTextModel{}
+	case "Qwen3NextForCausalLM", "Qwen3_5ForConditionalGeneration", "Qwen3_5MoeForConditionalGeneration":
 		conv = &qwen3NextModel{}
+	case "NemotronHForCausalLM":
+		conv = &nemotronHModel{}
 	default:
 		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
 	}

 	if err := json.Unmarshal(bts, conv); err != nil {
-		return nil, nil, err
+		return nil, nil, fmt.Errorf("parse config.json for %q: %w", p.Architectures[0], err)
 	}

 	if t, ok := conv.(moreParser); ok {
--- a/convert/convert_lfm2.go
+++ b/convert/convert_lfm2.go
@@ -1,6 +1,8 @@
 package convert

 import (
+	"cmp"
+	"fmt"
 	"slices"
 	"strings"

@@ -13,42 +15,149 @@ type lfm2Model struct {
 	NumHiddenLayers       uint32   `json:"num_hidden_layers"`
 	MaxPositionEmbeddings uint32   `json:"max_position_embeddings"`
 	IntermediateSize      uint32   `json:"intermediate_size"`
+	BlockFFDim            uint32   `json:"block_ff_dim"`
+	BlockMultipleOf       uint32   `json:"block_multiple_of"`
+	BlockAutoAdjustFFDim  bool     `json:"block_auto_adjust_ff_dim"`
+	BlockFFNDimMultiplier float32  `json:"block_ffn_dim_multiplier"`
 	NumAttentionHeads     uint32   `json:"num_attention_heads"`
 	NumKeyValueHeads      uint32   `json:"num_key_value_heads"`
 	RopeTheta             float32  `json:"rope_theta"`
 	NormEps               float32  `json:"norm_eps"`
 	ConvLCache            uint32   `json:"conv_L_cache"`
+	MoEIntermediateSize   uint32   `json:"moe_intermediate_size"`
+	NumExperts            uint32   `json:"num_experts"`
+	NumLocalExperts       uint32   `json:"num_local_experts"`
+	NumExpertsPerToken    uint32   `json:"num_experts_per_tok"`
+	NumDenseLayers        uint32   `json:"num_dense_layers"`
+	RoutedScalingFactor   float32  `json:"routed_scaling_factor"`
 	LayerTypes            []string `json:"layer_types"`
 	TieEmbedding          bool     `json:"tie_embedding"`
+	RopeParameters        struct {
+		RopeTheta float32 `json:"rope_theta"`
+	} `json:"rope_parameters"`
 }

 var _ ModelConverter = (*lfm2Model)(nil)

+const (
+	defaultMaxPositionEmbeddings = uint32(128_000)
+	fallbackContextLength        = uint32(32_768)
+)
+
+func (p *lfm2Model) isMoE() bool {
+	return p.ModelType == "lfm2_moe" || p.expertCount() > 0
+}
+
+func (p *lfm2Model) ropeFreqBase() float32 {
+	if p.RopeTheta != 0 {
+		return p.RopeTheta
+	}
+
+	return p.RopeParameters.RopeTheta
+}
+
+func (p *lfm2Model) expertCount() uint32 {
+	if p.NumLocalExperts > 0 {
+		return p.NumLocalExperts
+	}
+	return p.NumExperts
+}
+
+func (p *lfm2Model) feedForwardLength() uint32 {
+	ff := p.IntermediateSize
+	if p.BlockFFDim != 0 {
+		ff = p.BlockFFDim
+	}
+
+	if !p.BlockAutoAdjustFFDim || p.BlockMultipleOf == 0 {
+		return ff
+	}
+
+	ff = (2 * ff) / 3
+
+	// Keep default multiplier behavior consistent with llama.cpp conversion.
+	if p.BlockFFNDimMultiplier != 0 {
+		ff = uint32(float32(ff) * p.BlockFFNDimMultiplier)
+	}
+
+	m := p.BlockMultipleOf
+	return m * ((ff + m - 1) / m)
+}
+
+func (p *lfm2Model) hasKnownContextLengthFallbackSignature() bool {
+	return p.isMoE() &&
+		p.VocabSize == 65536 &&
+		p.HiddenSize == 2048 &&
+		p.NumHiddenLayers == 40 &&
+		p.IntermediateSize == 11776 &&
+		p.NumAttentionHeads == 32 &&
+		p.NumKeyValueHeads == 8 &&
+		p.NumDenseLayers == 2 &&
+		p.expertCount() == 64 &&
+		p.NumExpertsPerToken == 4 &&
+		p.MoEIntermediateSize == 1536
+}
+
+func (p *lfm2Model) contextLength() uint32 {
+	if p.MaxPositionEmbeddings == defaultMaxPositionEmbeddings && p.hasKnownContextLengthFallbackSignature() {
+		return fallbackContextLength
+	}
+
+	return p.MaxPositionEmbeddings
+}
+
 func (p *lfm2Model) KV(t *Tokenizer) KV {
+	architecture := "lfm2"
+	if p.isMoE() {
+		architecture = "lfm2moe"
+	}
+
 	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "lfm2"
-	kv["lfm2.vocab_size"] = p.VocabSize
-	kv["lfm2.block_count"] = p.NumHiddenLayers
-	kv["lfm2.embedding_length"] = p.HiddenSize
-	kv["lfm2.feed_forward_length"] = p.IntermediateSize
-	kv["lfm2.context_length"] = p.MaxPositionEmbeddings
+	kv["general.architecture"] = architecture
+	kv["tokenizer.ggml.pre"] = "lfm2"
+	kv["vocab_size"] = p.VocabSize
+	kv["block_count"] = p.NumHiddenLayers
+	kv["embedding_length"] = p.HiddenSize
+	kv["feed_forward_length"] = p.feedForwardLength()
+	kv["context_length"] = p.contextLength()

 	// Build per-layer KV head count array based on layer_types
-	// (0 = shortconv layer, non-zero = attention layer with that many KV heads)
+	// (0 = shortconv layer, non-zero = attention layer with that many KV heads).
+	//
+	// Dense LFM2 in HF defaults to all attention layers when layer_types is absent.
+	// Preserve that behavior to avoid accidentally emitting all-conv metadata.
 	kvHeadCounts := make([]uint32, p.NumHiddenLayers)
-	for i := range p.NumHiddenLayers {
-		if int(i) < len(p.LayerTypes) && p.LayerTypes[i] == "full_attention" {
+	if len(p.LayerTypes) == 0 {
+		for i := range p.NumHiddenLayers {
 			kvHeadCounts[i] = p.NumKeyValueHeads
 		}
+	} else {
+		for i := range p.NumHiddenLayers {
+			if int(i) < len(p.LayerTypes) && p.LayerTypes[i] == "full_attention" {
+				kvHeadCounts[i] = p.NumKeyValueHeads
+			}
+		}
 	}

-	kv["lfm2.attention.head_count"] = p.NumAttentionHeads
-	kv["lfm2.attention.head_count_kv"] = kvHeadCounts
-	kv["lfm2.attention.key_length"] = p.HiddenSize / p.NumAttentionHeads
-	kv["lfm2.attention.value_length"] = p.HiddenSize / p.NumAttentionHeads
-	kv["lfm2.attention.layer_norm_rms_epsilon"] = p.NormEps
-	kv["lfm2.rope.freq_base"] = p.RopeTheta
-	kv["lfm2.shortconv.l_cache"] = p.ConvLCache
+	kv["attention.head_count"] = p.NumAttentionHeads
+	kv["attention.head_count_kv"] = kvHeadCounts
+	kv["attention.key_length"] = p.HiddenSize / p.NumAttentionHeads
+	kv["attention.value_length"] = p.HiddenSize / p.NumAttentionHeads
+	kv["attention.layer_norm_rms_epsilon"] = p.NormEps
+	kv["shortconv.l_cache"] = p.ConvLCache
+
+	if ropeFreqBase := p.ropeFreqBase(); ropeFreqBase != 0 {
+		kv["rope.freq_base"] = ropeFreqBase
+	}
+
+	if p.isMoE() {
+		kv["expert_count"] = p.expertCount()
+		kv["expert_used_count"] = p.NumExpertsPerToken
+		kv["expert_feed_forward_length"] = p.MoEIntermediateSize
+		kv["leading_dense_block_count"] = p.NumDenseLayers
+		kv["expert_gating_func"] = uint32(2) // sigmoid
+		kv["expert_weights_scale"] = cmp.Or(p.RoutedScalingFactor, float32(1.0))
+	}

 	return kv
 }
@@ -56,6 +165,30 @@ func (p *lfm2Model) KV(t *Tokenizer) KV {
 func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
 	var out []*ggml.Tensor

+	if p.isMoE() {
+		merges := make([]merge, 0, p.NumHiddenLayers*3)
+		for i := range p.NumHiddenLayers {
+			if i < p.NumDenseLayers {
+				continue
+			}
+
+			merges = append(merges, merge{
+				fmt.Sprintf("blk.%d.feed_forward.experts.*.w1.weight", i),
+				fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
+			}, merge{
+				fmt.Sprintf("blk.%d.feed_forward.experts.*.w2.weight", i),
+				fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
+			}, merge{
+				fmt.Sprintf("blk.%d.feed_forward.experts.*.w3.weight", i),
+				fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
+			})
+		}
+
+		merged, remaining := mergeTensors(ts, merges...)
+		out = append(out, merged...)
+		ts = remaining
+	}
+
 	for _, t := range ts {
 		shape := t.Shape()

@@ -80,7 +213,7 @@ func (p *lfm2Model) Tensors(ts []Tensor) []*ggml.Tensor {
 func (p *lfm2Model) Replacements() []string {
 	return []string{
 		"model.embed_tokens", "token_embd",
-		"model.embedding_norm", "output_norm",
+		"model.embedding_norm", "token_embd_norm",
 		"model.layers", "blk",
 		"operator_norm", "attn_norm",
 		"self_attn.q_proj", "attn_q",
@@ -92,6 +225,8 @@ func (p *lfm2Model) Replacements() []string {
 		"conv.conv", "shortconv.conv",
 		"conv.in_proj", "shortconv.in_proj",
 		"conv.out_proj", "shortconv.out_proj",
+		"feed_forward.gate", "ffn_gate_inp",
+		"feed_forward.expert_bias", "exp_probs_b.bias",
 		"feed_forward.w1", "ffn_gate",
 		"feed_forward.w2", "ffn_down",
 		"feed_forward.w3", "ffn_up",
--- a/convert/convert_lfm2_test.go
+++ b/convert/convert_lfm2_test.go
@@ -0,0 +1,271 @@
+package convert
+
+import (
+	"io"
+	"slices"
+	"strings"
+	"testing"
+)
+
+// lfm2StubTensor is a test double for the Tensor interface. It carries only
+// the name and shape held by the embedded tensorBase and never writes data.
+type lfm2StubTensor struct {
+	tensorBase
+}
+
+// newLFM2StubTensor returns a stub with the given name and shape. The shape
+// slice is stored as passed in, not copied.
+func newLFM2StubTensor(name string, shape []uint64) *lfm2StubTensor {
+	stub := new(lfm2StubTensor)
+	stub.name = name
+	stub.shape = shape
+	return stub
+}
+
+// WriteTo writes nothing and always reports zero bytes and no error.
+func (t *lfm2StubTensor) WriteTo(_ io.Writer) (int64, error) {
+	return 0, nil
+}
+
+// Clone returns a new stub with the same name and an independent copy of the
+// shape slice.
+func (t *lfm2StubTensor) Clone() Tensor {
+	return newLFM2StubTensor(t.name, slices.Clone(t.shape))
+}
+
+// TestLFM2MoEKV checks the metadata produced by lfm2Model.KV for an
+// "lfm2_moe" configuration: architecture and pre-tokenizer names, the
+// expert-related keys, the per-layer KV head counts and the rope base.
+func TestLFM2MoEKV(t *testing.T) {
+	model := lfm2Model{
+		ModelParameters:       ModelParameters{ModelType: "lfm2_moe", VocabSize: 65536},
+		HiddenSize:            2048,
+		NumHiddenLayers:       4,
+		MaxPositionEmbeddings: 128000,
+		IntermediateSize:      11776,
+		NumAttentionHeads:     32,
+		NumKeyValueHeads:      8,
+		LayerTypes:            []string{"conv", "full_attention", "conv", "full_attention"},
+		NormEps:               1e-5,
+		ConvLCache:            3,
+		MoEIntermediateSize:   1536,
+		NumExperts:            64,
+		NumExpertsPerToken:    4,
+		NumDenseLayers:        2,
+	}
+	model.RopeParameters.RopeTheta = 1_000_000
+
+	kv := model.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
+
+	// Scalar keys are compared in the listed order; the first mismatch aborts.
+	scalars := []struct {
+		key  string
+		want any
+	}{
+		{"general.architecture", "lfm2moe"},
+		{"tokenizer.ggml.pre", "lfm2"},
+		{"expert_count", uint32(64)},
+		{"expert_used_count", uint32(4)},
+		{"expert_feed_forward_length", uint32(1536)},
+		{"leading_dense_block_count", uint32(2)},
+		{"expert_gating_func", uint32(2)},
+	}
+	for _, tc := range scalars {
+		if got := kv[tc.key]; got != tc.want {
+			t.Fatalf("%s = %v, want %v", tc.key, got, tc.want)
+		}
+	}
+
+	// Only the "full_attention" layers are expected to carry KV heads.
+	headCounts, isSlice := kv["attention.head_count_kv"].([]uint32)
+	if !isSlice {
+		t.Fatalf("attention.head_count_kv has unexpected type %T", kv["attention.head_count_kv"])
+	}
+	if expected := []uint32{0, 8, 0, 8}; !slices.Equal(headCounts, expected) {
+		t.Fatalf("attention.head_count_kv = %v, want %v", headCounts, expected)
+	}
+
+	if got, want := kv["rope.freq_base"], float32(1_000_000); got != want {
+		t.Fatalf("rope.freq_base = %v, want %v", got, want)
+	}
+}
+
+// TestLFM2DenseKV checks that a dense "lfm2" configuration reports the lfm2
+// architecture and pre-tokenizer and does not emit the expert_count key.
+func TestLFM2DenseKV(t *testing.T) {
+	model := lfm2Model{
+		ModelParameters:       ModelParameters{ModelType: "lfm2", VocabSize: 32000},
+		HiddenSize:            1024,
+		NumHiddenLayers:       2,
+		MaxPositionEmbeddings: 32768,
+		IntermediateSize:      4096,
+		NumAttentionHeads:     16,
+		NumKeyValueHeads:      4,
+		LayerTypes:            []string{"conv", "full_attention"},
+		NormEps:               1e-5,
+		ConvLCache:            3,
+		RopeTheta:             10000,
+	}
+
+	kv := model.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
+
+	for _, key := range []string{"general.architecture", "tokenizer.ggml.pre"} {
+		if got, want := kv[key], "lfm2"; got != want {
+			t.Fatalf("%s = %v, want %v", key, got, want)
+		}
+	}
+
+	if _, present := kv["expert_count"]; present {
+		t.Fatalf("expert_count should not be set for dense lfm2")
+	}
+}
+
+// TestLFM2MoETensors feeds two experts' w1/w2/w3 weights for block 2 plus one
+// shortconv weight through lfm2Model.Tensors. It expects the expert weights
+// to come back stacked under the *_exps names, the shortconv weight to come
+// back with shape [2048 3], and no per-expert tensor to remain in the output.
+func TestLFM2MoETensors(t *testing.T) {
+	model := lfm2Model{
+		ModelParameters: ModelParameters{ModelType: "lfm2_moe"},
+		NumHiddenLayers: 4,
+		NumDenseLayers:  2,
+	}
+
+	out := model.Tensors([]Tensor{
+		newLFM2StubTensor("blk.2.feed_forward.experts.0.w1.weight", []uint64{1536, 2048}),
+		newLFM2StubTensor("blk.2.feed_forward.experts.1.w1.weight", []uint64{1536, 2048}),
+		newLFM2StubTensor("blk.2.feed_forward.experts.0.w2.weight", []uint64{2048, 1536}),
+		newLFM2StubTensor("blk.2.feed_forward.experts.1.w2.weight", []uint64{2048, 1536}),
+		newLFM2StubTensor("blk.2.feed_forward.experts.0.w3.weight", []uint64{1536, 2048}),
+		newLFM2StubTensor("blk.2.feed_forward.experts.1.w3.weight", []uint64{1536, 2048}),
+		newLFM2StubTensor("blk.0.shortconv.conv.weight", []uint64{2048, 1, 3}),
+	})
+
+	shapes := make(map[string][]uint64, len(out))
+	for _, tensor := range out {
+		shapes[tensor.Name] = tensor.Shape
+	}
+
+	merged := []struct {
+		name string
+		want []uint64
+	}{
+		{"blk.2.ffn_gate_exps.weight", []uint64{2, 1536, 2048}},
+		{"blk.2.ffn_down_exps.weight", []uint64{2, 2048, 1536}},
+		{"blk.2.ffn_up_exps.weight", []uint64{2, 1536, 2048}},
+	}
+	for _, tc := range merged {
+		got, found := shapes[tc.name]
+		if !found {
+			t.Fatalf("missing merged tensor %s", tc.name)
+		}
+		if !slices.Equal(got, tc.want) {
+			t.Fatalf("%s shape = %v, want %v", tc.name, got, tc.want)
+		}
+	}
+
+	conv, found := shapes["blk.0.shortconv.conv.weight"]
+	if !found {
+		t.Fatalf("missing shortconv tensor")
+	}
+	if !slices.Equal(conv, []uint64{2048, 3}) {
+		t.Fatalf("blk.0.shortconv.conv.weight shape = %v, want [2048 3]", conv)
+	}
+
+	if _, leftover := shapes["blk.2.feed_forward.experts.0.w1.weight"]; leftover {
+		t.Fatalf("unmerged expert tensor should not be present")
+	}
+}
+
+// TestLFM2MoEReplacements checks that the router gate and expert bias tensor
+// names are rewritten by the replacement list of lfm2Model.
+func TestLFM2MoEReplacements(t *testing.T) {
+	var model lfm2Model
+	rewrite := strings.NewReplacer(model.Replacements()...).Replace
+
+	cases := []struct {
+		label, in, want string
+	}{
+		{"expert bias", "model.layers.2.feed_forward.expert_bias", "blk.2.exp_probs_b.bias"},
+		{"gate", "model.layers.2.feed_forward.gate.weight", "blk.2.ffn_gate_inp.weight"},
+	}
+	for _, tc := range cases {
+		if got := rewrite(tc.in); got != tc.want {
+			t.Fatalf("%s replacement = %q, want %q", tc.label, got, tc.want)
+		}
+	}
+}
+
+// TestLFM2KVContextLengthEdgeCaseFallbackOverride builds a 40-layer lfm2_moe
+// configuration whose only "full_attention" layer is index 2. It expects KV
+// to report a context_length of 32768 rather than the configured
+// MaxPositionEmbeddings of 128000.
+func TestLFM2KVContextLengthEdgeCaseFallbackOverride(t *testing.T) {
+	const layers = 40
+
+	layerTypes := make([]string, layers)
+	for i := range layerTypes {
+		layerTypes[i] = "conv"
+	}
+	layerTypes[2] = "full_attention"
+
+	model := lfm2Model{
+		ModelParameters:       ModelParameters{ModelType: "lfm2_moe", VocabSize: 65536},
+		HiddenSize:            2048,
+		NumHiddenLayers:       layers,
+		MaxPositionEmbeddings: 128000,
+		IntermediateSize:      11776,
+		NumAttentionHeads:     32,
+		NumKeyValueHeads:      8,
+		LayerTypes:            layerTypes,
+		NormEps:               1e-5,
+		ConvLCache:            3,
+		MoEIntermediateSize:   1536,
+		NumExperts:            64,
+		NumExpertsPerToken:    4,
+		NumDenseLayers:        2,
+	}
+
+	kv := model.KV(&Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}})
+
+	if got, want := kv["context_length"], uint32(32768); got != want {
+		t.Fatalf("context_length = %v, want %v", got, want)
+	}
+}
+
+// TestLFM2KVContextLengthNoOverride uses a 39-layer lfm2_moe configuration
+// and expects context_length to equal the configured MaxPositionEmbeddings.
+func TestLFM2KVContextLengthNoOverride(t *testing.T) {
+	const maxPositions = 128000
+
+	model := lfm2Model{
+		ModelParameters:       ModelParameters{ModelType: "lfm2_moe", VocabSize: 65536},
+		HiddenSize:            2048,
+		NumHiddenLayers:       39, // mismatch: should not trigger edge case
+		MaxPositionEmbeddings: maxPositions,
+		IntermediateSize:      11776,
+		NumAttentionHeads:     32,
+		NumKeyValueHeads:      8,
+		LayerTypes:            []string{"conv", "full_attention"},
+		NormEps:               1e-5,
+		ConvLCache:            3,
+		MoEIntermediateSize:   1536,
+		NumExperts:            64,
+		NumExpertsPerToken:    4,
+		NumDenseLayers:        2,
+	}
+
+	tokenizer := &Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}}
+	kv := model.KV(tokenizer)
+
+	if got, want := kv["context_length"], uint32(maxPositions); got != want {
+		t.Fatalf("context_length = %v, want %v", got, want)
+	}
+}
+
+// TestLFM2KVFeedForwardLengthAutoAdjust sets BlockFFDim to 12288 with
+// auto-adjust enabled (multiple of 256, multiplier 1.0) and expects KV to
+// report a feed_forward_length of 8192.
+func TestLFM2KVFeedForwardLengthAutoAdjust(t *testing.T) {
+	model := lfm2Model{
+		ModelParameters:       ModelParameters{ModelType: "lfm2", VocabSize: 65536},
+		HiddenSize:            2048,
+		NumHiddenLayers:       16,
+		MaxPositionEmbeddings: 128000,
+		IntermediateSize:      12288, // should be ignored when block_ff_dim is set
+		BlockFFDim:            12288,
+		BlockAutoAdjustFFDim:  true,
+		BlockMultipleOf:       256,
+		BlockFFNDimMultiplier: 1.0,
+		NumAttentionHeads:     32,
+		NumKeyValueHeads:      8,
+		LayerTypes:            []string{"conv", "full_attention"},
+		NormEps:               1e-5,
+		ConvLCache:            3,
+	}
+
+	tokenizer := &Tokenizer{Vocabulary: &Vocabulary{Model: "gpt2"}}
+	kv := model.KV(tokenizer)
+
+	const key = "feed_forward_length"
+	if got, want := kv[key], uint32(8192); got != want {
+		t.Fatalf("feed_forward_length = %v, want %v", got, want)
+	}
+}
--- a/convert/convert_lfm2_vl.go
+++ b/convert/convert_lfm2_vl.go
@@ -0,0 +1,417 @@
+package convert
+
+import (
+	"cmp"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io/fs"
+	"slices"
+	"strings"
+
+	"github.com/ollama/ollama/fs/ggml"
+)
+
+// lfm2VLTextModel converts the language model component of LFM2 VL checkpoints.
+//
+// Fields with a json tag are read from the model configuration. Processor has
+// no tag; parseMore fills it from processor_config.json when that file exists.
+// The *bool fields are pointers so KV can tell "absent" (nil) apart from an
+// explicit false before applying its defaults.
+type lfm2VLTextModel struct {
+	// TextConfig holds the nested text-model configuration; KV, Tensors and
+	// Replacements delegate to it.
+	TextConfig            lfm2Model `json:"text_config"`
+	DoImageSplitting      *bool     `json:"do_image_splitting"`
+	DownsampleFactor      uint32    `json:"downsample_factor"`
+	EncoderPatchSize      uint32    `json:"encoder_patch_size"`
+	ImageTokenID          uint32    `json:"image_token_id"`
+	MaxImageTokens        uint32    `json:"max_image_tokens"`
+	MinImageTokens        uint32    `json:"min_image_tokens"`
+	MaxTiles              uint32    `json:"max_tiles"`
+	MinTiles              uint32    `json:"min_tiles"`
+	TileSize              uint32    `json:"tile_size"`
+	MaxPixelsTolerance    float32   `json:"max_pixels_tolerance"`
+	ProjectorUseLayernorm bool      `json:"projector_use_layernorm"`
+	ProjectorHiddenSize   uint32    `json:"projector_hidden_size"`
+	ProjectorHiddenAct    string    `json:"projector_hidden_act"`
+	UseImageSpecialTokens *bool     `json:"use_image_special_tokens"`
+	UseThumbnail          *bool     `json:"use_thumbnail"`
+	// VisionConfig describes the vision encoder; zero values are replaced by
+	// defaults in KV and Tensors.
+	VisionConfig          struct {
+		HiddenSize        uint32  `json:"hidden_size"`
+		IntermediateSize  uint32  `json:"intermediate_size"`
+		NumAttentionHeads uint32  `json:"num_attention_heads"`
+		NumHiddenLayers   uint32  `json:"num_hidden_layers"`
+		NumChannels       uint32  `json:"num_channels"`
+		PatchSize         uint32  `json:"patch_size"`
+		LayerNormEpsilon  float32 `json:"layer_norm_eps"`
+	} `json:"vision_config"`
+	// Processor mirrors processor_config.json. Its values act as fallbacks
+	// for the same-named top-level fields when those are zero or nil.
+	Processor struct {
+		ImageProcessor struct {
+			DoImageSplitting *bool     `json:"do_image_splitting"`
+			DownsampleFactor uint32    `json:"downsample_factor"`
+			MaxImageTokens   uint32    `json:"max_image_tokens"`
+			MinImageTokens   uint32    `json:"min_image_tokens"`
+			MaxTiles         uint32    `json:"max_tiles"`
+			MinTiles         uint32    `json:"min_tiles"`
+			MaxPixelsTol     float32   `json:"max_pixels_tolerance"`
+			TileSize         uint32    `json:"tile_size"`
+			UseThumbnail     *bool     `json:"use_thumbnail"`
+			ImageMean        []float32 `json:"image_mean"`
+			ImageStd         []float32 `json:"image_std"`
+			Size             struct {
+				Height uint32 `json:"height"`
+				Width  uint32 `json:"width"`
+			} `json:"size"`
+		} `json:"image_processor"`
+	}
+}
+
+// textModel returns a pointer to the embedded text configuration, so callers
+// operate on the model's own TextConfig rather than a copy.
+func (p *lfm2VLTextModel) textModel() *lfm2Model {
+	return &p.TextConfig
+}
+
+// specialTokenTypes returns the special token types of the underlying text
+// model unchanged.
+func (p *lfm2VLTextModel) specialTokenTypes() []string {
+	return p.textModel().specialTokenTypes()
+}
+
+// parseMore loads the optional processor_config.json from fsys into
+// p.Processor. A missing file is not an error; any other read failure or a
+// JSON decoding failure is returned to the caller.
+func (p *lfm2VLTextModel) parseMore(fsys fs.FS) error {
+	raw, err := fs.ReadFile(fsys, "processor_config.json")
+	switch {
+	case err == nil:
+		return json.Unmarshal(raw, &p.Processor)
+	case errors.Is(err, fs.ErrNotExist):
+		return nil
+	default:
+		return err
+	}
+}
+
+// visionImageSize returns the square image size stored under
+// "vision.image_size".
+//
+// The tile size is the first non-zero of the processor's tile_size,
+// size.height and size.width, defaulting to 512. It is divided by the
+// downsample factor (model config, then processor config, then 2), and the
+// result is clamped to at least 1.
+func (p *lfm2VLTextModel) visionImageSize() uint32 {
+	// LFM2-VL image processor operates on 512 tiles and downsamples by factor 2
+	// before projection. Keep a fixed square image size compatible with position
+	// embeddings and the simplified runtime image pipeline.
+	tile := cmp.Or(
+		p.Processor.ImageProcessor.TileSize,
+		p.Processor.ImageProcessor.Size.Height,
+		p.Processor.ImageProcessor.Size.Width,
+		uint32(512),
+	)
+
+	// cmp.Or returns its first non-zero argument and the last argument is the
+	// literal 2, so downsample is never zero and the division is always safe.
+	downsample := cmp.Or(p.DownsampleFactor, p.Processor.ImageProcessor.DownsampleFactor, uint32(2))
+
+	return max(uint32(1), tile/downsample)
+}
+
+// KV returns the text model's metadata extended with "vision.*" keys.
+//
+// Numeric settings take the first non-zero value among the model config, the
+// processor config (where one exists) and a built-in default. Boolean
+// settings take the first explicitly set flag and default to true. When the
+// tokenizer vocabulary is available, the ids of the image start, image end
+// and thumbnail marker tokens are added if those tokens are present.
+func (p *lfm2VLTextModel) KV(t *Tokenizer) KV {
+	kv := p.textModel().KV(t)
+	vision, proc := &p.VisionConfig, &p.Processor.ImageProcessor
+
+	// firstSet yields the first non-nil flag, or fallback when all are nil.
+	firstSet := func(fallback bool, flags ...*bool) bool {
+		for _, flag := range flags {
+			if flag != nil {
+				return *flag
+			}
+		}
+		return fallback
+	}
+
+	kv["vision.block_count"] = cmp.Or(vision.NumHiddenLayers, uint32(27))
+	kv["vision.embedding_length"] = cmp.Or(vision.HiddenSize, uint32(1152))
+	kv["vision.feed_forward_length"] = cmp.Or(vision.IntermediateSize, uint32(4304))
+	kv["vision.attention.head_count"] = cmp.Or(vision.NumAttentionHeads, uint32(16))
+	kv["vision.attention.layer_norm_epsilon"] = cmp.Or(vision.LayerNormEpsilon, float32(1e-6))
+	kv["vision.patch_size"] = cmp.Or(vision.PatchSize, p.EncoderPatchSize, uint32(16))
+	kv["vision.num_channels"] = cmp.Or(vision.NumChannels, uint32(3))
+	kv["vision.image_size"] = p.visionImageSize()
+	kv["vision.projector.scale_factor"] = cmp.Or(p.DownsampleFactor, proc.DownsampleFactor, uint32(2))
+	kv["vision.projector.use_layernorm"] = p.ProjectorUseLayernorm
+	kv["vision.do_image_splitting"] = firstSet(true, p.DoImageSplitting, proc.DoImageSplitting)
+	kv["vision.min_tiles"] = cmp.Or(p.MinTiles, proc.MinTiles, uint32(2))
+	kv["vision.max_tiles"] = cmp.Or(p.MaxTiles, proc.MaxTiles, uint32(10))
+	kv["vision.tile_size"] = cmp.Or(p.TileSize, proc.TileSize, uint32(512))
+	kv["vision.min_image_tokens"] = cmp.Or(p.MinImageTokens, proc.MinImageTokens, uint32(64))
+	kv["vision.max_image_tokens"] = cmp.Or(p.MaxImageTokens, proc.MaxImageTokens, uint32(256))
+	kv["vision.max_pixels_tolerance"] = cmp.Or(p.MaxPixelsTolerance, proc.MaxPixelsTol, float32(2.0))
+	kv["vision.use_thumbnail"] = firstSet(true, p.UseThumbnail, proc.UseThumbnail)
+	kv["vision.use_image_special_tokens"] = firstSet(true, p.UseImageSpecialTokens)
+	kv["vision.image_mean"] = slices.Clone(defaultFloat32Slice(proc.ImageMean, []float32{0.5, 0.5, 0.5}))
+	kv["vision.image_std"] = slices.Clone(defaultFloat32Slice(proc.ImageStd, []float32{0.5, 0.5, 0.5}))
+	kv["vision.image_token_id"] = cmp.Or(p.ImageTokenID, uint32(396))
+
+	// Marker token ids come from the vocabulary; a key is left unset when the
+	// vocabulary is unavailable or does not contain the token.
+	if t != nil && t.Vocabulary != nil {
+		markers := [][2]string{
+			{"vision.image_start_token_id", "<|image_start|>"},
+			{"vision.image_end_token_id", "<|image_end|>"},
+			{"vision.image_thumbnail_token_id", "<|img_thumbnail|>"},
+		}
+		for _, marker := range markers {
+			if id := slices.Index(t.Vocabulary.Tokens, marker[1]); id >= 0 {
+				kv[marker[0]] = uint32(id)
+			}
+		}
+	}
+
+	return kv
+}
+
+// Tensors converts ts through the text model's Tensors and handles the vision
+// patch embedding specially.
+//
+// When "v.patch_embd.weight" is a 2-D tensor whose second dimension equals
+// channels*patch*patch, a repacker is installed that reorders each row via
+// repackPatchEmbeddingWeight, and the emitted tensor's shape is rewritten to
+// [out, channels, patch, patch]. A patch embedding with any other shape is
+// passed through untouched, so the reported shape always matches the data
+// layout. Patch size and channel count default to 16 and 3.
+func (p *lfm2VLTextModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	const patchEmbedding = "v.patch_embd.weight"
+
+	patchSize := int(cmp.Or(p.VisionConfig.PatchSize, p.EncoderPatchSize, uint32(16)))
+	numChannels := int(cmp.Or(p.VisionConfig.NumChannels, uint32(3)))
+	inputDim := uint64(numChannels * patchSize * patchSize)
+
+	// isFlatPatch reports whether shape is the flattened [out, C*P*P] layout
+	// that the repacker understands.
+	isFlatPatch := func(shape []uint64) bool {
+		return len(shape) == 2 && shape[1] == inputDim
+	}
+
+	for _, t := range ts {
+		if t.Name() != patchEmbedding || !isFlatPatch(t.Shape()) {
+			continue
+		}
+		t.SetRepacker(func(_ string, data []float32, srcShape []uint64) ([]float32, error) {
+			return repackPatchEmbeddingWeight(data, srcShape, numChannels, patchSize)
+		})
+	}
+
+	out := p.textModel().Tensors(ts)
+	for _, t := range out {
+		// Only advertise the 4-D shape for tensors that were actually
+		// repacked above; reshaping without repacking would mislabel the data.
+		if t.Name == patchEmbedding && isFlatPatch(t.Shape) {
+			t.Shape = []uint64{t.Shape[0], uint64(numChannels), uint64(patchSize), uint64(patchSize)}
+		}
+	}
+	return out
+}
+
+// Replacements returns the old/new name pairs used to rename checkpoint
+// tensors.
+//
+// Every pair from the text model is kept. For pairs whose source starts with
+// "model.", two more variants are added with the "model.language_model." and
+// "model.language_model.model." prefixes, mapping to the same target. The
+// vision tower and multimodal projector pairs are appended last.
+func (p *lfm2VLTextModel) Replacements() []string {
+	out := make([]string, 0, 96)
+
+	base := p.textModel().Replacements()
+	for i := 0; i+1 < len(base); i += 2 {
+		from, to := base[i], base[i+1]
+		out = append(out, from, to)
+
+		suffix, hasModelPrefix := strings.CutPrefix(from, "model.")
+		if !hasModelPrefix {
+			continue
+		}
+		out = append(out,
+			"model.language_model."+suffix, to,
+			"model.language_model.model."+suffix, to,
+		)
+	}
+
+	// Vision tower + multimodal projector tensors (single-file conversion).
+	return append(out,
+		"model.vision_tower.vision_model.embeddings.patch_embedding", "v.patch_embd",
+		"model.vision_tower.vision_model.embeddings.position_embedding", "v.position_embd",
+		"model.vision_tower.vision_model.encoder.layers", "v.blk",
+		"model.vision_tower.vision_model.post_layernorm", "v.post_ln",
+		"model.multi_modal_projector.layer_norm", "mm.layer_norm",
+		"model.multi_modal_projector.linear_1", "mm.1",
+		"model.multi_modal_projector.linear_2", "mm.2",
+		"self_attn.q_proj", "attn_q",
+		"self_attn.k_proj", "attn_k",
+		"self_attn.v_proj", "attn_v",
+		"self_attn.out_proj", "attn_out",
+		"layer_norm1", "ln1",
+		"layer_norm2", "ln2",
+		"mlp.fc1", "ffn_up",
+		"mlp.fc2", "ffn_down",
+	)
+}
+
+// lfm2VLProjectorModel converts the vision encoder + projector component of LFM2 VL checkpoints.
+//
+// Fields with a json tag are read from the model configuration. Processor has
+// no tag; parseMore fills it from processor_config.json when that file exists.
+// Zero values are replaced by defaults in KV, imageSize and Tensors.
+type lfm2VLProjectorModel struct {
+	ModelParameters
+	DownsampleFactor   uint32 `json:"downsample_factor"`
+	ProjectorHiddenDim uint32 `json:"projector_hidden_size"`
+	// VisionModel describes the vision encoder. A non-zero ImageSize takes
+	// precedence over the size derived from the processor settings.
+	VisionModel        struct {
+		HiddenSize        uint32  `json:"hidden_size"`
+		IntermediateSize  uint32  `json:"intermediate_size"`
+		NumAttentionHeads uint32  `json:"num_attention_heads"`
+		NumHiddenLayers   uint32  `json:"num_hidden_layers"`
+		NumChannels       uint32  `json:"num_channels"`
+		PatchSize         uint32  `json:"patch_size"`
+		LayerNormEpsilon  float32 `json:"layer_norm_eps"`
+		ImageSize         uint32  `json:"image_size"`
+	} `json:"vision_config"`
+	// Processor mirrors the parts of processor_config.json this converter reads.
+	Processor struct {
+		ImageProcessor struct {
+			DownsampleFactor uint32    `json:"downsample_factor"`
+			TileSize         uint32    `json:"tile_size"`
+			ImageMean        []float32 `json:"image_mean"`
+			ImageStd         []float32 `json:"image_std"`
+			Size             struct {
+				Height uint32 `json:"height"`
+				Width  uint32 `json:"width"`
+			} `json:"size"`
+		} `json:"image_processor"`
+	}
+}
+
+// Compile-time checks that both LFM2 VL converters implement ModelConverter
+// and moreParser.
+var (
+	_ ModelConverter = (*lfm2VLTextModel)(nil)
+	_ ModelConverter = (*lfm2VLProjectorModel)(nil)
+	_ moreParser     = (*lfm2VLTextModel)(nil)
+	_ moreParser     = (*lfm2VLProjectorModel)(nil)
+)
+
+// parseMore loads the optional processor_config.json from fsys into
+// p.Processor. A missing file is not an error; any other read failure or a
+// JSON decoding failure is returned to the caller.
+func (p *lfm2VLProjectorModel) parseMore(fsys fs.FS) error {
+	raw, err := fs.ReadFile(fsys, "processor_config.json")
+	switch {
+	case err == nil:
+		return json.Unmarshal(raw, &p.Processor)
+	case errors.Is(err, fs.ErrNotExist):
+		return nil
+	default:
+		return err
+	}
+}
+
+// imageSize returns the value stored under "clip.vision.image_size".
+//
+// An explicit vision_config image_size wins. Otherwise the base size is the
+// first non-zero of the processor's tile_size, size.height and size.width,
+// defaulting to 256. It is divided by the downsample factor (model config,
+// then processor config, then 2), and the result is clamped to at least 1.
+func (p *lfm2VLProjectorModel) imageSize() uint32 {
+	if p.VisionModel.ImageSize > 0 {
+		return p.VisionModel.ImageSize
+	}
+
+	baseSize := cmp.Or(
+		p.Processor.ImageProcessor.TileSize,
+		p.Processor.ImageProcessor.Size.Height,
+		p.Processor.ImageProcessor.Size.Width,
+		uint32(256),
+	)
+
+	// cmp.Or returns its first non-zero argument and the last argument is the
+	// literal 2, so downsample is never zero and the division is always safe.
+	downsample := cmp.Or(p.DownsampleFactor, p.Processor.ImageProcessor.DownsampleFactor, uint32(2))
+
+	return max(uint32(1), baseSize/downsample)
+}
+
+// KV returns the metadata for the vision encoder + projector output. The
+// tokenizer is ignored. Architecture, type and projector keys are fixed
+// values; every "clip.vision.*" setting uses the configured value when it is
+// non-zero and a built-in default otherwise.
+func (p *lfm2VLProjectorModel) KV(_ *Tokenizer) KV {
+	vision, proc := &p.VisionModel, &p.Processor.ImageProcessor
+
+	return KV{
+		"general.architecture":         "clip",
+		"general.type":                 "mmproj",
+		"general.file_type":            uint32(1),
+		"general.quantization_version": uint32(2),
+		"clip.has_vision_encoder":      true,
+		"clip.projector_type":          "lfm2",
+		"clip.use_gelu":                true,
+
+		"clip.vision.block_count":                  cmp.Or(vision.NumHiddenLayers, uint32(27)),
+		"clip.vision.embedding_length":             cmp.Or(vision.HiddenSize, uint32(1152)),
+		"clip.vision.feed_forward_length":          cmp.Or(vision.IntermediateSize, uint32(4304)),
+		"clip.vision.attention.head_count":         cmp.Or(vision.NumAttentionHeads, uint32(16)),
+		"clip.vision.attention.layer_norm_epsilon": cmp.Or(vision.LayerNormEpsilon, float32(1e-6)),
+		"clip.vision.patch_size":                   cmp.Or(vision.PatchSize, uint32(16)),
+		"clip.vision.image_size":                   p.imageSize(),
+		"clip.vision.projection_dim":               cmp.Or(p.ProjectorHiddenDim, uint32(2048)),
+		"clip.vision.projector.scale_factor":       cmp.Or(p.DownsampleFactor, proc.DownsampleFactor, uint32(2)),
+		"clip.vision.image_mean":                   slices.Clone(defaultFloat32Slice(proc.ImageMean, []float32{0.5, 0.5, 0.5})),
+		"clip.vision.image_std":                    slices.Clone(defaultFloat32Slice(proc.ImageStd, []float32{0.5, 0.5, 0.5})),
+	}
+}
+
+// defaultFloat32Slice returns v unless it is nil or empty, in which case it
+// returns fallback. Neither slice is copied.
+func defaultFloat32Slice(v, fallback []float32) []float32 {
+	if len(v) == 0 {
+		return fallback
+	}
+
+	return v
+}
+
+// Tensors returns the vision ("v.") and projector ("mm.") tensors from ts and
+// drops everything else.
+//
+// When "v.patch_embd.weight" is a 2-D tensor whose second dimension equals
+// channels*patch*patch, it is emitted with shape [out, channels, patch, patch]
+// and a repacker is installed that reorders each row via
+// repackPatchEmbeddingWeight. Channel count and patch size default to 3 and 16.
+func (p *lfm2VLProjectorModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
+
+	numChannels := cmp.Or(p.VisionModel.NumChannels, uint32(3))
+	patchSize := cmp.Or(p.VisionModel.PatchSize, uint32(16))
+	// Widen before multiplying so an oversized configuration value cannot
+	// wrap around in uint32 and accidentally match a tensor dimension.
+	inputDim := uint64(numChannels) * uint64(patchSize) * uint64(patchSize)
+
+	for _, t := range ts {
+		name := t.Name()
+		if !strings.HasPrefix(name, "v.") && !strings.HasPrefix(name, "mm.") {
+			continue
+		}
+
+		shape := t.Shape()
+		if name == "v.patch_embd.weight" && len(shape) == 2 && shape[1] == inputDim {
+			shape = []uint64{shape[0], uint64(numChannels), uint64(patchSize), uint64(patchSize)}
+			channels := int(numChannels)
+			patch := int(patchSize)
+			t.SetRepacker(func(_ string, data []float32, srcShape []uint64) ([]float32, error) {
+				return repackPatchEmbeddingWeight(data, srcShape, channels, patch)
+			})
+		}
+
+		out = append(out, &ggml.Tensor{
+			Name:     name,
+			Kind:     t.Kind(),
+			Shape:    slices.Clone(shape),
+			WriterTo: t,
+		})
+	}
+
+	return out
+}
+
+// Replacements returns the old/new name pairs that map vision tower and
+// multimodal projector tensor names onto the "v." and "mm." prefixes that
+// Tensors keeps.
+//
+// NOTE(review): unlike lfm2VLTextModel.Replacements, this list has no entry
+// for "model.multi_modal_projector.layer_norm". A tensor with that name
+// would keep its original prefix and be dropped by Tensors. Confirm whether
+// that is intended.
+func (p *lfm2VLProjectorModel) Replacements() []string {
+	return []string{
+		"model.multi_modal_projector.linear_1", "mm.1",
+		"model.multi_modal_projector.linear_2", "mm.2",
+		"model.vision_tower.vision_model.embeddings.patch_embedding", "v.patch_embd",
+		"model.vision_tower.vision_model.embeddings.position_embedding", "v.position_embd",
+		"model.vision_tower.vision_model.encoder.layers", "v.blk",
+		"self_attn.q_proj", "attn_q",
+		"self_attn.k_proj", "attn_k",
+		"self_attn.v_proj", "attn_v",
+		"self_attn.out_proj", "attn_out",
+		"layer_norm1", "ln1",
+		"layer_norm2", "ln2",
+		"mlp.fc1", "ffn_up",
+		"mlp.fc2", "ffn_down",
+		"model.vision_tower.vision_model.post_layernorm", "v.post_ln",
+	}
+}
+
+// repackPatchEmbeddingWeight reorders each row of a flattened patch embedding
+// weight from pixel-major order (index (y*patch+x)*channels + c) to
+// channel-major order (index c*patch*patch + y*patch + x).
+//
+// srcShape must be [rows, channels*patch*patch] and data must hold exactly
+// rows*channels*patch*patch values; otherwise an error is returned. The input
+// slice is not modified; a new slice of the same length is returned.
+func repackPatchEmbeddingWeight(data []float32, srcShape []uint64, channels, patch int) ([]float32, error) {
+	if rank := len(srcShape); rank != 2 {
+		return nil, fmt.Errorf("invalid patch embedding shape rank: %d", rank)
+	}
+
+	rows, rowLen := int(srcShape[0]), int(srcShape[1])
+	if want := channels * patch * patch; rowLen != want {
+		return nil, fmt.Errorf("invalid patch embedding input dim: got %d, want %d", rowLen, want)
+	}
+	if want := rows * rowLen; len(data) != want {
+		return nil, fmt.Errorf("invalid patch embedding data size: got %d, want %d", len(data), want)
+	}
+
+	plane := patch * patch
+	result := make([]float32, len(data))
+
+	for row := range rows {
+		// Source and destination rows start at the same offset; only the
+		// order of values inside the row changes.
+		rowStart := row * rowLen
+		for y := range patch {
+			for x := range patch {
+				pixel := y*patch + x
+				for c := range channels {
+					result[rowStart+c*plane+pixel] = data[rowStart+pixel*channels+c]
+				}
+			}
+		}
+	}
+
+	return result, nil
+}
--- a/convert/convert_lfm2_vl_test.go
+++ b/convert/convert_lfm2_vl_test.go
@@ -0,0 +1,249 @@
+package convert
+
+import (
+	"slices"
+	"strings"
+	"testing"
+)
+
+// TestLFM2VLTextModelKVUsesTextConfig checks that lfm2VLTextModel.KV carries
+// the text model's keys (architecture, adjusted feed_forward_length) and adds
+// the expected "vision.*" keys, including defaults for settings left unset
+// and the marker token id looked up in the vocabulary.
+func TestLFM2VLTextModelKVUsesTextConfig(t *testing.T) {
+	model := lfm2VLTextModel{
+		TextConfig: lfm2Model{
+			ModelParameters:       ModelParameters{ModelType: "lfm2", VocabSize: 65536},
+			HiddenSize:            2048,
+			NumHiddenLayers:       16,
+			MaxPositionEmbeddings: 128000,
+			IntermediateSize:      12288,
+			BlockFFDim:            12288,
+			BlockAutoAdjustFFDim:  true,
+			BlockMultipleOf:       256,
+			BlockFFNDimMultiplier: 1.0,
+			NumAttentionHeads:     32,
+			NumKeyValueHeads:      8,
+			LayerTypes:            []string{"conv", "full_attention"},
+			NormEps:               1e-5,
+			ConvLCache:            3,
+		},
+		DownsampleFactor: 2,
+	}
+
+	model.VisionConfig.HiddenSize = 1152
+	model.VisionConfig.IntermediateSize = 4304
+	model.VisionConfig.NumAttentionHeads = 16
+	model.VisionConfig.NumHiddenLayers = 27
+	model.VisionConfig.NumChannels = 3
+	model.VisionConfig.PatchSize = 16
+	model.VisionConfig.LayerNormEpsilon = 1e-6
+
+	model.Processor.ImageProcessor.TileSize = 512
+	model.Processor.ImageProcessor.ImageMean = []float32{0.5, 0.5, 0.5}
+	model.Processor.ImageProcessor.ImageStd = []float32{0.5, 0.5, 0.5}
+
+	vocab := &Vocabulary{
+		Model:  "gpt2",
+		Tokens: []string{"<|pad|>", "<image>", "<|image_start|>", "<|image_end|>", "<|img_thumbnail|>"},
+	}
+	kv := model.KV(&Tokenizer{Vocabulary: vocab})
+
+	// Keys are compared in the listed order; the first mismatch aborts.
+	expectations := []struct {
+		key  string
+		want any
+	}{
+		{"general.architecture", "lfm2"},
+		{"feed_forward_length", uint32(8192)},
+		{"vision.block_count", uint32(27)},
+		{"vision.image_size", uint32(256)},
+		{"vision.image_token_id", uint32(396)},
+		{"vision.image_start_token_id", uint32(2)},
+		{"vision.do_image_splitting", true},
+		{"vision.min_tiles", uint32(2)},
+		{"vision.max_tiles", uint32(10)},
+		{"vision.tile_size", uint32(512)},
+		{"vision.use_thumbnail", true},
+		{"vision.use_image_special_tokens", true},
+	}
+	for _, tc := range expectations {
+		if got := kv[tc.key]; got != tc.want {
+			t.Fatalf("%s = %v, want %v", tc.key, got, tc.want)
+		}
+	}
+}
+
+// TestLFM2VLTextModelTensorsIncludeVision checks that lfm2VLTextModel.Tensors
+// keeps vision/projector tensors in its output and reports the patch
+// embedding with the 4-D shape [1152 3 16 16].
+func TestLFM2VLTextModelTensorsIncludeVision(t *testing.T) {
+	var model lfm2VLTextModel
+	model.VisionConfig.PatchSize = 16
+	model.VisionConfig.NumChannels = 3
+
+	out := model.Tensors([]Tensor{
+		newLFM2StubTensor("model.embed_tokens.weight", []uint64{65536, 2048}),
+		newLFM2StubTensor("model.layers.0.ffn_norm.weight", []uint64{2048}),
+		newLFM2StubTensor("v.patch_embd.weight", []uint64{1152, 768}),
+		newLFM2StubTensor("v.blk.0.attn_q.weight", []uint64{1152, 1152}),
+		newLFM2StubTensor("mm.1.weight", []uint64{2048, 4608}),
+	})
+	if len(out) == 0 {
+		t.Fatal("expected non-empty tensor list")
+	}
+
+	var sawPatch, sawVision bool
+	for _, tensor := range out {
+		switch name := tensor.Name; {
+		case name == "v.patch_embd.weight":
+			// The patch embedding also counts as a vision tensor.
+			sawPatch, sawVision = true, true
+			if !slices.Equal(tensor.Shape, []uint64{1152, 3, 16, 16}) {
+				t.Fatalf("v.patch_embd.weight shape = %v, want [1152 3 16 16]", tensor.Shape)
+			}
+		case strings.HasPrefix(name, "v."), strings.HasPrefix(name, "mm."):
+			sawVision = true
+		}
+	}
+
+	if !sawPatch {
+		t.Fatal("expected v.patch_embd.weight in output tensors")
+	}
+	if !sawVision {
+		t.Fatal("expected at least one vision/projector tensor in output")
+	}
+}
+
+// TestLFM2VLTextModelReplacements checks that text tensor names nested under
+// "model.language_model." and "model.language_model.model." are rewritten to
+// the same targets as their plain "model." counterparts.
+func TestLFM2VLTextModelReplacements(t *testing.T) {
+	var model lfm2VLTextModel
+	rewrite := strings.NewReplacer(model.Replacements()...).Replace
+
+	cases := []struct {
+		name, in, want string
+	}{
+		{
+			name: "language_model_embed_tokens",
+			in:   "model.language_model.embed_tokens.weight",
+			want: "token_embd.weight",
+		},
+		{
+			name: "language_model_layers",
+			in:   "model.language_model.layers.2.self_attn.q_proj.weight",
+			want: "blk.2.attn_q.weight",
+		},
+		{
+			name: "nested_language_model_prefix",
+			in:   "model.language_model.model.embedding_norm.weight",
+			want: "token_embd_norm.weight",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := rewrite(tc.in)
+			if got != tc.want {
+				t.Fatalf("replacement(%q) = %q, want %q", tc.in, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestLFM2VLProjectorKV checks the fixed architecture and projector type keys
+// and that a 512 tile with downsample factor 2 yields an image size of 256.
+func TestLFM2VLProjectorKV(t *testing.T) {
+	model := lfm2VLProjectorModel{
+		DownsampleFactor:   2,
+		ProjectorHiddenDim: 2048,
+	}
+
+	vision := &model.VisionModel
+	vision.NumHiddenLayers = 27
+	vision.HiddenSize = 1152
+	vision.IntermediateSize = 4304
+	vision.NumAttentionHeads = 16
+	vision.PatchSize = 16
+	vision.LayerNormEpsilon = 1e-6
+
+	proc := &model.Processor.ImageProcessor
+	proc.TileSize = 512
+	proc.ImageMean = []float32{0.5, 0.5, 0.5}
+	proc.ImageStd = []float32{0.5, 0.5, 0.5}
+
+	kv := model.KV(nil)
+
+	expectations := []struct {
+		key  string
+		want any
+	}{
+		{"general.architecture", "clip"},
+		{"clip.projector_type", "lfm2"},
+		{"clip.vision.image_size", uint32(256)},
+	}
+	for _, tc := range expectations {
+		if got := kv[tc.key]; got != tc.want {
+			t.Fatalf("%s = %v, want %v", tc.key, got, tc.want)
+		}
+	}
+}
+
+// TestLFM2VLProjectorTensorsPatchReshape checks that the projector converter
+// drops non-vision tensors and reports the patch embedding with the 4-D
+// shape [1152 3 16 16].
+func TestLFM2VLProjectorTensorsPatchReshape(t *testing.T) {
+	var model lfm2VLProjectorModel
+	model.VisionModel.NumChannels = 3
+	model.VisionModel.PatchSize = 16
+
+	out := model.Tensors([]Tensor{
+		newLFM2StubTensor("v.patch_embd.weight", []uint64{1152, 768}),
+		newLFM2StubTensor("mm.1.weight", []uint64{2048, 4608}),
+		newLFM2StubTensor("model.embed_tokens.weight", []uint64{65536, 2048}),
+	})
+	if n := len(out); n != 2 {
+		t.Fatalf("expected 2 tensors, got %d", n)
+	}
+
+	// Take the shape of the first patch embedding tensor; it stays nil when
+	// none is present, which fails the comparison below.
+	var patchShape []uint64
+	for _, tensor := range out {
+		if tensor.Name != "v.patch_embd.weight" {
+			continue
+		}
+		patchShape = tensor.Shape
+		break
+	}
+
+	if want := []uint64{1152, 3, 16, 16}; !slices.Equal(patchShape, want) {
+		t.Fatalf("v.patch_embd.weight shape = %v, want %v", patchShape, want)
+	}
+}
+
+// TestRepackPatchEmbeddingWeight repacks a single 2x2 patch with two channels
+// and expects the values regrouped from pixel-major to channel-major order.
+func TestRepackPatchEmbeddingWeight(t *testing.T) {
+	const (
+		channels = 2
+		patch    = 2
+	)
+
+	// One row, laid out as consecutive (channel 0, channel 1) pairs per pixel.
+	input := []float32{
+		0, 1, // y=0,x=0
+		2, 3, // y=0,x=1
+		4, 5, // y=1,x=0
+		6, 7, // y=1,x=1
+	}
+	expected := []float32{0, 2, 4, 6, 1, 3, 5, 7}
+
+	actual, err := repackPatchEmbeddingWeight(input, []uint64{1, 8}, channels, patch)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if !slices.Equal(actual, expected) {
+		t.Fatalf("repacked data = %v, want %v", actual, expected)
+	}
+}
--- a/convert/convert_nemotron_h.go
+++ b/convert/convert_nemotron_h.go
@@ -0,0 +1,385 @@
+package convert
+
+import (
+	"cmp"
+	"encoding/json"
+	"fmt"
+	"io/fs"
+	"math"
+	"slices"
+	"strings"
+
+	"github.com/ollama/ollama/fs/ggml"
+)
+
+// hybridPattern is the hybrid_override_pattern config value. It decodes from
+// either a JSON string or a JSON array of strings.
+type hybridPattern string
+
+// UnmarshalJSON implements json.Unmarshaler.
+//
+// JSON null clears the pattern. A JSON string is stored with surrounding
+// whitespace trimmed. A JSON array of strings is concatenated in order with no
+// separator. Any other JSON value yields an error.
+func (p *hybridPattern) UnmarshalJSON(data []byte) error {
+	if string(data) == "null" {
+		*p = ""
+		return nil
+	}
+
+	// Try the plain string form first.
+	var text string
+	if json.Unmarshal(data, &text) == nil {
+		*p = hybridPattern(strings.TrimSpace(text))
+		return nil
+	}
+
+	// Fall back to the array-of-strings form.
+	var pieces []string
+	if json.Unmarshal(data, &pieces) != nil {
+		return fmt.Errorf("hybrid_override_pattern must be a string or string array")
+	}
+	*p = hybridPattern(strings.Join(pieces, ""))
+	return nil
+}
+
+// nemotronHModel holds the JSON-decoded configuration used to convert a
+// Nemotron-H checkpoint. Several settings can arrive under two alternative
+// JSON keys; the accessor methods on this type pick the first non-zero one.
+type nemotronHModel struct {
+	ModelParameters
+	MaxPositionEmbeddings uint32        `json:"max_position_embeddings"`
+	HiddenSize            uint32        `json:"hidden_size"`
+	NumHiddenLayers       uint32        `json:"num_hidden_layers"`
+	NumAttentionHeads     uint32        `json:"num_attention_heads"`
+	// NumKeyValueHeads defaults to NumAttentionHeads in parseMore when zero.
+	NumKeyValueHeads      uint32        `json:"num_key_value_heads"`
+	// HeadDim is derived as HiddenSize / NumAttentionHeads in parseMore when zero.
+	HeadDim               uint32        `json:"head_dim"`
+	// LayerNormEpsilon and NormEpsilon are alternatives; epsilon() prefers
+	// NormEpsilon, then LayerNormEpsilon, then 1e-5.
+	LayerNormEpsilon      float32       `json:"layer_norm_epsilon"`
+	NormEpsilon           float32       `json:"norm_eps"`
+	RopeTheta             float32       `json:"rope_theta"`
+	PartialRotaryFactor   float32       `json:"partial_rotary_factor"`
+	ConvKernel            uint32        `json:"conv_kernel"`
+	SSMStateSize          uint32        `json:"ssm_state_size"`
+	MambaNumHeads         uint32        `json:"mamba_num_heads"`
+	MambaHeadDim          uint32        `json:"mamba_head_dim"`
+	// NGroups defaults to 1 in parseMore when zero.
+	NGroups               uint32        `json:"n_groups"`
+	IntermediateSize      uint32        `json:"intermediate_size"`
+	// HybridOverridePattern has one character per layer; layerArrays accepts
+	// 'M', '*' or 'A', 'E' and '-'.
+	HybridOverridePattern hybridPattern `json:"hybrid_override_pattern"`
+
+	// MoE
+	// NumExperts / NRoutedExperts are alternatives (NRoutedExperts wins), as
+	// are NumSharedExperts / NSharedExperts (NSharedExperts wins).
+	NumExperts                  uint32  `json:"num_experts"`
+	NumSharedExperts            uint32  `json:"num_shared_experts"`
+	NRoutedExperts              uint32  `json:"n_routed_experts"`
+	NSharedExperts              uint32  `json:"n_shared_experts"`
+	NumExpertsPerTok            uint32  `json:"num_experts_per_tok"`
+	MoEIntermediateSize         uint32  `json:"moe_intermediate_size"`
+	MoESharedExpertIntermediate uint32  `json:"moe_shared_expert_intermediate_size"`
+	NormTopKProb                bool    `json:"norm_topk_prob"`
+	RoutedScalingFactor         float32 `json:"routed_scaling_factor"`
+	ExpertGroupCount            uint32  `json:"n_group"`
+	ExpertGroupUsedCount        uint32  `json:"topk_group"`
+}
+
+var _ ModelConverter = (*nemotronHModel)(nil)
+
+// parseMore validates the decoded configuration and fills in defaults for
+// head_dim, num_key_value_heads and n_groups. The file system argument is not
+// used. It returns an error describing the first missing or inconsistent
+// setting.
+func (n *nemotronHModel) parseMore(_ fs.FS) error {
+	switch {
+	case n.NumHiddenLayers == 0:
+		return fmt.Errorf("nemotron_h: num_hidden_layers must be set")
+	case n.HiddenSize == 0:
+		return fmt.Errorf("nemotron_h: hidden_size must be set")
+	case n.NumAttentionHeads == 0:
+		return fmt.Errorf("nemotron_h: num_attention_heads must be set")
+	}
+
+	// Derive head_dim from the hidden size when the config omits it.
+	if n.HeadDim == 0 {
+		if remainder := n.HiddenSize % n.NumAttentionHeads; remainder != 0 {
+			return fmt.Errorf("nemotron_h: hidden_size (%d) must be divisible by num_attention_heads (%d)", n.HiddenSize, n.NumAttentionHeads)
+		}
+		n.HeadDim = n.HiddenSize / n.NumAttentionHeads
+	}
+	if n.NumKeyValueHeads == 0 {
+		n.NumKeyValueHeads = n.NumAttentionHeads
+	}
+
+	switch {
+	case n.ConvKernel == 0:
+		return fmt.Errorf("nemotron_h: conv_kernel must be set")
+	case n.SSMStateSize == 0:
+		return fmt.Errorf("nemotron_h: ssm_state_size must be set")
+	case n.ssmHeadCount() == 0:
+		return fmt.Errorf("nemotron_h: mamba_num_heads must be set")
+	case n.MambaHeadDim == 0:
+		return fmt.Errorf("nemotron_h: mamba_head_dim must be set")
+	}
+	if n.NGroups == 0 {
+		n.NGroups = 1
+	}
+
+	// The per-layer arrays are only computed here to surface pattern errors.
+	if _, _, err := n.layerArrays(); err != nil {
+		return err
+	}
+
+	if !n.isMoE() {
+		return nil
+	}
+
+	experts := n.routedExpertCount()
+	switch {
+	case experts == 0:
+		return fmt.Errorf("nemotron_h: routed expert count must be set for MoE models")
+	case n.NumExpertsPerTok == 0:
+		return fmt.Errorf("nemotron_h: num_experts_per_tok must be set for MoE models")
+	case n.NumExpertsPerTok > experts:
+		return fmt.Errorf("nemotron_h: num_experts_per_tok (%d) cannot exceed expert_count (%d)", n.NumExpertsPerTok, experts)
+	case n.moeIntermediateSize() == 0:
+		return fmt.Errorf("nemotron_h: moe_intermediate_size must be set for MoE models")
+	}
+
+	return nil
+}
+
+// isMoE reports whether any of the routed expert count, num_experts_per_tok
+// or moe_intermediate_size is non-zero.
+func (n *nemotronHModel) isMoE() bool {
+	if n.routedExpertCount() != 0 {
+		return true
+	}
+	return n.NumExpertsPerTok != 0 || n.MoEIntermediateSize != 0
+}
+
+// routedExpertCount returns n_routed_experts when non-zero, otherwise
+// num_experts.
+func (n *nemotronHModel) routedExpertCount() uint32 {
+	if n.NRoutedExperts != 0 {
+		return n.NRoutedExperts
+	}
+	return n.NumExperts
+}
+
+// sharedExpertCount returns n_shared_experts when non-zero, otherwise
+// num_shared_experts.
+func (n *nemotronHModel) sharedExpertCount() uint32 {
+	if n.NSharedExperts != 0 {
+		return n.NSharedExperts
+	}
+	return n.NumSharedExperts
+}
+
+// ssmHeadCount returns the configured mamba_num_heads value.
+func (n *nemotronHModel) ssmHeadCount() uint32 {
+	return n.MambaNumHeads
+}
+
+// ssmInnerSize returns mamba_head_dim multiplied by the SSM head count.
+func (n *nemotronHModel) ssmInnerSize() uint32 {
+	heads := n.ssmHeadCount()
+	return heads * n.MambaHeadDim
+}
+
+// epsilon returns norm_eps when non-zero, then layer_norm_epsilon when
+// non-zero, and 1e-5 otherwise.
+func (n *nemotronHModel) epsilon() float32 {
+	switch {
+	case n.NormEpsilon != 0:
+		return n.NormEpsilon
+	case n.LayerNormEpsilon != 0:
+		return n.LayerNormEpsilon
+	default:
+		return float32(1e-5)
+	}
+}
+
+// moeIntermediateSize returns moe_intermediate_size when non-zero, otherwise
+// intermediate_size.
+func (n *nemotronHModel) moeIntermediateSize() uint32 {
+	if n.MoEIntermediateSize != 0 {
+		return n.MoEIntermediateSize
+	}
+	return n.IntermediateSize
+}
+
+// denseIntermediateSize returns intermediate_size when non-zero, otherwise
+// moe_intermediate_size.
+func (n *nemotronHModel) denseIntermediateSize() uint32 {
+	if n.IntermediateSize != 0 {
+		return n.IntermediateSize
+	}
+	return n.MoEIntermediateSize
+}
+
+// layerArrays expands hybrid_override_pattern into two per-layer arrays: the
+// KV head count (non-zero only for '*' / 'A' layers) and the feed-forward
+// length (non-zero only for 'E' and '-' layers). 'M' layers leave both entries
+// at zero.
+//
+// It returns an error when the pattern is empty, when its length differs from
+// num_hidden_layers, when it contains an unsupported character, or when an
+// 'E' or '-' layer has no usable intermediate size.
+func (n *nemotronHModel) layerArrays() (headCountKV []uint32, ffnLengths []uint32, err error) {
+	layers := []rune(strings.TrimSpace(string(n.HybridOverridePattern)))
+	if len(layers) == 0 {
+		return nil, nil, fmt.Errorf("nemotron_h: hybrid_override_pattern must be set")
+	}
+	if len(layers) != int(n.NumHiddenLayers) {
+		return nil, nil, fmt.Errorf("nemotron_h: hybrid_override_pattern length (%d) must match num_hidden_layers (%d)", len(layers), n.NumHiddenLayers)
+	}
+
+	kvHeads := n.NumKeyValueHeads
+	if kvHeads == 0 {
+		kvHeads = n.NumAttentionHeads
+	}
+	expertFFN, plainFFN := n.moeIntermediateSize(), n.denseIntermediateSize()
+
+	headCountKV = make([]uint32, n.NumHiddenLayers)
+	ffnLengths = make([]uint32, n.NumHiddenLayers)
+
+	for idx, kind := range layers {
+		switch kind {
+		case 'M':
+			// Recurrent layer: both entries stay zero.
+		case 'A', '*':
+			// Attention layer: only the KV head count is recorded.
+			headCountKV[idx] = kvHeads
+		case 'E':
+			// MoE layer.
+			if expertFFN == 0 {
+				return nil, nil, fmt.Errorf("nemotron_h: moe layer at index %d but moe_intermediate_size is zero", idx)
+			}
+			ffnLengths[idx] = expertFFN
+		case '-':
+			// Dense FFN layer.
+			if plainFFN == 0 {
+				return nil, nil, fmt.Errorf("nemotron_h: dense FFN layer at index %d but intermediate_size is zero", idx)
+			}
+			ffnLengths[idx] = plainFFN
+		default:
+			return nil, nil, fmt.Errorf("nemotron_h: unsupported layer type %q in hybrid_override_pattern at index %d", kind, idx)
+		}
+	}
+
+	return headCountKV, ffnLengths, nil
+}
+
+// KV builds the metadata map for the converted model on top of the entries
+// produced by ModelParameters.KV. The architecture is "nemotron_h_moe" when
+// isMoE reports true and "nemotron_h" otherwise. The per-layer arrays are
+// omitted when layerArrays fails, and the expert_* keys are written only for
+// MoE models.
+func (n *nemotronHModel) KV(t *Tokenizer) KV {
+	moe := n.isMoE()
+	eps := n.epsilon()
+
+	kv := n.ModelParameters.KV(t)
+
+	if moe {
+		kv["general.architecture"] = "nemotron_h_moe"
+	} else {
+		kv["general.architecture"] = "nemotron_h"
+	}
+	kv["block_count"] = n.NumHiddenLayers
+	kv["context_length"] = n.MaxPositionEmbeddings
+	kv["embedding_length"] = n.HiddenSize
+
+	kv["attention.head_count"] = n.NumAttentionHeads
+	kv["attention.key_length"] = n.HeadDim
+	kv["attention.value_length"] = n.HeadDim
+	kv["attention.layer_norm_epsilon"] = eps
+	kv["attention.layer_norm_rms_epsilon"] = eps
+
+	// rope_theta falls back to 10000 when the config leaves it at zero.
+	theta := n.RopeTheta
+	if theta == 0 {
+		theta = 10000
+	}
+	kv["rope.freq_base"] = theta
+	if factor := n.PartialRotaryFactor; factor > 0 && factor <= 1 {
+		kv["rope.dimension_count"] = uint32(float32(n.HeadDim) * factor)
+	}
+
+	// A pattern error is not reported from here; the two keys are just left out.
+	headCountKV, ffnLengths, err := n.layerArrays()
+	if err == nil {
+		kv["attention.head_count_kv"] = headCountKV
+		kv["feed_forward_length"] = ffnLengths
+	}
+
+	kv["ssm.conv_kernel"] = n.ConvKernel
+	kv["ssm.inner_size"] = n.ssmInnerSize()
+	kv["ssm.state_size"] = n.SSMStateSize
+	kv["ssm.group_count"] = n.NGroups
+	kv["ssm.time_step_rank"] = n.ssmHeadCount()
+
+	if !moe {
+		return kv
+	}
+
+	kv["expert_count"] = n.routedExpertCount()
+	kv["expert_used_count"] = n.NumExpertsPerTok
+	kv["expert_feed_forward_length"] = n.moeIntermediateSize()
+	kv["expert_weights_norm"] = n.NormTopKProb
+	kv["expert_weights_scale"] = n.RoutedScalingFactor
+
+	// Optional expert settings are written only when non-zero.
+	if shared := n.sharedExpertCount(); shared > 0 {
+		kv["expert_shared_count"] = shared
+	}
+	if n.MoESharedExpertIntermediate > 0 {
+		kv["expert_shared_feed_forward_length"] = n.MoESharedExpertIntermediate
+	}
+	if n.ExpertGroupCount > 0 {
+		kv["expert_group_count"] = n.ExpertGroupCount
+	}
+	if n.ExpertGroupUsedCount > 0 {
+		kv["expert_group_used_count"] = n.ExpertGroupUsedCount
+	}
+
+	return kv
+}
+
+// normalizeVectorShapeToColumn rewrites vector-like shapes as {n, 1}: a
+// one-dimensional {n} always, and a two-dimensional {1, n} or {n, 1} when
+// n > 1. Every other shape is returned as a copy. The result never aliases the
+// input slice.
+func normalizeVectorShapeToColumn(shape []uint64) []uint64 {
+	if len(shape) == 1 {
+		return []uint64{shape[0], 1}
+	}
+
+	if len(shape) == 2 {
+		rows, cols := shape[0], shape[1]
+		if rows == 1 && cols > 1 {
+			return []uint64{cols, 1}
+		}
+		if cols == 1 && rows > 1 {
+			return []uint64{rows, 1}
+		}
+	}
+
+	return slices.Clone(shape)
+}
+
+// Tensors converts the source tensors into GGML tensors.
+//
+// For MoE models the per-expert up_proj / down_proj weights of every layer are
+// first merged via mergeTensors. Each remaining tensor is passed through with
+// its name and kind unchanged; only these suffixes get special handling:
+//   - ".ssm_a": column shape, and a repacker that maps each value v to -exp(v)
+//   - ".ssm_d": column shape
+//   - ".ssm_norm.weight": reshaped to {groups, width/groups} when divisible
+//   - ".ssm_conv1d.weight": a size-1 first or second axis of a 3-D shape is dropped
+func (n *nemotronHModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
+
+	remaining := ts
+	if n.isMoE() {
+		merges := make([]merge, 0, n.NumHiddenLayers*2)
+		for layer := range n.NumHiddenLayers {
+			upProj := merge{
+				fmt.Sprintf("blk.%d.mixer.experts.*.up_proj.weight", layer),
+				fmt.Sprintf("blk.%d.ffn_up_exps.weight", layer),
+			}
+			downProj := merge{
+				fmt.Sprintf("blk.%d.mixer.experts.*.down_proj.weight", layer),
+				fmt.Sprintf("blk.%d.ffn_down_exps.weight", layer),
+			}
+			merges = append(merges, upProj, downProj)
+		}
+
+		stacked, leftover := mergeTensors(ts, merges...)
+		out = append(out, stacked...)
+		remaining = leftover
+	}
+
+	// The group count is never zero here: an unset n_groups counts as 1.
+	groups := uint64(n.NGroups)
+	if groups == 0 {
+		groups = 1
+	}
+
+	// negExp replaces every value v with -exp(v); it captures no state, so one
+	// instance is shared by all ssm_a tensors.
+	negExp := func(_ string, data []float32, _ []uint64) ([]float32, error) {
+		converted := make([]float32, len(data))
+		for i := range data {
+			converted[i] = -float32(math.Exp(float64(data[i])))
+		}
+		return converted, nil
+	}
+
+	for _, t := range remaining {
+		name := t.Name()
+		shape := slices.Clone(t.Shape())
+
+		switch {
+		case strings.HasSuffix(name, ".ssm_a"):
+			shape = normalizeVectorShapeToColumn(shape)
+			t.SetRepacker(negExp)
+
+		case strings.HasSuffix(name, ".ssm_d"):
+			shape = normalizeVectorShapeToColumn(shape)
+
+		case strings.HasSuffix(name, ".ssm_norm.weight"):
+			// Only {w} and {1, w} shapes are candidates for regrouping.
+			var width uint64
+			candidate := false
+			switch {
+			case len(shape) == 1:
+				width, candidate = shape[0], true
+			case len(shape) == 2 && shape[0] == 1:
+				width, candidate = shape[1], true
+			}
+			if candidate && width%groups == 0 {
+				shape = []uint64{groups, width / groups}
+			}
+
+		case strings.HasSuffix(name, ".ssm_conv1d.weight"):
+			if len(shape) != 3 {
+				break
+			}
+			switch {
+			case shape[0] == 1:
+				shape = []uint64{shape[1], shape[2]}
+			case shape[1] == 1:
+				shape = []uint64{shape[0], shape[2]}
+			}
+		}
+
+		out = append(out, &ggml.Tensor{
+			Name:     name,
+			Kind:     t.Kind(),
+			Shape:    shape,
+			WriterTo: t,
+		})
+	}
+
+	return out
+}
+
+// Replacements returns old/new substring pairs that rename source tensor
+// names to their converted names, as a flat list of alternating old and new
+// values.
+//
+// The order is significant when the list is fed to strings.NewReplacer (as the
+// tests in this package do): at a given position the earlier pair wins, so
+// more specific patterns such as "mixer.gate.e_score_correction_bias" and
+// "mixer.shared_experts.up_proj" are listed before the shorter "mixer.gate"
+// and "mixer.up_proj" that would otherwise match the same text.
+func (n *nemotronHModel) Replacements() []string {
+	return []string{
+		// Embedding and output
+		"lm_head", "output",
+		"backbone.embeddings", "token_embd",
+		"backbone.norm_f", "output_norm",
+		"backbone.layers", "blk",
+
+		// Recurrent (Mamba2) tensors
+		"mixer.in_proj", "ssm_in",
+		"mixer.out_proj", "ssm_out",
+		"mixer.dt_bias", "ssm_dt.bias",
+		"mixer.A_log", "ssm_a",
+		"mixer.D", "ssm_d",
+		"mixer.conv1d", "ssm_conv1d",
+		"mixer.norm.weight", "ssm_norm.weight",
+
+		// Attention tensors
+		"mixer.q_proj", "attn_q",
+		"mixer.k_proj", "attn_k",
+		"mixer.v_proj", "attn_v",
+		"mixer.o_proj", "attn_output",
+
+		// FFN / MoE tensors
+		"mixer.gate.e_score_correction_bias", "exp_probs_b.bias",
+		"mixer.gate", "ffn_gate_inp",
+		"mixer.fc1_latent_proj", "ffn_latent_in",
+		"mixer.fc2_latent_proj", "ffn_latent_out",
+		"mixer.shared_experts.up_proj", "ffn_up_shexp",
+		"mixer.shared_experts.down_proj", "ffn_down_shexp",
+		"mixer.up_proj", "ffn_up",
+		"mixer.down_proj", "ffn_down",
+
+		// Per-layer pre-norm
+		".norm.weight", ".attn_norm.weight",
+	}
+}
--- a/convert/convert_nemotron_h_test.go
+++ b/convert/convert_nemotron_h_test.go
@@ -0,0 +1,230 @@
+package convert
+
+import (
+	"bytes"
+	"encoding/binary"
+	"encoding/json"
+	"io"
+	"os"
+	"path/filepath"
+	"slices"
+	"strings"
+	"testing"
+)
+
+// TestHybridPatternUnmarshal exercises every branch of
+// hybridPattern.UnmarshalJSON: the string form (including whitespace
+// trimming), the string-array form, JSON null, and values of any other JSON
+// type, which must be rejected.
+func TestHybridPatternUnmarshal(t *testing.T) {
+	cases := []struct {
+		name    string
+		input   string
+		want    string
+		wantErr bool
+	}{
+		{name: "string", input: `"MEM*"`, want: "MEM*"},
+		{name: "array", input: `["M","E","M","*"]`, want: "MEM*"},
+		{name: "string with surrounding whitespace", input: `" MEM* "`, want: "MEM*"},
+		{name: "null", input: `null`, want: ""},
+		{name: "number", input: `42`, wantErr: true},
+		{name: "object", input: `{"pattern":"MEM*"}`, wantErr: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Start from a non-empty value so the null case proves the
+			// pattern is actually reset rather than left untouched.
+			p := hybridPattern("stale")
+			err := json.Unmarshal([]byte(tc.input), &p)
+			if tc.wantErr {
+				if err == nil {
+					t.Fatalf("expected error for input %s, got pattern %q", tc.input, string(p))
+				}
+				return
+			}
+			if err != nil {
+				t.Fatal(err)
+			}
+			if got, want := string(p), tc.want; got != want {
+				t.Fatalf("unexpected pattern: got %q want %q", got, want)
+			}
+		})
+	}
+}
+
+// TestNemotronHLayerArrays checks the per-layer arrays produced for the
+// pattern "MEM*E": only the '*' layer carries KV heads and only the 'E' layers
+// carry a feed-forward length.
+func TestNemotronHLayerArrays(t *testing.T) {
+	model := &nemotronHModel{
+		NumHiddenLayers:       5,
+		NumAttentionHeads:     32,
+		NumKeyValueHeads:      8,
+		HybridOverridePattern: "MEM*E",
+		NRoutedExperts:        128,
+		NumExpertsPerTok:      6,
+		MoEIntermediateSize:   1856,
+	}
+
+	gotHeads, gotFFN, err := model.layerArrays()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	wantHeads := []uint32{0, 0, 0, 8, 0}
+	if !slices.Equal(gotHeads, wantHeads) {
+		t.Fatalf("unexpected head_count_kv: got %v want %v", gotHeads, wantHeads)
+	}
+
+	wantFFN := []uint32{0, 1856, 0, 0, 1856}
+	if !slices.Equal(gotFFN, wantFFN) {
+		t.Fatalf("unexpected feed_forward_length: got %v want %v", gotFFN, wantFFN)
+	}
+}
+
+// TestNemotronHKV validates a MoE configuration through parseMore and then
+// checks the architecture name and the two per-layer arrays written by KV.
+func TestNemotronHKV(t *testing.T) {
+	model := &nemotronHModel{
+		MaxPositionEmbeddings:       1048576,
+		HiddenSize:                  2688,
+		NumHiddenLayers:             5,
+		NumAttentionHeads:           32,
+		NumKeyValueHeads:            2,
+		HeadDim:                     128,
+		LayerNormEpsilon:            1e-5,
+		RopeTheta:                   10000,
+		PartialRotaryFactor:         0.5,
+		ConvKernel:                  4,
+		SSMStateSize:                128,
+		MambaNumHeads:               64,
+		MambaHeadDim:                64,
+		NGroups:                     8,
+		HybridOverridePattern:       "MEM*E",
+		NRoutedExperts:              128,
+		NSharedExperts:              1,
+		NumExpertsPerTok:            6,
+		MoEIntermediateSize:         1856,
+		MoESharedExpertIntermediate: 3712,
+		NormTopKProb:                true,
+		RoutedScalingFactor:         2.5,
+	}
+	if err := model.parseMore(nil); err != nil {
+		t.Fatal(err)
+	}
+
+	kv := model.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
+
+	if got, want := kv["general.architecture"], "nemotron_h_moe"; got != want {
+		t.Fatalf("unexpected architecture: got %v want %v", got, want)
+	}
+
+	switch got := kv["attention.head_count_kv"].(type) {
+	case []uint32:
+		if want := []uint32{0, 0, 0, 2, 0}; !slices.Equal(got, want) {
+			t.Fatalf("unexpected attention.head_count_kv: got %v want %v", got, want)
+		}
+	default:
+		t.Fatalf("attention.head_count_kv has unexpected type: %T", got)
+	}
+
+	switch got := kv["feed_forward_length"].(type) {
+	case []uint32:
+		if want := []uint32{0, 1856, 0, 0, 1856}; !slices.Equal(got, want) {
+			t.Fatalf("unexpected feed_forward_length: got %v want %v", got, want)
+		}
+	default:
+		t.Fatalf("feed_forward_length has unexpected type: %T", got)
+	}
+}
+
+// TestNemotronHTensorsTransforms checks the shape rewrites applied to the four
+// ssm_* tensor kinds and that the first ssm_a value is written out as
+// -exp(value).
+func TestNemotronHTensorsTransforms(t *testing.T) {
+	model := &nemotronHModel{NGroups: 8}
+	inputs := []Tensor{
+		&fakeTensor{
+			name:  "blk.0.ssm_a",
+			shape: []uint64{4},
+			data:  []float32{0, 1, 2, 3},
+		},
+		&fakeTensor{
+			name:  "blk.0.ssm_d",
+			shape: []uint64{4},
+			data:  []float32{0, 1, 2, 3},
+		},
+		&fakeTensor{
+			name:  "blk.0.ssm_norm.weight",
+			shape: []uint64{16},
+			data:  make([]float32, 16),
+		},
+		&fakeTensor{
+			name:  "blk.0.ssm_conv1d.weight",
+			shape: []uint64{10, 1, 4},
+			data:  make([]float32, 40),
+		},
+	}
+
+	converted := model.Tensors(inputs)
+	if len(converted) != len(inputs) {
+		t.Fatalf("unexpected output tensor count: got %d want %d", len(converted), len(inputs))
+	}
+
+	// Index the results by tensor name for the lookups below.
+	shapes := make(map[string][]uint64, len(converted))
+	writers := make(map[string]io.WriterTo, len(converted))
+	for _, tensor := range converted {
+		shapes[tensor.Name] = tensor.Shape
+		writers[tensor.Name] = tensor.WriterTo
+	}
+
+	if shape := shapes["blk.0.ssm_a"]; !slices.Equal(shape, []uint64{4, 1}) {
+		t.Fatalf("unexpected ssm_a shape: %v", shape)
+	}
+	if shape := shapes["blk.0.ssm_d"]; !slices.Equal(shape, []uint64{4, 1}) {
+		t.Fatalf("unexpected ssm_d shape: %v", shape)
+	}
+	if shape := shapes["blk.0.ssm_norm.weight"]; !slices.Equal(shape, []uint64{8, 2}) {
+		t.Fatalf("unexpected ssm_norm shape: %v", shape)
+	}
+	if shape := shapes["blk.0.ssm_conv1d.weight"]; !slices.Equal(shape, []uint64{10, 4}) {
+		t.Fatalf("unexpected ssm_conv1d shape: %v", shape)
+	}
+
+	var encoded bytes.Buffer
+	if _, err := writers["blk.0.ssm_a"].WriteTo(&encoded); err != nil {
+		t.Fatal(err)
+	}
+	values := make([]float32, 4)
+	if err := binary.Read(&encoded, binary.LittleEndian, &values); err != nil {
+		t.Fatal(err)
+	}
+	// 0 -> -exp(0) == -1
+	if first := values[0]; first != -1 {
+		t.Fatalf("unexpected transformed ssm_a[0]: got %v want -1", first)
+	}
+}
+
+// TestNemotronHLoadModelMetadata writes a minimal config.json and
+// tokenizer.json into a temporary directory and checks that LoadModelMetadata
+// selects the Nemotron-H converter for it.
+func TestNemotronHLoadModelMetadata(t *testing.T) {
+	config := `{
+		"architectures": ["NemotronHForCausalLM"],
+		"model_type": "nemotron_h",
+		"num_hidden_layers": 4,
+		"hidden_size": 512,
+		"max_position_embeddings": 32768,
+		"num_attention_heads": 8,
+		"num_key_value_heads": 2,
+		"head_dim": 64,
+		"layer_norm_epsilon": 1e-5,
+		"conv_kernel": 4,
+		"ssm_state_size": 128,
+		"mamba_num_heads": 16,
+		"mamba_head_dim": 32,
+		"n_groups": 8,
+		"hybrid_override_pattern": "ME*M",
+		"n_routed_experts": 16,
+		"num_experts_per_tok": 4,
+		"moe_intermediate_size": 256
+	}`
+
+	dir := t.TempDir()
+	configPath := filepath.Join(dir, "config.json")
+	if err := os.WriteFile(configPath, []byte(config), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	tokenizerPath := filepath.Join(dir, "tokenizer.json")
+	if err := os.WriteFile(tokenizerPath, []byte(`{}`), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	converter, _, err := LoadModelMetadata(os.DirFS(dir))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, isNemotronH := converter.(*nemotronHModel); !isNemotronH {
+		t.Fatalf("unexpected converter type: %T", converter)
+	}
+}
+
+// TestNemotronHReplacementsLatentProjections checks that the fc1/fc2 latent
+// projection tensor names are renamed through the Replacements table.
+func TestNemotronHReplacementsLatentProjections(t *testing.T) {
+	replacer := strings.NewReplacer((&nemotronHModel{}).Replacements()...)
+
+	fc1 := replacer.Replace("backbone.layers.1.mixer.fc1_latent_proj.weight")
+	if want := "blk.1.ffn_latent_in.weight"; fc1 != want {
+		t.Fatalf("unexpected fc1 replacement: got %q want %q", fc1, want)
+	}
+
+	fc2 := replacer.Replace("backbone.layers.1.mixer.fc2_latent_proj.weight")
+	if want := "blk.1.ffn_latent_out.weight"; fc2 != want {
+		t.Fatalf("unexpected fc2 replacement: got %q want %q", fc2, want)
+	}
+}
--- a/convert/convert_qwen3next.go
+++ b/convert/convert_qwen3next.go
@@ -1,6 +1,7 @@
 package convert

 import (
+	"encoding/json"
 	"fmt"
 	"io/fs"
 	"math"
@@ -13,8 +14,21 @@ import (
 	"github.com/ollama/ollama/fs/ggml"
 )

-type qwen3NextModel struct {
-	ModelParameters
+// qwen3NextRopeScaling is the JSON shape of the "rope_scaling" config object.
+// Its MropeSection is the fallback used by ropeSections when
+// rope_parameters does not provide one.
+type qwen3NextRopeScaling struct {
+	Type         string     `json:"type"`
+	Factor       ropeFactor `json:"factor"`
+	MropeSection []int32    `json:"mrope_section"`
+}
+
+// qwen3NextRopeParams is the JSON shape of the "rope_parameters" config
+// object. parseMore uses RopeTheta, PartialRotaryFactor and RopeType as
+// fallbacks when the corresponding top-level settings are unset.
+type qwen3NextRopeParams struct {
+	MRopeInterleaved    bool    `json:"mrope_interleaved"`
+	MropeSection        []int32 `json:"mrope_section"`
+	RopeType            string  `json:"rope_type"`
+	RopeTheta           float32 `json:"rope_theta"`
+	PartialRotaryFactor float32 `json:"partial_rotary_factor"`
+}
+
+type qwen3NextTextConfig struct {
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	HiddenSize            uint32  `json:"hidden_size"`
 	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
@@ -28,12 +42,13 @@ type qwen3NextModel struct {
 	// MoE config
 	NumExperts             uint32 `json:"num_experts"`
 	NumExpertsPerToken     uint32 `json:"num_experts_per_tok"`
-	NormTopkProb           bool   `json:"norm_topk_prob"`
+	NormTopkProb           *bool  `json:"norm_topk_prob"`
 	MoEIntermediateSize    uint32 `json:"moe_intermediate_size"`
 	SharedExpertIntermSize uint32 `json:"shared_expert_intermediate_size"`

 	// Hybrid attention config
-	FullAttentionInterval uint32 `json:"full_attention_interval"`
+	FullAttentionInterval uint32   `json:"full_attention_interval"`
+	LayerTypes            []string `json:"layer_types"`

 	// Linear attention (Gated Delta Net) config
 	LinearConvKernelDim uint32 `json:"linear_conv_kernel_dim"`
@@ -43,16 +58,102 @@ type qwen3NextModel struct {
 	LinearValueHeadDim  uint32 `json:"linear_value_head_dim"`

 	// RoPE config
-	PartialRotaryFactor float32 `json:"partial_rotary_factor"`
-	RopeScaling         struct {
-		Type   string     `json:"type"`
-		Factor ropeFactor `json:"factor"`
-	} `json:"rope_scaling"`
+	PartialRotaryFactor float32              `json:"partial_rotary_factor"`
+	RopeScaling         qwen3NextRopeScaling `json:"rope_scaling"`
+	RopeParameters      qwen3NextRopeParams  `json:"rope_parameters"`
+}
+
+// qwen3NextVisionConfig is the JSON shape of the "vision_config" object.
+// When Depth is non-zero, parseMore fills PatchSize, TemporalPatchSize,
+// SpatialMergeSize, Size, ImageMean and ImageStd from
+// preprocessor_config.json for whichever of them are still unset.
+type qwen3NextVisionConfig struct {
+	Depth                  uint32  `json:"depth"`
+	HiddenSize             uint32  `json:"hidden_size"`
+	NumHeads               uint32  `json:"num_heads"`
+	InChannels             uint32  `json:"in_channels"`
+	PatchSize              uint32  `json:"patch_size"`
+	SpatialMergeSize       uint32  `json:"spatial_merge_size"`
+	// NOTE(review): the field is named RMSNormEps but is read from the
+	// "layer_norm_epsilon" key — confirm this is the intended key.
+	RMSNormEps             float32 `json:"layer_norm_epsilon"`
+	RopeTheta              float32 `json:"rope_theta"`
+	TemporalPatchSize      uint32  `json:"temporal_patch_size"`
+	DeepstackVisualIndexes []int32 `json:"deepstack_visual_indexes"`
+
+	// Size holds the image edge limits, in whatever unit the source config uses.
+	Size struct {
+		ShortestEdge uint32 `json:"shortest_edge"`
+		LongestEdge  uint32 `json:"longest_edge"`
+	} `json:"size"`
+
+	ImageMean []float32 `json:"image_mean"`
+	ImageStd  []float32 `json:"image_std"`
+}
+
+// qwen3NextModel is the converter state decoded from the model config. The
+// text settings are embedded directly; when the config nests them under
+// "text_config" instead, parseMore copies TextConfig over the embedded value.
+type qwen3NextModel struct {
+	ModelParameters
+	qwen3NextTextConfig
+
+	// TextConfig is nil unless the config has a "text_config" object.
+	TextConfig  *qwen3NextTextConfig  `json:"text_config"`
+	// VisionModel is treated as present only when its Depth is non-zero.
+	VisionModel qwen3NextVisionConfig `json:"vision_config"`
+
+	// Token IDs are written to the KV output only when non-zero.
+	ImageTokenID       uint32 `json:"image_token_id"`
+	VisionStartTokenID uint32 `json:"vision_start_token_id"`
+	VisionEndTokenID   uint32 `json:"vision_end_token_id"`
 }

 var _ ModelConverter = (*qwen3NextModel)(nil)

-func (q *qwen3NextModel) parseMore(_ fs.FS) error {
+func (q *qwen3NextModel) parseMore(fsys fs.FS) error {
+	if q.TextConfig != nil {
+		q.qwen3NextTextConfig = *q.TextConfig
+	}
+
+	if q.RopeTheta == 0 {
+		q.RopeTheta = q.RopeParameters.RopeTheta
+	}
+	if q.PartialRotaryFactor == 0 {
+		q.PartialRotaryFactor = q.RopeParameters.PartialRotaryFactor
+	}
+
+	if q.RopeScaling.Type == "" && q.RopeParameters.RopeType != "" {
+		q.RopeScaling.Type = q.RopeParameters.RopeType
+	}
+
+	// Pull vision preprocessing fields when present.
+	if q.VisionModel.Depth > 0 {
+		if bts, err := fs.ReadFile(fsys, "preprocessor_config.json"); err == nil {
+			var pre struct {
+				Size struct {
+					ShortestEdge uint32 `json:"shortest_edge"`
+					LongestEdge  uint32 `json:"longest_edge"`
+				} `json:"size"`
+				PatchSize         uint32    `json:"patch_size"`
+				TemporalPatchSize uint32    `json:"temporal_patch_size"`
+				MergeSize         uint32    `json:"merge_size"`
+				ImageMean         []float32 `json:"image_mean"`
+				ImageStd          []float32 `json:"image_std"`
+			}
+			if json.Unmarshal(bts, &pre) == nil {
+				if q.VisionModel.PatchSize == 0 {
+					q.VisionModel.PatchSize = pre.PatchSize
+				}
+				if q.VisionModel.TemporalPatchSize == 0 {
+					q.VisionModel.TemporalPatchSize = pre.TemporalPatchSize
+				}
+				if q.VisionModel.SpatialMergeSize == 0 {
+					q.VisionModel.SpatialMergeSize = pre.MergeSize
+				}
+				if q.VisionModel.Size.ShortestEdge == 0 {
+					q.VisionModel.Size.ShortestEdge = pre.Size.ShortestEdge
+				}
+				if q.VisionModel.Size.LongestEdge == 0 {
+					q.VisionModel.Size.LongestEdge = pre.Size.LongestEdge
+				}
+				if len(q.VisionModel.ImageMean) == 0 {
+					q.VisionModel.ImageMean = pre.ImageMean
+				}
+				if len(q.VisionModel.ImageStd) == 0 {
+					q.VisionModel.ImageStd = pre.ImageStd
+				}
+			}
+		}
+	}
+
 	if q.NumHiddenLayers == 0 {
 		return fmt.Errorf("qwen3next: num_hidden_layers must be set")
 	}
@@ -74,36 +175,96 @@ func (q *qwen3NextModel) parseMore(_ fs.FS) error {
 	if q.LinearNumKeyHeads == 0 || q.LinearNumValueHeads == 0 || q.LinearKeyHeadDim == 0 || q.LinearValueHeadDim == 0 {
 		return fmt.Errorf("qwen3next: linear attention config must be set (linear_num_key_heads, linear_num_value_heads, linear_key_head_dim, linear_value_head_dim)")
 	}
-	if q.FullAttentionInterval == 0 {
-		return fmt.Errorf("qwen3next: full_attention_interval must be set")
-	}
-	if q.FullAttentionInterval > q.NumHiddenLayers {
-		return fmt.Errorf("qwen3next: full_attention_interval (%d) exceeds num_hidden_layers (%d)", q.FullAttentionInterval, q.NumHiddenLayers)
-	}
-
-	hasFull := false
-	for i := range q.NumHiddenLayers {
-		if (i+1)%q.FullAttentionInterval == 0 {
-			hasFull = true
-			break
-		}
-	}
-	if !hasFull {
-		return fmt.Errorf("qwen3next: head_count_kv would be all zeros (full_attention_interval=%d, num_hidden_layers=%d)", q.FullAttentionInterval, q.NumHiddenLayers)
+	if _, err := q.kvHeadCounts(); err != nil {
+		return err
 	}

 	return nil
 }

+// kvHeadCounts returns one KV head count per layer: num_key_value_heads for
+// full-attention layers and zero for every other layer.
+//
+// When layer_types is non-empty it decides the layer kinds: an entry equal to
+// "full_attention" marks a full-attention layer, and any other entry (or a
+// missing entry when layer_types is shorter than num_hidden_layers) counts as
+// recurrent. Both kinds must occur. Otherwise every full_attention_interval-th
+// layer is a full-attention layer, and the interval must be set and must not
+// exceed num_hidden_layers.
+func (q *qwen3NextModel) kvHeadCounts() ([]uint32, error) {
+	if len(q.LayerTypes) == 0 {
+		interval := q.FullAttentionInterval
+		if interval == 0 {
+			return nil, fmt.Errorf("qwen3next: full_attention_interval must be set")
+		}
+		if interval > q.NumHiddenLayers {
+			return nil, fmt.Errorf("qwen3next: full_attention_interval (%d) exceeds num_hidden_layers (%d)", interval, q.NumHiddenLayers)
+		}
+
+		counts := make([]uint32, q.NumHiddenLayers)
+		sawFull := false
+		for i := range q.NumHiddenLayers {
+			if (i+1)%interval != 0 {
+				continue
+			}
+			counts[i] = q.NumKeyValueHeads
+			sawFull = true
+		}
+		if !sawFull {
+			return nil, fmt.Errorf("qwen3next: head_count_kv would be all zeros (full_attention_interval=%d, num_hidden_layers=%d)", interval, q.NumHiddenLayers)
+		}
+		return counts, nil
+	}
+
+	declared := uint32(len(q.LayerTypes))
+	counts := make([]uint32, q.NumHiddenLayers)
+	var sawFull, sawRecurrent bool
+	for i := range q.NumHiddenLayers {
+		if i < declared && q.LayerTypes[i] == "full_attention" {
+			counts[i] = q.NumKeyValueHeads
+			sawFull = true
+			continue
+		}
+		sawRecurrent = true
+	}
+	if sawFull && sawRecurrent {
+		return counts, nil
+	}
+	return nil, fmt.Errorf("qwen3next: layer_types must include both full_attention and linear_attention")
+}
+
+// ropeSections returns the mrope_section list from rope_parameters when it is
+// non-empty, and the one from rope_scaling otherwise.
+func (q *qwen3NextModel) ropeSections() []int32 {
+	sections := q.RopeParameters.MropeSection
+	if len(sections) == 0 {
+		sections = q.RopeScaling.MropeSection
+	}
+	return sections
+}
+
+// shouldReorderVHeads reports false when the model type or any architecture
+// name contains "qwen3next" or "qwen3_next" (case-insensitively), and true for
+// every other import handled by this converter.
+func (q *qwen3NextModel) shouldReorderVHeads() bool {
+	namesQwen3Next := func(s string) bool {
+		lowered := strings.ToLower(s)
+		return strings.Contains(lowered, "qwen3_next") || strings.Contains(lowered, "qwen3next")
+	}
+
+	if namesQwen3Next(q.ModelType) {
+		return false
+	}
+	for _, arch := range q.Architectures {
+		if namesQwen3Next(arch) {
+			return false
+		}
+	}
+
+	// Default to qwen3.5 layout for all other qwen3next-family imports.
+	return true
+}
+
 func (q *qwen3NextModel) KV(t *Tokenizer) KV {
 	kv := q.ModelParameters.KV(t)
-	kv["general.architecture"] = "qwen3next"
-	kv["tokenizer.ggml.pre"] = "qwen2"
+
+	arch := "qwen35"
+	if q.NumExperts > 0 {
+		arch = "qwen35moe"
+	}
+	kv["general.architecture"] = arch
+	kv["tokenizer.ggml.pre"] = "qwen35"
 	kv["block_count"] = q.NumHiddenLayers
 	kv["context_length"] = q.MaxPositionEmbeddings
 	kv["embedding_length"] = q.HiddenSize
 	kv["feed_forward_length"] = q.IntermediateSize
 	kv["attention.head_count"] = q.NumAttentionHeads
+
 	headDim := q.HeadDim
 	if headDim == 0 && q.NumAttentionHeads > 0 {
 		headDim = q.HiddenSize / q.NumAttentionHeads
@@ -113,18 +274,31 @@ func (q *qwen3NextModel) KV(t *Tokenizer) KV {
 	kv["attention.layer_norm_rms_epsilon"] = q.RMSNormEPS
 	kv["rope.freq_base"] = q.RopeTheta

-	// RoPE dimension count (partial rotary)
-	// partial_rotary_factor = 0.25 means only 25% of head_dim uses RoPE
 	partialRotary := q.PartialRotaryFactor
 	if partialRotary > 0 && partialRotary <= 1 {
 		kv["rope.dimension_count"] = uint32(float32(headDim) * partialRotary)
 	}

-	// MoE config
+	if sections := q.ropeSections(); len(sections) > 0 {
+		kv["mrope_sections"] = sections
+		kv["rope.mrope_section"] = sections
+		kv["rope.dimension_sections"] = sections
+	}
+	if q.RopeParameters.MRopeInterleaved {
+		kv["rope.mrope_interleaved"] = true
+	}
+
+	if q.RopeScaling.Type != "" && q.RopeScaling.Type != "default" {
+		kv["rope.scaling.type"] = q.RopeScaling.Type
+		kv["rope.scaling.factor"] = q.RopeScaling.Factor
+	}
+
 	if q.NumExperts > 0 {
 		kv["expert_count"] = q.NumExperts
 		kv["expert_used_count"] = q.NumExpertsPerToken
-		kv["norm_top_k_prob"] = q.NormTopkProb
+		if q.NormTopkProb != nil {
+			kv["norm_top_k_prob"] = *q.NormTopkProb
+		}
 		if q.MoEIntermediateSize > 0 {
 			kv["expert_feed_forward_length"] = q.MoEIntermediateSize
 		}
@@ -133,33 +307,66 @@ func (q *qwen3NextModel) KV(t *Tokenizer) KV {
 		}
 	}

-	// SSM/Linear attention config
-	// d_inner = linear_value_head_dim * linear_num_value_heads
 	dInner := q.LinearValueHeadDim * q.LinearNumValueHeads
 	kv["ssm.inner_size"] = dInner
-	kv["ssm.state_size"] = q.LinearKeyHeadDim        // head_k_dim
-	kv["ssm.group_count"] = q.LinearNumKeyHeads      // num_k_heads
-	kv["ssm.time_step_rank"] = q.LinearNumValueHeads // num_v_heads
+	kv["ssm.state_size"] = q.LinearKeyHeadDim
+	kv["ssm.group_count"] = q.LinearNumKeyHeads
+	kv["ssm.time_step_rank"] = q.LinearNumValueHeads
 	kv["ssm.conv_kernel"] = q.LinearConvKernelDim
-	interval := q.FullAttentionInterval
-	kv["full_attention_interval"] = interval
-
-	// Build per-layer KV head count array to identify layer types
-	// 0 = recurrent (linear attention), non-zero = full attention
-	kvHeadCounts := make([]uint32, q.NumHiddenLayers)
-	for i := range q.NumHiddenLayers {
-		// Full attention every full_attention_interval layers (starting at interval-1)
-		if interval > 0 && (i+1)%interval == 0 {
-			kvHeadCounts[i] = q.NumKeyValueHeads
-		}
-		// else stays 0 (recurrent layer)
+	if q.shouldReorderVHeads() {
+		kv["ssm.v_head_reordered"] = true
+	}
+	if q.FullAttentionInterval > 0 {
+		kv["full_attention_interval"] = q.FullAttentionInterval
 	}
-	kv["attention.head_count_kv"] = kvHeadCounts

-	// RoPE scaling
-	if q.RopeScaling.Type != "" {
-		kv["rope.scaling.type"] = q.RopeScaling.Type
-		kv["rope.scaling.factor"] = q.RopeScaling.Factor
+	if headCounts, err := q.kvHeadCounts(); err == nil {
+		kv["attention.head_count_kv"] = headCounts
+	}
+
+	if q.VisionModel.Depth > 0 {
+		kv["vision.block_count"] = q.VisionModel.Depth
+		kv["vision.embedding_length"] = q.VisionModel.HiddenSize
+		kv["vision.attention.head_count"] = q.VisionModel.NumHeads
+		kv["vision.num_channels"] = q.VisionModel.InChannels
+		if q.VisionModel.PatchSize > 0 {
+			kv["vision.patch_size"] = q.VisionModel.PatchSize
+		}
+		if q.VisionModel.SpatialMergeSize > 0 {
+			kv["vision.spatial_merge_size"] = q.VisionModel.SpatialMergeSize
+		}
+		if q.VisionModel.RMSNormEps > 0 {
+			kv["vision.attention.layer_norm_epsilon"] = q.VisionModel.RMSNormEps
+		}
+		if q.VisionModel.RopeTheta > 0 {
+			kv["vision.rope.freq_base"] = q.VisionModel.RopeTheta
+		}
+		if q.VisionModel.TemporalPatchSize > 0 {
+			kv["vision.temporal_patch_size"] = q.VisionModel.TemporalPatchSize
+		}
+		kv["vision.deepstack_visual_indexes"] = q.VisionModel.DeepstackVisualIndexes
+		if q.VisionModel.Size.ShortestEdge > 0 {
+			kv["vision.shortest_edge"] = q.VisionModel.Size.ShortestEdge
+		}
+		if q.VisionModel.Size.LongestEdge > 0 {
+			kv["vision.longest_edge"] = q.VisionModel.Size.LongestEdge
+		}
+		if len(q.VisionModel.ImageMean) > 0 {
+			kv["vision.image_mean"] = q.VisionModel.ImageMean
+		}
+		if len(q.VisionModel.ImageStd) > 0 {
+			kv["vision.image_std"] = q.VisionModel.ImageStd
+		}
+	}
+
+	if q.ImageTokenID > 0 {
+		kv["image_token_id"] = q.ImageTokenID
+	}
+	if q.VisionStartTokenID > 0 {
+		kv["vision_start_token_id"] = q.VisionStartTokenID
+	}
+	if q.VisionEndTokenID > 0 {
+		kv["vision_end_token_id"] = q.VisionEndTokenID
 	}

 	return kv
@@ -168,7 +375,6 @@ func (q *qwen3NextModel) KV(t *Tokenizer) KV {
 func (q *qwen3NextModel) Tensors(ts []Tensor) []*ggml.Tensor {
 	var out []*ggml.Tensor

-	// Create merges for expert tensors - stack individual experts into batched tensors
 	merges := make([]merge, q.NumHiddenLayers*3)
 	for i := range q.NumHiddenLayers {
 		merges[i*3+0] = merge{
@@ -185,16 +391,13 @@ func (q *qwen3NextModel) Tensors(ts []Tensor) []*ggml.Tensor {
 		}
 	}

-	// Merge expert tensors
 	merged, remaining := mergeTensors(ts, merges...)
 	out = append(out, merged...)

-	// Process remaining tensors
 	for _, t := range remaining {
 		name := t.Name()
 		shape := t.Shape()

-		// Split linear_attn.in_proj_qkvz (ssm_in) into attn_qkv + attn_gate when possible
 		if strings.HasSuffix(name, ".ssm_in.weight") {
 			if qkv, gate, ok := q.splitQKVZTensor(t); ok {
 				out = append(out, qkv, gate)
@@ -204,84 +407,299 @@ func (q *qwen3NextModel) Tensors(ts []Tensor) []*ggml.Tensor {
 		}

 		switch {
-		// Add 1 to norm weights (except ssm_norm which is linear_attn.norm)
-		// This matches the Python converter behavior for qwen3next
+		case strings.Contains(name, ".mlp.experts.gate_up_proj"):
+			out = append(out, slices.Collect(splitDim(t, 1,
+				split{Replacer: strings.NewReplacer(".mlp.experts.gate_up_proj", ".ffn_gate_exps.weight")},
+				split{Replacer: strings.NewReplacer(".mlp.experts.gate_up_proj", ".ffn_up_exps.weight")},
+			))...)
+
+		case strings.Contains(name, ".mlp.experts.down_proj"):
+			out = append(out, &ggml.Tensor{
+				Name:     strings.NewReplacer(".mlp.experts.down_proj", ".ffn_down_exps.weight").Replace(name),
+				Kind:     t.Kind(),
+				Shape:    slices.Clone(shape),
+				WriterTo: t,
+			})
+
+		case strings.HasPrefix(name, "v.blk.") && strings.Contains(name, ".attn_qkv"):
+			out = append(out, slices.Collect(splitDim(t, 0,
+				split{Replacer: strings.NewReplacer("attn_qkv", "attn_q")},
+				split{Replacer: strings.NewReplacer("attn_qkv", "attn_k")},
+				split{Replacer: strings.NewReplacer("attn_qkv", "attn_v")},
+			))...)
+
+		case strings.Contains(name, "patch_embed") && strings.HasSuffix(name, "weight"):
+			out = append(out, &ggml.Tensor{
+				Name:     name,
+				Kind:     t.Kind(),
+				Shape:    append([]uint64{shape[0] * shape[1]}, shape[2:]...),
+				WriterTo: t,
+			})
+
 		case strings.HasSuffix(name, "_norm.weight") && !strings.HasSuffix(name, ".ssm_norm.weight"):
 			t.SetRepacker(q.addOne)
-			out = append(out, &ggml.Tensor{
-				Name:     name,
-				Kind:     t.Kind(),
-				Shape:    slices.Clone(shape),
-				WriterTo: t,
-			})
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})

-		// Handle linear attention A_log -> ssm_a (negate and exp)
-		// Note: name has already been transformed by Replacements at this point
 		case strings.HasSuffix(name, ".ssm_a"):
-			t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
-				// Compute -exp(A_log)
-				result := make([]float32, len(data))
-				for i, v := range data {
-					// -exp(v)
-					result[i] = -float32(math.Exp(float64(v)))
-				}
-				return result, nil
-			})
-			out = append(out, &ggml.Tensor{
-				Name:     name,
-				Kind:     t.Kind(),
-				Shape:    slices.Clone(shape),
-				WriterTo: t,
-			})
+			t.SetRepacker(q.repackSSMA())
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
+
+		case strings.HasSuffix(name, ".attn_qkv.weight"):
+			if q.shouldReorderVHeads() {
+				t.SetRepacker(q.repackAttnQKV())
+			}
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
+
+		case strings.HasSuffix(name, ".attn_gate.weight"):
+			if q.shouldReorderVHeads() {
+				// HF tensor layout is [out_features, in_features]; reorder rows.
+				t.SetRepacker(q.repackReorderDim(0, int(q.LinearValueHeadDim)))
+			}
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
+
+		case strings.HasSuffix(name, ".ssm_beta.weight"), strings.HasSuffix(name, ".ssm_alpha.weight"):
+			if q.shouldReorderVHeads() {
+				// HF tensor layout is [out_features, in_features]; reorder rows.
+				t.SetRepacker(q.repackReorderDim(0, 1))
+			}
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
+
+		case strings.HasSuffix(name, ".ssm_dt"):
+			if q.shouldReorderVHeads() {
+				t.SetRepacker(q.repackReorderDim(0, 1))
+			}
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
+
+		case strings.HasSuffix(name, ".ssm_out.weight"):
+			if q.shouldReorderVHeads() {
+				// HF out_proj layout is [out_features, in_features]; reorder columns.
+				t.SetRepacker(q.repackReorderDim(1, int(q.LinearValueHeadDim)))
+			}
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})

-		// Squeeze conv1d weights: [1, D, K] or [D, 1, K] -> [D, K]
 		case strings.HasSuffix(name, ".ssm_conv1d.weight"):
 			newShape := slices.Clone(shape)
 			if len(shape) == 3 {
 				if shape[0] == 1 {
-					// [1, D, K] -> [D, K]
 					newShape = []uint64{shape[1], shape[2]}
 				} else if shape[1] == 1 {
-					// [D, 1, K] -> [D, K]
 					newShape = []uint64{shape[0], shape[2]}
 				}
 			}
-			out = append(out, &ggml.Tensor{
-				Name:     name,
-				Kind:     t.Kind(),
-				Shape:    newShape,
-				WriterTo: t,
-			})
-		// Squeeze shared expert gate: [D, 1] or [1, D] -> [D]
-		case strings.HasSuffix(name, ".ffn_gate_inp_shexp.weight"):
-			newShape := slices.Clone(shape)
-			if len(shape) == 2 {
-				if shape[0] == 1 && shape[1] > 1 {
-					newShape = []uint64{shape[1]}
-				} else if shape[1] == 1 && shape[0] > 1 {
-					newShape = []uint64{shape[0]}
-				}
+			if q.shouldReorderVHeads() {
+				t.SetRepacker(q.repackConv1D())
 			}
-			out = append(out, &ggml.Tensor{
-				Name:     name,
-				Kind:     t.Kind(),
-				Shape:    newShape,
-				WriterTo: t,
-			})
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: newShape, WriterTo: t})

 		default:
-			out = append(out, &ggml.Tensor{
-				Name:     name,
-				Kind:     t.Kind(),
-				Shape:    slices.Clone(shape),
-				WriterTo: t,
-			})
+			out = append(out, &ggml.Tensor{Name: name, Kind: t.Kind(), Shape: slices.Clone(shape), WriterTo: t})
 		}
 	}

 	return out
 }

+// repackReorderDim returns a Repacker that reorders the heads along axis dim (headDim values per head); it is a no-op unless v-head reordering applies.
+func (q *qwen3NextModel) repackReorderDim(dim, headDim int) Repacker {
+	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
+		if !q.shouldReorderVHeads() || q.LinearNumKeyHeads == 0 { // a zero divisor below would panic
+			return data, nil
+		}
+		numVPerK := int(q.LinearNumValueHeads / q.LinearNumKeyHeads)
+		return reorderHeadLayout(data, shape, dim, int(q.LinearNumKeyHeads), numVPerK, headDim)
+	}
+}
+
+// repackAttnQKV returns a Repacker for the fused attn_qkv weight: Q and K stay in place and only the V slice is reordered.
+func (q *qwen3NextModel) repackAttnQKV() Repacker {
+	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
+		// Zero key heads would make numV/numK below panic; treat it as nothing to reorder.
+		if !q.shouldReorderVHeads() || len(shape) != 2 || q.LinearNumKeyHeads == 0 {
+			return data, nil
+		}
+
+		rows, cols := int(shape[0]), int(shape[1])
+		numK := int(q.LinearNumKeyHeads)
+		numV := int(q.LinearNumValueHeads)
+		headK := int(q.LinearKeyHeadDim)
+		headV := int(q.LinearValueHeadDim)
+		qDim := headK * numK
+		kDim := headK * numK
+		vDim := headV * numV
+		qkvDim := qDim + kDim + vDim
+
+		switch {
+		case rows == qkvDim:
+			// HF layout: [out_features, in_features]. Keep Q/K rows unchanged and
+			// reorder only V rows from grouped -> tiled head layout.
+			out := make([]float32, len(data))
+			qkSize := (qDim + kDim) * cols
+			copy(out[:qkSize], data[:qkSize])
+
+			vStart := qkSize
+			vEnd := vStart + vDim*cols
+			reorderedV, err := reorderHeadLayout(data[vStart:vEnd], []uint64{uint64(vDim), uint64(cols)}, 0, numK, numV/numK, headV)
+			if err != nil {
+				return nil, err
+			}
+			copy(out[vStart:vEnd], reorderedV)
+			copy(out[vEnd:], data[vEnd:])
+			return out, nil
+
+		case cols == qkvDim:
+			// Fallback for already-transposed [in_features, out_features] tensors.
+			out := make([]float32, len(data))
+			copy(out, data)
+			for r := range rows {
+				base := r * cols
+				vStart := base + qDim + kDim
+				vEnd := vStart + vDim
+				reorderedV, err := reorderHeadLayout(out[vStart:vEnd], []uint64{uint64(vDim)}, 0, numK, numV/numK, headV)
+				if err != nil {
+					return nil, err
+				}
+				copy(out[vStart:vEnd], reorderedV)
+			}
+			return out, nil
+
+		default:
+			return data, nil
+		}
+	}
+}
+// repackConv1D returns a Repacker for the ssm_conv1d weight that copies the Q/K channels unchanged and reorders only the V channels.
+func (q *qwen3NextModel) repackConv1D() Repacker {
+	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
+		if !q.shouldReorderVHeads() {
+			return data, nil
+		}
+		// Treat a 3-D shape with a singleton first or second axis as 2-D.
+		normShape := slices.Clone(shape)
+		if len(shape) == 3 {
+			if shape[0] == 1 {
+				normShape = []uint64{shape[1], shape[2]}
+			} else if shape[1] == 1 {
+				normShape = []uint64{shape[0], shape[2]}
+			}
+		}
+		if len(normShape) != 2 {
+			return data, nil
+		}
+		// Channel order assumed below: the Q and K channels come first, the V channels follow.
+		rows := int(normShape[0])
+		cols := int(normShape[1])
+		numK := int(q.LinearNumKeyHeads)
+		numV := int(q.LinearNumValueHeads)
+		headK := int(q.LinearKeyHeadDim)
+		headV := int(q.LinearValueHeadDim)
+		qkChannels := 2 * headK * numK
+		totalChannels := qkChannels + headV*numV
+		if qkChannels <= 0 {
+			return data, nil
+		}
+		// Choose the branch by whichever axis length equals the expected channel count.
+		switch {
+		case rows == totalChannels:
+			// HF layout after squeeze: [channels, kernel]
+			out := make([]float32, len(data))
+			prefix := qkChannels * cols
+			copy(out[:prefix], data[:prefix])
+			reorderedV, err := reorderHeadLayout(data[prefix:], []uint64{uint64(totalChannels - qkChannels), uint64(cols)}, 0, numK, numV/numK, headV)
+			if err != nil {
+				return nil, err
+			}
+			copy(out[prefix:], reorderedV)
+			return out, nil
+		case cols == totalChannels:
+			// Fallback for transposed [kernel, channels]
+			out := make([]float32, len(data))
+			copy(out, data)
+			vChannels := totalChannels - qkChannels
+			for r := range rows {
+				base := r * cols
+				vStart := base + qkChannels
+				vEnd := vStart + vChannels
+				reorderedV, err := reorderHeadLayout(out[vStart:vEnd], []uint64{uint64(vChannels)}, 0, numK, numV/numK, headV)
+				if err != nil {
+					return nil, err
+				}
+				copy(out[vStart:vEnd], reorderedV)
+			}
+			return out, nil
+		default:
+			return data, nil
+		}
+	}
+}
+
+// repackSSMA returns a Repacker that maps every value v to -exp(v) and then, when v-head reordering applies, reorders the heads along axis 0.
+func (q *qwen3NextModel) repackSSMA() Repacker {
+	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
+		result := make([]float32, len(data))
+		for i, v := range data {
+			result[i] = -float32(math.Exp(float64(v)))
+		}
+		if !q.shouldReorderVHeads() || q.LinearNumKeyHeads == 0 { // a zero divisor below would panic
+			return result, nil
+		}
+		numVPerK := int(q.LinearNumValueHeads / q.LinearNumKeyHeads)
+		return reorderHeadLayout(result, shape, 0, int(q.LinearNumKeyHeads), numVPerK, 1)
+	}
+}
+// reorderHeadLayout splits axis dim of data (laid out per shape) into [numKHeads, numVPerK, headDim], swaps the first two of those sub-axes and returns the flattened result; data is returned unchanged when the arguments do not fit.
+func reorderHeadLayout(data []float32, shape []uint64, dim int, numKHeads, numVPerK, headDim int) ([]float32, error) {
+	if len(shape) == 0 || numKHeads <= 0 || numVPerK <= 0 || headDim <= 0 {
+		return data, nil
+	}
+
+	dims := make([]int, len(shape))
+	for i := range shape {
+		dims[i] = int(shape[i])
+	}
+	if dim < 0 { // a negative dim counts back from the last axis
+		dim += len(dims)
+	}
+	if dim < 0 || dim >= len(dims) {
+		return data, nil
+	}
+	// The axis must hold exactly numKHeads*numVPerK*headDim entries; otherwise leave data as is.
+	expected := numKHeads * numVPerK * headDim
+	if dims[dim] != expected {
+		return data, nil
+	}
+	// Same shape with axis dim expanded into (numKHeads, numVPerK, headDim).
+	newShape := make([]int, 0, len(dims)+2)
+	newShape = append(newShape, dims[:dim]...)
+	newShape = append(newShape, numKHeads, numVPerK, headDim)
+	newShape = append(newShape, dims[dim+1:]...)
+
+	var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
+	if err := tt.Reshape(newShape...); err != nil {
+		return nil, err
+	}
+	// Identity permutation except that the two head axes trade places.
+	perm := make([]int, len(newShape))
+	for i := range perm {
+		perm[i] = i
+	}
+	perm[dim], perm[dim+1] = perm[dim+1], perm[dim]
+
+	tt, err := tensor.Transpose(tt, perm...)
+	if err != nil {
+		return nil, err
+	}
+	tt = tensor.Materialize(tt)
+	// Flatten to a single axis whose length is the product of the original dims.
+	total := 1
+	for _, d := range dims {
+		total *= d
+	}
+	if err := tt.Reshape(total); err != nil {
+		return nil, err
+	}
+	return native.VectorF32(tt.(*tensor.Dense))
+}
+
 type qkvzSplitSpec struct {
 	hidden    int
 	headKDim  int
@@ -369,7 +787,6 @@ func (q *qwen3NextModel) repackQKVZ(spec qkvzSplitSpec, extractGate bool) Repack
 		var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
 		var err error

-		// Convert to [hidden, out_features] layout for slicing
 		tt, err = tensor.Transpose(tt, 1, 0)
 		if err != nil {
 			return nil, err
@@ -444,7 +861,6 @@ func (q *qwen3NextModel) repackQKVZ(spec qkvzSplitSpec, extractGate bool) Repack
 	}
 }

-// addOne adds 1.0 to all elements in the tensor (for norm weights)
 func (*qwen3NextModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
 	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
 	ones := tensor.Ones(tensor.Float32, int(shape[0]))
@@ -471,10 +887,21 @@ func (q *qwen3NextModel) Replacements() []string {
 	return []string{
 		// Embeddings and output
 		"lm_head", "output",
+		"model.language_model.embed_tokens", "token_embd",
+		"model.language_model.norm", "output_norm",
+		"model.language_model.layers", "blk",
 		"model.embed_tokens", "token_embd",
 		"model.norm", "output_norm",
 		"model.layers", "blk",

+		// Vision
+		"model.visual", "v",
+		"patch_embed.proj", "patch_embed",
+		"blocks", "blk",
+		"attn.qkv", "attn_qkv",
+		"attn.proj", "attn_out",
+		"deepstack_merger_list", "deepstack_merger",
+
 		// Layer norms
 		"input_layernorm", "attn_norm",
 		"post_attention_layernorm", "post_attention_norm",
@@ -487,9 +914,16 @@ func (q *qwen3NextModel) Replacements() []string {
 		"self_attn.v_proj", "attn_v",
 		"self_attn.o_proj", "attn_output",

-		// Linear attention (Gated Delta Net)
+		// Linear attention (legacy qwen3next)
 		"linear_attn.in_proj_qkvz", "ssm_in",
 		"linear_attn.in_proj_ba", "ssm_ba",
+
+		// Linear attention (qwen35)
+		"linear_attn.in_proj_qkv", "attn_qkv",
+		"linear_attn.in_proj_z", "attn_gate",
+		"linear_attn.in_proj_a", "ssm_alpha",
+		"linear_attn.in_proj_b", "ssm_beta",
+
 		"linear_attn.conv1d", "ssm_conv1d",
 		"linear_attn.dt_bias", "ssm_dt",
 		"linear_attn.dt_proj", "ssm_dt",
@@ -497,14 +931,14 @@ func (q *qwen3NextModel) Replacements() []string {
 		"linear_attn.norm", "ssm_norm",
 		"linear_attn.out_proj", "ssm_out",

-		// MoE (experts are stacked via mergeTensors, not replaced here)
+		// MoE
 		"mlp.gate.weight", "ffn_gate_inp.weight",
 		"mlp.shared_expert.down_proj", "ffn_down_shexp",
 		"mlp.shared_expert.gate_proj", "ffn_gate_shexp",
 		"mlp.shared_expert.up_proj", "ffn_up_shexp",
 		"mlp.shared_expert_gate", "ffn_gate_inp_shexp",

-		// Dense FFN (if any layers use it)
+		// Dense FFN
 		"mlp.down_proj", "ffn_down",
 		"mlp.gate_proj", "ffn_gate",
 		"mlp.up_proj", "ffn_up",
--- a/convert/convert_qwen3next_test.go
+++ b/convert/convert_qwen3next_test.go
@@ -0,0 +1,563 @@
+package convert
+
+import (
+	"bytes"
+	"encoding/binary"
+	"os"
+	"slices"
+	"strings"
+	"testing"
+
+	"github.com/ollama/ollama/fs/ggml"
+)
+
+func boolPtr(v bool) *bool {
+	return &v // v is this call's own parameter copy, so the pointer is independent of the caller's value
+}
+
+// readTensorData writes tensor into a buffer and decodes it as little-endian
+// float32 values, one per element of tensor.Shape; any error fails the test.
+func readTensorData(t *testing.T, tensor *ggml.Tensor) []float32 {
+	t.Helper()
+
+	buf := new(bytes.Buffer)
+	if _, err := tensor.WriteTo(buf); err != nil {
+		t.Fatal(err)
+	}
+
+	count := 1
+	for i := range tensor.Shape {
+		count *= int(tensor.Shape[i])
+	}
+	decoded := make([]float32, count)
+	if err := binary.Read(buf, binary.LittleEndian, &decoded); err != nil {
+		t.Fatal(err)
+	}
+	return decoded
+}
+
+// TestQwen3NextLegacyModelTypeDisablesReorder verifies that a model whose
+// model_type is "qwen3_next" does not report v-head reordering.
+func TestQwen3NextLegacyModelTypeDisablesReorder(t *testing.T) {
+	legacy := &qwen3NextModel{
+		ModelParameters: ModelParameters{ModelType: "qwen3_next"},
+	}
+	if !legacy.shouldReorderVHeads() {
+		return
+	}
+	t.Fatalf("legacy qwen3_next model_type should not reorder v-head layout")
+}
+
+// TestQwen3NextLegacyArchitectureDisablesReorder verifies that the
+// "Qwen3NextForCausalLM" architecture alone does not report v-head reordering.
+func TestQwen3NextLegacyArchitectureDisablesReorder(t *testing.T) {
+	legacy := &qwen3NextModel{
+		ModelParameters: ModelParameters{Architectures: []string{"Qwen3NextForCausalLM"}},
+	}
+	if !legacy.shouldReorderVHeads() {
+		return
+	}
+	t.Fatalf("legacy Qwen3Next architecture should not reorder v-head layout")
+}
+
+func TestQwen3NextKVLegacyConfig(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_next",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			MaxPositionEmbeddings: 8192,
+			HiddenSize:            512,
+			NumHiddenLayers:       4,
+			IntermediateSize:      2048,
+			NumAttentionHeads:     8,
+			NumKeyValueHeads:      2,
+			HeadDim:               64,
+			RopeTheta:             1_000_000,
+			RMSNormEPS:            1e-6,
+
+			NumExperts:             8,
+			NumExpertsPerToken:     2,
+			NormTopkProb:           boolPtr(true),
+			MoEIntermediateSize:    256,
+			SharedExpertIntermSize: 512,
+
+			FullAttentionInterval: 2,
+
+			LinearConvKernelDim: 4,
+			LinearKeyHeadDim:    64,
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearValueHeadDim:  64,
+
+			PartialRotaryFactor: 0.25,
+		},
+	}
+
+	if err := model.parseMore(os.DirFS(t.TempDir())); err != nil {
+		t.Fatal(err)
+	}
+
+	meta := model.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
+	if arch := meta["general.architecture"]; arch != "qwen35moe" {
+		t.Fatalf("unexpected architecture: got %v want %v", arch, "qwen35moe")
+	}
+	if pre := meta["tokenizer.ggml.pre"]; pre != "qwen35" {
+		t.Fatalf("unexpected tokenizer pre: got %v want %v", pre, "qwen35")
+	}
+	// With FullAttentionInterval=2 over 4 layers, KV heads are expected only on layers 1 and 3.
+	perLayer, isU32 := meta["attention.head_count_kv"].([]uint32)
+	if !isU32 {
+		t.Fatalf("attention.head_count_kv has unexpected type: %T", meta["attention.head_count_kv"])
+	}
+	if want := []uint32{0, 2, 0, 2}; !slices.Equal(perLayer, want) {
+		t.Fatalf("unexpected attention.head_count_kv: got %v want %v", perLayer, want)
+	}
+
+	if _, present := meta["ssm.v_head_reordered"]; present {
+		t.Fatalf("legacy qwen3next should not enable ssm.v_head_reordered")
+	}
+	if norm := meta["norm_top_k_prob"]; norm != true {
+		t.Fatalf("unexpected norm_top_k_prob: got %v want %v", norm, true)
+	}
+}
+
+func TestQwen35MoeOmitsNormTopKProbWhenUnset(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			MaxPositionEmbeddings: 4096,
+			HiddenSize:            512,
+			NumHiddenLayers:       4,
+			IntermediateSize:      2048,
+			NumAttentionHeads:     8,
+			NumKeyValueHeads:      2,
+			HeadDim:               64,
+			RopeTheta:             1_000_000,
+			RMSNormEPS:            1e-6,
+			NumExperts:            8,
+			NumExpertsPerToken:    2,
+			FullAttentionInterval: 2,
+			LinearConvKernelDim:   4,
+			LinearKeyHeadDim:      64,
+			LinearNumKeyHeads:     2,
+			LinearNumValueHeads:   4,
+			LinearValueHeadDim:    64,
+			PartialRotaryFactor:   0.25,
+		},
+	}
+
+	if err := model.parseMore(os.DirFS(t.TempDir())); err != nil {
+		t.Fatal(err)
+	}
+	// NormTopkProb is left unset in the config above, so the key must be absent.
+	_, present := model.KV(&Tokenizer{Vocabulary: &Vocabulary{}})["norm_top_k_prob"]
+	if present {
+		t.Fatalf("expected norm_top_k_prob to be omitted when not set in config")
+	}
+}
+
+func TestQwen35KVFromTextConfig(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		TextConfig: &qwen3NextTextConfig{
+			MaxPositionEmbeddings: 16384,
+			HiddenSize:            1024,
+			NumHiddenLayers:       4,
+			IntermediateSize:      4096,
+			NumAttentionHeads:     8,
+			NumKeyValueHeads:      4,
+			HeadDim:               128,
+			RMSNormEPS:            1e-6,
+
+			LayerTypes: []string{
+				"linear_attention",
+				"full_attention",
+				"linear_attention",
+				"full_attention",
+			},
+
+			LinearConvKernelDim: 4,
+			LinearKeyHeadDim:    128,
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearValueHeadDim:  128,
+
+			RopeParameters: qwen3NextRopeParams{
+				MRopeInterleaved:    true,
+				MropeSection:        []int32{11, 11, 10},
+				RopeType:            "default",
+				RopeTheta:           10_000_000,
+				PartialRotaryFactor: 0.25,
+			},
+		},
+		VisionModel: qwen3NextVisionConfig{
+			Depth:                  2,
+			HiddenSize:             128,
+			NumHeads:               4,
+			InChannels:             3,
+			PatchSize:              16,
+			SpatialMergeSize:       2,
+			RMSNormEps:             1e-6,
+			RopeTheta:              10_000,
+			TemporalPatchSize:      2,
+			DeepstackVisualIndexes: []int32{1},
+		},
+		ImageTokenID:       1001,
+		VisionStartTokenID: 1002,
+		VisionEndTokenID:   1003,
+	}
+	model.VisionModel.Size.ShortestEdge = 224
+	model.VisionModel.Size.LongestEdge = 4096
+	model.VisionModel.ImageMean = []float32{0.5, 0.5, 0.5}
+	model.VisionModel.ImageStd = []float32{0.2, 0.2, 0.2}
+
+	if err := model.parseMore(os.DirFS(t.TempDir())); err != nil {
+		t.Fatal(err)
+	}
+
+	meta := model.KV(&Tokenizer{Vocabulary: &Vocabulary{}})
+	if arch := meta["general.architecture"]; arch != "qwen35" {
+		t.Fatalf("unexpected architecture: got %v want %v", arch, "qwen35")
+	}
+	// LayerTypes alternates linear/full attention; KV heads are expected only on the full-attention layers.
+	perLayer, isU32 := meta["attention.head_count_kv"].([]uint32)
+	if !isU32 {
+		t.Fatalf("attention.head_count_kv has unexpected type: %T", meta["attention.head_count_kv"])
+	}
+	if want := []uint32{0, 4, 0, 4}; !slices.Equal(perLayer, want) {
+		t.Fatalf("unexpected attention.head_count_kv: got %v want %v", perLayer, want)
+	}
+
+	if on, isBool := meta["ssm.v_head_reordered"].(bool); !(isBool && on) {
+		t.Fatalf("expected ssm.v_head_reordered=true, got %v (%T)", meta["ssm.v_head_reordered"], meta["ssm.v_head_reordered"])
+	}
+	// The same section list is expected under both mrope_sections and rope.dimension_sections.
+	mrope, isI32 := meta["mrope_sections"].([]int32)
+	if !isI32 {
+		t.Fatalf("mrope_sections has unexpected type: %T", meta["mrope_sections"])
+	}
+	if want := []int32{11, 11, 10}; !slices.Equal(mrope, want) {
+		t.Fatalf("unexpected mrope_sections: got %v want %v", mrope, want)
+	}
+	ropeSections, isI32 := meta["rope.dimension_sections"].([]int32)
+	if !isI32 {
+		t.Fatalf("rope.dimension_sections has unexpected type: %T", meta["rope.dimension_sections"])
+	}
+	if want := []int32{11, 11, 10}; !slices.Equal(ropeSections, want) {
+		t.Fatalf("unexpected rope.dimension_sections: got %v want %v", ropeSections, want)
+	}
+
+	if on, isBool := meta["rope.mrope_interleaved"].(bool); !(isBool && on) {
+		t.Fatalf("expected rope.mrope_interleaved=true, got %v (%T)", meta["rope.mrope_interleaved"], meta["rope.mrope_interleaved"])
+	}
+
+	if blocks := meta["vision.block_count"]; blocks != uint32(2) {
+		t.Fatalf("unexpected vision.block_count: got %v want %v", blocks, uint32(2))
+	}
+}
+
+func TestQwen3NextReplacements(t *testing.T) {
+	rename := strings.NewReplacer((&qwen3NextModel{}).Replacements()...).Replace
+	// One tensor name per naming scheme: nested language model, vision tower, legacy flat layout.
+	if got, want := rename("model.language_model.layers.1.linear_attn.in_proj_qkv.weight"), "blk.1.attn_qkv.weight"; got != want {
+		t.Fatalf("unexpected language-model replacement: got %q want %q", got, want)
+	}
+	if got, want := rename("model.visual.blocks.0.attn.qkv.weight"), "v.blk.0.attn_qkv.weight"; got != want {
+		t.Fatalf("unexpected vision replacement: got %q want %q", got, want)
+	}
+	if got, want := rename("model.layers.1.linear_attn.in_proj_qkvz.weight"), "blk.1.ssm_in.weight"; got != want {
+		t.Fatalf("unexpected legacy replacement: got %q want %q", got, want)
+	}
+}
+
+func TestQwen35ReordersVHeads(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearValueHeadDim:  1,
+		},
+	}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.attn_gate.weight",
+			shape: []uint64{4, 2},
+			data:  []float32{0, 1, 2, 3, 4, 5, 6, 7},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// The two middle rows, (2,3) and (4,5), are expected to trade places.
+	if actual, expected := readTensorData(t, converted[0]), []float32{0, 1, 4, 5, 2, 3, 6, 7}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected data: got %v want %v", actual, expected)
+	}
+}
+
+func TestQwen35ReordersAttnQKVOutputDim(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearKeyHeadDim:    1,
+			LinearValueHeadDim:  1,
+		},
+	}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.attn_qkv.weight",
+			shape: []uint64{8, 2}, // [out_features, in_features] (HF layout)
+			data: []float32{
+				0, 1, // q0
+				2, 3, // q1
+				4, 5, // k0
+				6, 7, // k1
+				10, 11, // v(k0,v0)
+				12, 13, // v(k0,v1)
+				20, 21, // v(k1,v0)
+				22, 23, // v(k1,v1)
+			},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// Q and K rows stay put; among the V rows, v(k0,v1) and v(k1,v0) are expected to swap.
+	if actual, expected := readTensorData(t, converted[0]), []float32{
+		0, 1, 2, 3, 4, 5, 6, 7,
+		10, 11, 20, 21, 12, 13, 22, 23,
+	}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected qkv data: got %v want %v", actual, expected)
+	}
+}
+
+func TestQwen35ReordersSsmOutInputDim(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearValueHeadDim:  1,
+		},
+	}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.ssm_out.weight",
+			shape: []uint64{2, 4},
+			data:  []float32{0, 1, 2, 3, 4, 5, 6, 7},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// Within each row of four, the two middle columns are expected to trade places.
+	if actual, expected := readTensorData(t, converted[0]), []float32{0, 2, 1, 3, 4, 6, 5, 7}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected ssm_out data: got %v want %v", actual, expected)
+	}
+}
+
+func TestQwen35ReordersSsmBetaRows(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+		},
+	}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.ssm_beta.weight",
+			shape: []uint64{4, 2},
+			data:  []float32{0, 1, 2, 3, 4, 5, 6, 7},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// The two middle rows, (2,3) and (4,5), are expected to trade places.
+	if actual, expected := readTensorData(t, converted[0]), []float32{0, 1, 4, 5, 2, 3, 6, 7}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected ssm_beta data: got %v want %v", actual, expected)
+	}
+}
+
+func TestQwen35ReordersConv1DChannelDim(t *testing.T) {
+	model := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_5",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearKeyHeadDim:    1,
+			LinearValueHeadDim:  1,
+		},
+	}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.ssm_conv1d.weight",
+			shape: []uint64{8, 2}, // [channels, kernel] after squeeze
+			data: []float32{
+				0, 1, // q0
+				2, 3, // q1
+				4, 5, // k0
+				6, 7, // k1
+				10, 11, // v(k0,v0)
+				12, 13, // v(k0,v1)
+				20, 21, // v(k1,v0)
+				22, 23, // v(k1,v1)
+			},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// Q and K channels stay put; among the V channels, v(k0,v1) and v(k1,v0) are expected to swap.
+	if actual, expected := readTensorData(t, converted[0]), []float32{
+		0, 1, 2, 3, 4, 5, 6, 7,
+		10, 11, 20, 21, 12, 13, 22, 23,
+	}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected conv1d data: got %v want %v", actual, expected)
+	}
+}
+
+func TestLegacyQwen3NextDoesNotReorderVHeads(t *testing.T) {
+	legacy := &qwen3NextModel{
+		ModelParameters: ModelParameters{
+			ModelType: "qwen3_next",
+		},
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			LinearNumKeyHeads:   2,
+			LinearNumValueHeads: 4,
+			LinearValueHeadDim:  1,
+		},
+	}
+
+	converted := legacy.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.attn_gate.weight",
+			shape: []uint64{4, 1},
+			data:  []float32{0, 1, 2, 3},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// For the legacy model type the data is expected to come back in its input order.
+	if actual, expected := readTensorData(t, converted[0]), []float32{0, 1, 2, 3}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected data for legacy qwen3next: got %v want %v", actual, expected)
+	}
+}
+
+func TestQwen35MoePackedExperts(t *testing.T) {
+	model := &qwen3NextModel{
+		qwen3NextTextConfig: qwen3NextTextConfig{
+			NumHiddenLayers: 1,
+		},
+	}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.mlp.experts.gate_up_proj",
+			shape: []uint64{2, 4, 3},
+			data: []float32{
+				0, 1, 2,
+				3, 4, 5,
+				6, 7, 8,
+				9, 10, 11,
+				12, 13, 14,
+				15, 16, 17,
+				18, 19, 20,
+				21, 22, 23,
+			},
+		},
+		&fakeTensor{
+			name:  "blk.0.mlp.experts.down_proj",
+			shape: []uint64{2, 5, 3},
+			data:  make([]float32, 2*5*3),
+		},
+	})
+
+	// byName returns the first converted tensor called name, or nil when there is none.
+	byName := func(name string) *ggml.Tensor {
+		idx := slices.IndexFunc(converted, func(c *ggml.Tensor) bool { return c.Name == name })
+		if idx < 0 {
+			return nil
+		}
+		return converted[idx]
+	}
+
+	gate := byName("blk.0.ffn_gate_exps.weight")
+	if gate == nil {
+		t.Fatalf("missing tensor %q", "blk.0.ffn_gate_exps.weight")
+	}
+	if actual, expected := gate.Shape, []uint64{2, 2, 3}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected gate shape: got %v want %v", actual, expected)
+	}
+	if actual, expected := readTensorData(t, gate), []float32{
+		0, 1, 2, 3, 4, 5,
+		12, 13, 14, 15, 16, 17,
+	}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected gate values: got %v want %v", actual, expected)
+	}
+
+	up := byName("blk.0.ffn_up_exps.weight")
+	if up == nil {
+		t.Fatalf("missing tensor %q", "blk.0.ffn_up_exps.weight")
+	}
+	if actual, expected := up.Shape, []uint64{2, 2, 3}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected up shape: got %v want %v", actual, expected)
+	}
+	if actual, expected := readTensorData(t, up), []float32{
+		6, 7, 8, 9, 10, 11,
+		18, 19, 20, 21, 22, 23,
+	}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected up values: got %v want %v", actual, expected)
+	}
+
+	down := byName("blk.0.ffn_down_exps.weight")
+	if down == nil {
+		t.Fatalf("missing tensor %q", "blk.0.ffn_down_exps.weight")
+	}
+	if actual, expected := down.Shape, []uint64{2, 5, 3}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected down shape: got %v want %v", actual, expected)
+	}
+}
+
+func TestQwen35SharedExpertGateKeepsMatrixShape(t *testing.T) {
+	model := &qwen3NextModel{}
+
+	converted := model.Tensors([]Tensor{
+		&fakeTensor{
+			name:  "blk.0.ffn_gate_inp_shexp.weight",
+			shape: []uint64{1, 4},
+			data:  []float32{0, 1, 2, 3},
+		},
+	})
+	if n := len(converted); n != 1 {
+		t.Fatalf("unexpected output tensor count: got %d want 1", n)
+	}
+	// The [1, 4] shape is expected to come through with both axes intact.
+	if actual, expected := converted[0].Shape, []uint64{1, 4}; !slices.Equal(actual, expected) {
+		t.Fatalf("unexpected shared gate shape: got %v want %v", actual, expected)
+	}
+}
--- a/convert/json_compat.go
+++ b/convert/json_compat.go
@@ -0,0 +1,97 @@
+package convert
+
+// sanitizeNonFiniteJSON returns a copy of in where the bare tokens Infinity,
+// -Infinity and NaN, which some HF configs emit but standard JSON forbids,
+// are each replaced by the number 0.
+//
+// The rewrite is deliberately narrow:
+// - bytes inside quoted strings are copied through untouched
+// - a token is replaced only when hasToken accepts it as a whole token
+//
+// 0 is the stand-in because encoding/json rejects non-finite values, and
+// these fields are typically model-side metadata not consumed by the
+// converter. Empty input is returned as-is.
+func sanitizeNonFiniteJSON(in []byte) []byte {
+	if len(in) == 0 {
+		return in
+	}
+
+	// Candidates are tried in this order at every position outside a string.
+	nonFinite := [...]string{"-Infinity", "Infinity", "NaN"}
+
+	var (
+		quoted  bool // inside a JSON string literal
+		escaped bool // previous string byte was an unconsumed backslash
+	)
+	sanitized := make([]byte, 0, len(in))
+
+scan:
+	for pos := 0; pos < len(in); {
+		b := in[pos]
+
+		switch {
+		case quoted:
+			// String contents pass through verbatim; only track where the string ends.
+			sanitized = append(sanitized, b)
+			switch {
+			case escaped:
+				escaped = false
+			case b == '\\':
+				escaped = true
+			case b == '"':
+				quoted = false
+			}
+			pos++
+			continue scan
+
+		case b == '"':
+			quoted = true
+			sanitized = append(sanitized, b)
+			pos++
+			continue scan
+		}
+
+		for _, tok := range nonFinite {
+			if hasToken(in, pos, tok) {
+				sanitized = append(sanitized, '0')
+				pos += len(tok)
+				continue scan
+			}
+		}
+
+		// Ordinary byte outside any string: copy it.
+		sanitized = append(sanitized, b)
+		pos++
+	}
+
+	return sanitized
+}
+
+// hasToken reports whether tok sits at in[at:] and is bounded on both sides by the input edge or a JSON value boundary byte.
+func hasToken(in []byte, at int, tok string) bool {
+	end := at + len(tok)
+	if at < 0 || end > len(in) || string(in[at:end]) != tok {
+		return false
+	}
+	leftOK := at == 0 || isJSONValuePrefixBoundary(in[at-1])
+	return leftOK && (end == len(in) || isJSONValueSuffixBoundary(in[end]))
+}
+
+// isJSONWhitespace reports whether b is a space, tab, line feed or carriage return.
+func isJSONWhitespace(b byte) bool {
+	switch b {
+	case ' ', '\t', '\n', '\r':
+		return true
+	}
+	return false
+}
+
+// isJSONValuePrefixBoundary reports whether b may directly precede a token: whitespace, ':', ',' or '['.
+func isJSONValuePrefixBoundary(b byte) bool {
+	return b == ':' || b == ',' || b == '[' || isJSONWhitespace(b)
+}
+
+// isJSONValueSuffixBoundary reports whether b may directly follow a token: whitespace, ',', ']' or '}'.
+func isJSONValueSuffixBoundary(b byte) bool {
+	return b == ',' || b == ']' || b == '}' || isJSONWhitespace(b)
+}
--- a/convert/json_compat_test.go
+++ b/convert/json_compat_test.go
@@ -0,0 +1,46 @@
+package convert
+
+import "testing"
+
+func TestSanitizeNonFiniteJSON(t *testing.T) { // table-driven: each case compares the sanitized output with want
+	tests := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{
+			name: "infinity token",
+			in:   `{"a":[0,Infinity,1]}`,
+			want: `{"a":[0,0,1]}`,
+		},
+		{
+			name: "negative infinity token",
+			in:   `{"a":-Infinity}`,
+			want: `{"a":0}`,
+		},
+		{
+			name: "nan token",
+			in:   `{"a":NaN}`,
+			want: `{"a":0}`,
+		},
+		{
+			name: "tokens inside strings untouched",
+			in:   `{"a":"Infinity -Infinity NaN","b":Infinity}`,
+			want: `{"a":"Infinity -Infinity NaN","b":0}`,
+		},
+		{
+			name: "identifier-like token untouched", // 'V' after "Infinity" is not a suffix boundary, so nothing is rewritten
+			in:   `{"a":InfinityValue}`,
+			want: `{"a":InfinityValue}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := string(sanitizeNonFiniteJSON([]byte(tt.in)))
+			if got != tt.want {
+				t.Fatalf("sanitizeNonFiniteJSON() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -101,6 +101,8 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
 			t.Pre = "deepseek-coder"
 		case "1ff7f41064896984db5d1bb6ff64fa4bc29007d08c1b439e505b7392777a319e":
 			t.Pre = "qwen2"
+		case "00431aed57e696b747435f734d1e3b9b1bfd931a121fb5cac7129e97c181e9ba":
+			t.Pre = "qwen35"
 		case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
 			// noop, empty pretokenizer
 		default:
@@ -212,8 +214,13 @@ type tokenizer struct {

 	PreTokenizer struct {
 		PreTokenizers []struct {
-			Type    string `json:"type"`
-			Pattern struct {
+			Type           string `json:"type"`
+			Behavior       string `json:"behavior"`
+			Invert         bool   `json:"invert"`
+			AddPrefixSpace bool   `json:"add_prefix_space"`
+			TrimOffsets    bool   `json:"trim_offsets"`
+			UseRegex       bool   `json:"use_regex"`
+			Pattern        struct {
 				Regex string `json:"Regex"`
 			} `json:"pattern"`
 		} `json:"pretokenizers"`
--- a/convert/tokenizer_test.go
+++ b/convert/tokenizer_test.go
@@ -191,6 +191,84 @@ func TestParseTokenizer(t *testing.T) {
 				Pre: "default",
 			},
 		},
+		{
+			name: "llama-bpe pretokenizer and control tokens",
+			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
+				"tokenizer.json": strings.NewReader(`{
+					"added_tokens": [
+						{"id": 1, "content": "<|startoftext|>", "special": true},
+						{"id": 6, "content": "<|im_start|>", "special": true},
+						{"id": 7, "content": "<|im_end|>", "special": true},
+						{"id": 8, "content": "<|tool_list_start|>", "special": true},
+						{"id": 9, "content": "<|tool_list_end|>", "special": true},
+						{"id": 10, "content": "<|tool_call_start|>", "special": true},
+						{"id": 11, "content": "<|tool_call_end|>", "special": true},
+						{"id": 12, "content": "<|tool_response_start|>", "special": true},
+						{"id": 13, "content": "<|tool_response_end|>", "special": true},
+						{"id": 396, "content": "<image>", "special": true},
+						{"id": 64400, "content": "<think>", "special": true},
+						{"id": 64401, "content": "</think>", "special": true}
+					],
+					"model": {
+						"vocab": {
+							"<|startoftext|>": 1,
+							"<|im_start|>": 6,
+							"<|im_end|>": 7,
+							"<|tool_list_start|>": 8,
+							"<|tool_list_end|>": 9,
+							"<|tool_call_start|>": 10,
+							"<|tool_call_end|>": 11,
+							"<|tool_response_start|>": 12,
+							"<|tool_response_end|>": 13,
+							"<image>": 396,
+							"<think>": 64400,
+							"</think>": 64401
+						}
+					},
+					"pre_tokenizer": {
+						"type": "Sequence",
+						"pretokenizers": [
+							{
+								"type": "Split",
+								"pattern": {
+									"Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+								},
+								"behavior": "Isolated",
+								"invert": false
+							},
+							{
+								"type": "ByteLevel",
+								"add_prefix_space": false,
+								"trim_offsets": true,
+								"use_regex": false
+							}
+						]
+					}
+				}`),
+			}),
+			want: &Tokenizer{
+				Vocabulary: &Vocabulary{
+					Model: "gpt2",
+					Tokens: []string{
+						"<|startoftext|>",
+						"<|im_start|>",
+						"<|im_end|>",
+						"<|tool_list_start|>",
+						"<|tool_list_end|>",
+						"<|tool_call_start|>",
+						"<|tool_call_end|>",
+						"<|tool_response_start|>",
+						"<|tool_response_end|>",
+						"<image>",
+						"<think>",
+						"</think>",
+					},
+					Scores: []float32{1, 6, 7, 8, 9, 10, 11, 12, 13, 396, 64400, 64401},
+					Types:  []int32{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
+				},
+				Pre: "llama-bpe",
+			},
+		},
 		{
 			name: "list string merges",
 			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
@@ -308,6 +386,28 @@ func TestParseTokenizer(t *testing.T) {
 				Pre: "default",
 			},
 		},
+		{
+			name: "qwen35 pretokenizer",
+			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
+				"tokenizer.json": strings.NewReader(`{
+					"pre_tokenizer": {
+						"type": "Sequence",
+						"pretokenizers": [
+							{
+								"type": "Split",
+								"pattern": {
+									"Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+								}
+							}
+						]
+					}
+				}`),
+			}),
+			want: &Tokenizer{
+				Vocabulary: &Vocabulary{Model: "gpt2"},
+				Pre:        "qwen35",
+			},
+		},
 	}

 	for _, tt := range cases {
--- a/docs/cloud.mdx
+++ b/docs/cloud.mdx
@@ -226,3 +226,7 @@ curl https://ollama.com/api/chat \

  </Tab>
 </Tabs>
+
+## Local only
+
+Ollama can run in local-only mode by [disabling Ollama's cloud features](./faq#how-do-i-disable-ollamas-cloud-features).
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -106,20 +106,23 @@
            "group": "Integrations",
            "pages": [
              "/integrations/index",
+              {
+                "group": "Assistants",
+                "expanded": true,
+                "pages": [
+                  "/integrations/openclaw"
+                ]
+              },
              {
                "group": "Coding",
+                "expanded": true,
                "pages": [
                  "/integrations/claude-code",
                  "/integrations/codex",
                  "/integrations/opencode",
                  "/integrations/droid",
-                  "/integrations/goose"
-                ]
-              },
-              {
-                "group": "Assistants",
-                "pages": [
-                  "/integrations/openclaw"
+                  "/integrations/goose",
+                  "/integrations/pi"
                ]
              },
              {
--- a/docs/faq.mdx
+++ b/docs/faq.mdx
@@ -160,6 +160,26 @@ docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-

 Ollama runs locally. We don't see your prompts or data when you run locally. When using cloud-hosted models, we process your prompts and responses to provide the service but do not store or log that content and never train on it. We collect basic account info and limited usage metadata to provide the service that does not include prompt or response content. We don't sell your data. You can delete your account anytime.

+## How do I disable Ollama's cloud features?
+
+Ollama can run in local-only mode by disabling its cloud features. With cloud features turned off, Ollama's cloud models and web search are no longer available.
+
+Set `disable_ollama_cloud` in `~/.ollama/server.json`:
+
+```json
+{
+  "disable_ollama_cloud": true
+}
+```
+
+You can also set the environment variable:
+
+```shell
+OLLAMA_NO_CLOUD=1
+```
+
+Restart Ollama after changing configuration. Once disabled, Ollama's logs will show `Ollama cloud disabled: true`. 
+
 ## How can I expose Ollama on my network?

 Ollama binds 127.0.0.1 port 11434 by default. Change the bind address with the `OLLAMA_HOST` environment variable.
--- a/docs/integrations/index.mdx
+++ b/docs/integrations/index.mdx
@@ -13,6 +13,7 @@ Coding assistants that can read, modify, and execute code in your projects.
 - [OpenCode](/integrations/opencode)
 - [Droid](/integrations/droid)
 - [Goose](/integrations/goose)
+- [Pi](/integrations/pi)

 ## Assistants

--- a/docs/integrations/openclaw.mdx
+++ b/docs/integrations/openclaw.mdx
@@ -4,47 +4,65 @@ title: OpenClaw

 OpenClaw is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.

-## Install
-
-Install [OpenClaw](https://openclaw.ai/) 
-
-```bash
-npm install -g openclaw@latest
-```
-
-Then run the onboarding wizard:
-
-```bash
-openclaw onboard --install-daemon
-```
-
-<Note>OpenClaw requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
-
-## Usage with Ollama
-
-### Quick setup
+## Quick start

 ```bash
 ollama launch openclaw
 ```

+Ollama handles everything automatically:
+
+1. **Install** — If OpenClaw isn't installed, Ollama prompts to install it via npm
+2. **Security** — On the first launch, a security notice explains the risks of tool access
+3. **Model** — Pick a model from the selector (local or cloud)
+4. **Onboarding** — Ollama configures the provider, installs the gateway daemon, and sets your model as the primary
+5. **Gateway** — Starts in the background and opens the OpenClaw TUI
+
+<Note>OpenClaw requires a larger context window. When using local models, a context window of at least 64k tokens is recommended. See [Context length](/context-length) for more information.</Note>
+
 <Note>Previously known as Clawdbot. `ollama launch clawdbot` still works as an alias.</Note>

-This configures OpenClaw to use Ollama and starts the gateway.
-If the gateway is already running, no changes need to be made as the gateway will auto-reload the changes. 
+## Configure without launching

+To change the model without starting the gateway and TUI:

-To configure without launching:
-
-```shell
+```bash
 ollama launch openclaw --config
 ```

-## Recommended Models
+To use a specific model directly:

- `qwen3-coder`
- `glm-4.7`
- `gpt-oss:20b`
- `gpt-oss:120b`
+```bash
+ollama launch openclaw --model kimi-k2.5:cloud
+```
+
+If the gateway is already running, it restarts automatically to pick up the new model.
+
+## Recommended models
+
+**Cloud models:**
+
+- `kimi-k2.5:cloud` — Multimodal reasoning with subagents
+- `minimax-m2.5:cloud` — Fast, efficient coding and real-world productivity
+- `glm-5:cloud` — Reasoning and code generation
+
+**Local models:**
+
+- `glm-4.7-flash` — Reasoning and code generation locally (~25 GB VRAM)
+
+More models at [ollama.com/search](https://ollama.com/search?c=cloud).
+
+## Connect messaging apps
+
+```bash
+openclaw configure --section channels
+```
+
+Link WhatsApp, Telegram, Slack, Discord, or iMessage to chat with your local models from anywhere.
+
+## Stopping the gateway
+
+```bash
+openclaw gateway stop
+```

-Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).
--- a/docs/integrations/pi.mdx
+++ b/docs/integrations/pi.mdx
@@ -0,0 +1,57 @@
+---
+title: Pi
+---
+
+Pi is a minimal AI agent toolkit with plugin support.
+
+## Install
+
+Install [Pi](https://github.com/badlogic/pi-mono):
+
+```bash
+npm install -g @mariozechner/pi-coding-agent
+```
+
+## Usage with Ollama
+
+### Quick setup
+
+```bash
+ollama launch pi
+```
+
+To configure without launching:
+
+```shell
+ollama launch pi --config
+```
+
+### Manual setup
+
+Add a configuration block to `~/.pi/agent/models.json`:
+
+```json
+{
+  "providers": {
+    "ollama": {
+      "baseUrl": "http://localhost:11434/v1",
+      "api": "openai-completions",
+      "apiKey": "ollama",
+      "models": [
+        {
+          "id": "qwen3-coder"
+        }
+      ]
+    }
+  }
+}
+```
+
+Update `~/.pi/agent/settings.json` to set the default provider:
+
+```json
+{
+  "defaultProvider": "ollama",
+  "defaultModel": "qwen3-coder"
+}
+```
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -27,9 +27,17 @@ The menu provides quick access to:
 - **Launch tools** - Claude Code, Codex, OpenClaw, and more
 - **Additional integrations** - Available under "More..."

+## Assistants
+
+Launch [OpenClaw](/integrations/openclaw), a personal AI with 100+ skills:
+
+```sh
+ollama launch openclaw
+```
+
 ## Coding

-Launch coding tools with Ollama models:
+Launch [Claude Code](/integrations/claude-code) and other coding tools with Ollama models:

 ```sh
 ollama launch claude
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -1,6 +1,8 @@
 package envconfig

 import (
+	"encoding/json"
+	"errors"
 	"fmt"
 	"log/slog"
 	"math"
@@ -11,6 +13,7 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 )

@@ -206,6 +209,8 @@ var (
 	UseAuth = Bool("OLLAMA_AUTH")
 	// Enable Vulkan backend
 	EnableVulkan = Bool("OLLAMA_VULKAN")
+	// NoCloudEnv checks the OLLAMA_NO_CLOUD environment variable.
+	NoCloudEnv = Bool("OLLAMA_NO_CLOUD")
 )

 func String(s string) func() string {
@@ -285,6 +290,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
 		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
 		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
+		"OLLAMA_NO_CLOUD":          {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
 		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
@@ -334,3 +340,91 @@ func Values() map[string]string {
 func Var(key string) string {
 	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
 }
+
+// serverConfigData holds the fields parsed from ~/.ollama/server.json.
+type serverConfigData struct {
+	DisableOllamaCloud bool `json:"disable_ollama_cloud,omitempty"` // true disables Ollama cloud features (read by NoCloud)
+}
+
+var (
+	serverCfgMu     sync.RWMutex     // guards serverCfgLoaded and serverCfg
+	serverCfgLoaded bool             // true once serverCfg has been populated
+	serverCfg       serverConfigData // cached settings from ~/.ollama/server.json
+)
+
+func loadServerConfig() { // populates serverCfg from ~/.ollama/server.json unless a config is already cached
+	serverCfgMu.RLock()
+	if serverCfgLoaded {
+		serverCfgMu.RUnlock()
+		return
+	}
+	serverCfgMu.RUnlock() // the file is read below without holding the lock
+
+	cfg := serverConfigData{}
+	home, err := os.UserHomeDir()
+	if err == nil {
+		path := filepath.Join(home, ".ollama", "server.json")
+		data, err := os.ReadFile(path)
+		if err != nil {
+			if !errors.Is(err, os.ErrNotExist) { // a missing file is not logged
+				slog.Debug("envconfig: could not read server config", "error", err)
+			}
+		} else if err := json.Unmarshal(data, &cfg); err != nil {
+			slog.Debug("envconfig: could not parse server config", "error", err) // NOTE(review): a malformed file is reported only at debug level — confirm that is intended
+		}
+	}
+
+	serverCfgMu.Lock()
+	defer serverCfgMu.Unlock()
+	if serverCfgLoaded { // another caller stored a config while this one was reading the file
+		return
+	}
+	serverCfg = cfg
+	serverCfgLoaded = true
+}
+
+func cachedServerConfig() serverConfigData { // returns a copy of the cached config under the read lock; does not trigger a load
+	serverCfgMu.RLock()
+	defer serverCfgMu.RUnlock()
+	return serverCfg
+}
+
+// ReloadServerConfig discards the cached ~/.ollama/server.json settings and reads the file again.
+func ReloadServerConfig() {
+	serverCfgMu.Lock()
+	serverCfgLoaded = false
+	serverCfg = serverConfigData{}
+	serverCfgMu.Unlock()
+
+	loadServerConfig()
+}
+
+// NoCloud reports whether Ollama cloud features are disabled, either by the
+// OLLAMA_NO_CLOUD environment variable or by the disable_ollama_cloud field in
+// ~/.ollama/server.json. The file is not consulted when the variable disables cloud.
+func NoCloud() bool {
+	if NoCloudEnv() {
+		return true
+	}
+	loadServerConfig()
+	return cachedServerConfig().DisableOllamaCloud
+}
+
+// NoCloudSource reports which setting disables cloud features: "env", "config",
+// "both" when both do, or "none" when neither does.
+func NoCloudSource() string {
+	envDisabled := NoCloudEnv()
+	loadServerConfig()
+	configDisabled := cachedServerConfig().DisableOllamaCloud
+
+	switch {
+	case envDisabled && configDisabled:
+		return "both"
+	case envDisabled:
+		return "env"
+	case configDisabled:
+		return "config"
+	default:
+		return "none"
+	}
+}
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -3,6 +3,8 @@ package envconfig
 import (
 	"log/slog"
 	"math"
+	"os"
+	"path/filepath"
 	"testing"
 	"time"

@@ -326,3 +328,81 @@ func TestLogLevel(t *testing.T) {
 		})
 	}
 }
+
+func TestNoCloud(t *testing.T) { // checks NoCloud and NoCloudSource for each env/config combination
+	tests := []struct {
+		name          string
+		envValue      string
+		configContent string
+		wantDisabled  bool
+		wantSource    string
+	}{
+		{
+			name:         "neither env nor config",
+			wantDisabled: false,
+			wantSource:   "none",
+		},
+		{
+			name:         "env only",
+			envValue:     "1",
+			wantDisabled: true,
+			wantSource:   "env",
+		},
+		{
+			name:          "config only",
+			configContent: `{"disable_ollama_cloud": true}`,
+			wantDisabled:  true,
+			wantSource:    "config",
+		},
+		{
+			name:          "both env and config",
+			envValue:      "1",
+			configContent: `{"disable_ollama_cloud": true}`,
+			wantDisabled:  true,
+			wantSource:    "both",
+		},
+		{
+			name:          "config false",
+			configContent: `{"disable_ollama_cloud": false}`,
+			wantDisabled:  false,
+			wantSource:    "none",
+		},
+		{
+			name:          "invalid config ignored",
+			configContent: `{invalid json`,
+			wantDisabled:  false,
+			wantSource:    "none",
+		},
+		{
+			name:         "no config file", // NOTE(review): same inputs as "neither env nor config" above
+			wantDisabled: false,
+			wantSource:   "none",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			home := t.TempDir()
+			if tt.configContent != "" {
+				configDir := filepath.Join(home, ".ollama")
+				if err := os.MkdirAll(configDir, 0o755); err != nil {
+					t.Fatal(err)
+				}
+				if err := os.WriteFile(filepath.Join(configDir, "server.json"), []byte(tt.configContent), 0o644); err != nil {
+					t.Fatal(err)
+				}
+			}
+
+			setTestHome(t, home) // also reloads the cached server config from the new home
+			t.Setenv("OLLAMA_NO_CLOUD", tt.envValue)
+
+			if got := NoCloud(); got != tt.wantDisabled {
+				t.Errorf("NoCloud() = %v, want %v", got, tt.wantDisabled)
+			}
+
+			if got := NoCloudSource(); got != tt.wantSource {
+				t.Errorf("NoCloudSource() = %q, want %q", got, tt.wantSource)
+			}
+		})
+	}
+}
--- a/envconfig/test_home_test.go
+++ b/envconfig/test_home_test.go
@@ -0,0 +1,10 @@
+package envconfig
+
+import "testing"
+
+func setTestHome(t *testing.T, home string) { // redirects the home directory to home for the duration of the test
+	t.Helper()
+	t.Setenv("HOME", home)
+	t.Setenv("USERPROFILE", home)
+	ReloadServerConfig() // drop any config cached from a previous home directory
+}
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -160,6 +160,27 @@ func (kv KV) SSMGroupCount() uint64 {
 	return uint64(kv.Uint("ssm.group_count"))
 }

+func (kv KV) FFNLength() []uint64 { // returns one feed_forward_length entry per block (length kv.BlockCount())
+	ffnLengthDefault := uint32(0)
+	ffnLength := kv.UintOrArrayValueAsArray("feed_forward_length", ffnLengthDefault)
+	if len(ffnLength) == 1 { // a single value is used for every block
+		ffnLengthDefault = ffnLength[0]
+	}
+	nLayers := int(kv.BlockCount())
+	if len(ffnLength) > nLayers {
+		slog.Warn("got more elements of feed_forward_length than layers", "len(ffnLength)", len(ffnLength), "layers", nLayers)
+	}
+	out := make([]uint64, nLayers)
+	for i := range nLayers {
+		if i >= len(ffnLength) {
+			out[i] = uint64(ffnLengthDefault) // blocks past the end of the array get the default (0 unless a single value was given)
+		} else {
+			out[i] = uint64(ffnLength[i])
+		}
+	}
+	return out
+}
+
 // general types

 func (kv KV) String(key string, defaultValue ...string) string {
@@ -264,15 +285,18 @@ func (kv KV) OllamaEngineRequired() bool {
 		"llama4",
 		"mistral3",
 		"mllama",
+		"nemotron_h", "nemotron_h_moe",
 		"nomic-bert",
 		"olmo3",
 		"qwen25vl",
 		"qwen3", "qwen3moe",
+		"qwen35", "qwen35moe",
 		"qwen3next",
 		"qwen3vl", "qwen3vlmoe",
 		"glm4moelite",
 		"glmocr",
 		"lfm2",
+		"lfm2moe",
 	}, kv.Architecture())
 }

@@ -845,7 +869,12 @@ func (f GGML) SupportsFlashAttention() bool {
 		return false
 	}

-	if arch := f.KV().Architecture(); slices.Contains([]string{"gemma2"}, arch) {
+	arch := f.KV().Architecture()
+	if slices.Contains([]string{"qwen35", "qwen35moe", "qwen3next"}, arch) {
+		return true
+	}
+
+	if slices.Contains([]string{"gemma2"}, arch) {
 		return false
 	}

@@ -864,9 +893,12 @@ func (f GGML) FlashAttention() bool {
 		"glmocr",
 		"gptoss", "gpt-oss",
 		"lfm2",
+		"lfm2moe",
 		"mistral3",
+		"nemotron_h", "nemotron_h_moe",
 		"olmo3",
 		"qwen3", "qwen3moe",
+		"qwen35", "qwen35moe",
 		"qwen3next",
 		"qwen3vl", "qwen3vlmoe",
 	}, f.KV().String("general.architecture"))
--- a/fs/ggml/gguf.go
+++ b/fs/ggml/gguf.go
@@ -245,7 +245,22 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	padding := ggufPadding(offset, int64(alignment))
 	llm.tensorOffset = uint64(offset + padding)

+	// get file size to validate tensor bounds
+	fileSize, err := rs.Seek(0, io.SeekEnd)
+	if err != nil {
+		return fmt.Errorf("failed to determine file size: %w", err)
+	}
+
+	if _, err := rs.Seek(offset, io.SeekStart); err != nil {
+		return fmt.Errorf("failed to seek back after size check: %w", err)
+	}
+
 	for _, tensor := range llm.tensors {
+		tensorEnd := llm.tensorOffset + tensor.Offset + tensor.Size()
+		if tensorEnd > uint64(fileSize) {
+			return fmt.Errorf("tensor %q offset+size (%d) exceeds file size (%d)", tensor.Name, tensorEnd, fileSize)
+		}
+
 		offset, err := rs.Seek(0, io.SeekCurrent)
 		if err != nil {
 			return fmt.Errorf("failed to get current offset: %w", err)
--- a/fs/ggml/gguf_test.go
+++ b/fs/ggml/gguf_test.go
@@ -11,21 +11,21 @@ import (
 )

 func TestWriteGGUF(t *testing.T) {
-	b := bytes.NewBuffer(make([]byte, 2*3))
+	tensorData := make([]byte, 2*3*4) // 6 F32 elements = 24 bytes
 	for range 8 {
 		t.Run("shuffle", func(t *testing.T) {
 			t.Parallel()

 			ts := []*Tensor{
-				{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: b},
-				{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: b},
-				{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: b},
+				{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.0.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.1.ffn_up.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.2.ffn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.1.ffn_down.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "blk.0.attn_k.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewReader(tensorData)},
+				{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewReader(tensorData)},
 			}

 			rand.Shuffle(len(ts), func(i, j int) {
@@ -98,4 +98,32 @@ func TestWriteGGUF(t *testing.T) {
 			}
 		})
 	}
+
+	t.Run("truncated_tensor_data", func(t *testing.T) {
+		t.Parallel()
+
+		ts := []*Tensor{
+			{Name: "blk.0.attn.weight", Kind: 0, Shape: []uint64{512, 2}, WriterTo: bytes.NewBuffer(make([]byte, 32))},
+		}
+
+		w, err := os.CreateTemp(t.TempDir(), "truncated_*.bin")
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer w.Close()
+
+		if err := WriteGGUF(w, KV{"general.architecture": "test"}, ts); err != nil {
+			t.Fatal(err)
+		}
+
+		r, err := os.Open(w.Name())
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer r.Close()
+
+		if _, err := Decode(r, -1); err == nil {
+			t.Error("Decode should reject GGUF files where tensor data extends beyond file size")
+		}
+	})
 }
--- a/go.mod
+++ b/go.mod
@@ -26,6 +26,7 @@ require (
 	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
 	github.com/dlclark/regexp2 v1.11.4
 	github.com/emirpasic/gods/v2 v2.0.0-alpha
+	github.com/klauspost/compress v1.18.3
 	github.com/mattn/go-runewidth v0.0.16
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
--- a/go.sum
+++ b/go.sum
@@ -122,7 +122,6 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS
 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
-github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
 github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
 github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI=
@@ -150,8 +149,9 @@ github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+
 github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
 github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
+github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
 github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
 github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
 github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
--- a/internal/cloud/policy.go
+++ b/internal/cloud/policy.go
@@ -0,0 +1,25 @@
+package cloud
+
+import (
+	"github.com/ollama/ollama/envconfig"
+)
+
+const DisabledMessagePrefix = "ollama cloud is disabled" // prefix of every message built by DisabledError
+
+// Status reports whether cloud is disabled (envconfig.NoCloud) and which
+// setting caused it (envconfig.NoCloudSource): "none", "env", "config", or "both".
+func Status() (disabled bool, source string) {
+	return envconfig.NoCloud(), envconfig.NoCloudSource()
+}
+
+func Disabled() bool { // reports whether Ollama cloud features are disabled; shorthand for envconfig.NoCloud
+	return envconfig.NoCloud()
+}
+
+func DisabledError(operation string) string { // builds the message for a blocked cloud operation; returns a string, not an error value
+	if operation == "" {
+		return DisabledMessagePrefix
+	}
+
+	return DisabledMessagePrefix + ": " + operation
+}
--- a/internal/cloud/policy_test.go
+++ b/internal/cloud/policy_test.go
@@ -0,0 +1,85 @@
+package cloud
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestStatus(t *testing.T) { // checks Status for each combination of OLLAMA_NO_CLOUD and server.json
+	tests := []struct {
+		name          string
+		envValue      string
+		configContent string
+		disabled      bool
+		source        string
+	}{
+		{
+			name:     "none",
+			disabled: false,
+			source:   "none",
+		},
+		{
+			name:     "env only",
+			envValue: "1",
+			disabled: true,
+			source:   "env",
+		},
+		{
+			name:          "config only",
+			configContent: `{"disable_ollama_cloud": true}`,
+			disabled:      true,
+			source:        "config",
+		},
+		{
+			name:          "both",
+			envValue:      "1",
+			configContent: `{"disable_ollama_cloud": true}`,
+			disabled:      true,
+			source:        "both",
+		},
+		{
+			name:          "invalid config ignored",
+			configContent: `{invalid json`,
+			disabled:      false,
+			source:        "none",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			home := t.TempDir()
+			if tt.configContent != "" {
+				configPath := filepath.Join(home, ".ollama", "server.json")
+				if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
+					t.Fatal(err)
+				}
+				if err := os.WriteFile(configPath, []byte(tt.configContent), 0o644); err != nil {
+					t.Fatal(err)
+				}
+			}
+
+			setTestHome(t, home) // also reloads envconfig's cached server config from the new home
+			t.Setenv("OLLAMA_NO_CLOUD", tt.envValue)
+
+			disabled, source := Status()
+			if disabled != tt.disabled {
+				t.Fatalf("disabled: expected %v, got %v", tt.disabled, disabled)
+			}
+			if source != tt.source {
+				t.Fatalf("source: expected %q, got %q", tt.source, source)
+			}
+		})
+	}
+}
+
+func TestDisabledError(t *testing.T) { // covers both the bare prefix and the "prefix: operation" form
+	if got := DisabledError(""); got != DisabledMessagePrefix {
+		t.Fatalf("expected %q, got %q", DisabledMessagePrefix, got)
+	}
+
+	want := DisabledMessagePrefix + ": remote inference is unavailable"
+	if got := DisabledError("remote inference is unavailable"); got != want {
+		t.Fatalf("expected %q, got %q", want, got)
+	}
+}
--- a/internal/cloud/test_home_test.go
+++ b/internal/cloud/test_home_test.go
@@ -0,0 +1,14 @@
+package cloud
+
+import (
+	"testing"
+
+	"github.com/ollama/ollama/envconfig"
+)
+
+func setTestHome(t *testing.T, home string) { // redirects the home directory to home for the duration of the test
+	t.Helper()
+	t.Setenv("HOME", home)
+	t.Setenv("USERPROFILE", home)
+	envconfig.ReloadServerConfig() // drop any config envconfig cached from a previous home directory
+}
--- a/kvcache/recurrent.go
+++ b/kvcache/recurrent.go
@@ -0,0 +1,752 @@
+package kvcache
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"slices"
+
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/model/input"
+)
+
+// Default checkpoint settings that NewRecurrentCache installs on a new cache.
+const (
+	// DefaultCheckpointCount is the initial value of the cache's checkpointCount.
+	DefaultCheckpointCount    = 24
+	// DefaultCheckpointMinPos is the initial value of the cache's checkpointMinPos.
+	DefaultCheckpointMinPos   = int32(16)
+	// DefaultCheckpointInterval is the initial value of the cache's checkpointInterval.
+	DefaultCheckpointInterval = int32(1664)
+)
+
+// ErrInvalidRecurrentShape is returned by RecurrentState and RecurrentState4D
+// when the requested dimensions are missing, non-positive, or do not multiply
+// out to the configured recurrent state size.
+var ErrInvalidRecurrentShape = errors.New("kvcache: invalid recurrent state shape")
+
+// RecurrentConfig configures a shared hybrid recurrent cache.
+type RecurrentConfig struct {
+	// Shift is passed through to NewCausalCache for the wrapped causal KV cache.
+	Shift               func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error)
+	// ConvDim and ConvChannels give the per-sequence conv state shape; each
+	// conv state row holds ConvDim*ConvChannels values.
+	ConvDim             int
+	ConvChannels        int
+	// RecurrentStateSize is the number of values in one per-sequence recurrent
+	// state row.
+	RecurrentStateSize  int
+	// CheckpointLogPrefix tags checkpoint log messages; when empty,
+	// "kvcache.recurrent" is used.
+	CheckpointLogPrefix string
+}
+
+// Compile-time checks that *Recurrent satisfies Cache and CheckpointCache.
+var (
+	_ Cache           = (*Recurrent)(nil)
+	_ CheckpointCache = (*Recurrent)(nil)
+)
+
+// Recurrent is a cache that stores:
+// - a standard causal KV cache
+// - per-sequence conv state for recurrent operators
+// - per-sequence recurrent state for recurrent operators
+//
+// Conv state shape (per layer, per sequence): [convDim, convChannels]
+// Recurrent state shape (per layer, per sequence): [recurrentStateSize]
+type Recurrent struct {
+	kv *Causal
+
+	backend      ml.Backend
+	dtype        ml.DType
+	maxSequences int
+
+	// Conv state dimensions
+	convDim      int
+	convChannels int
+
+	// Recurrent state dimensions
+	recurrentStateSize int
+
+	logPrefix string
+
+	// slot mapping for recurrent state (copy-on-write)
+	slotForSeq  map[int]int
+	refCount    []int
+	freeSlots   []int
+	seqCounts   map[int]int
+	slotScratch [1]int32
+
+	// per-layer conv state buffers (allocated lazily)
+	convCtxs   map[int]ml.Context
+	convStates map[int]ml.Tensor // [convDim*convChannels, maxSlots]
+
+	// per-layer recurrent state buffers (allocated lazily)
+	recurrentCtxs   map[int]ml.Context
+	recurrentStates map[int]ml.Tensor // [recurrentStateSize, maxSlots]
+
+	// recurrent checkpoints (per slot)
+	checkpointCount     int
+	checkpointMinPos    int32
+	checkpointInterval  int32
+	checkpointCtxSize   int
+	checkpoints         map[int]*slotCheckpointStore
+	pendingRestore      map[int]checkpointRestore
+	curCheckpointPos    []int32
+	curCheckpointSlots  map[int]int
+	reserveCheckpoints  bool
+	checkpointConvCtxs  map[int]ml.Context
+	checkpointRecurCtxs map[int]ml.Context
+	checkpointReserved  map[int]struct{}
+
+	// current forward batch (derived in StartForward)
+	curSeqs       []int
+	curSlots      []int
+	curSlotsInput ml.Tensor
+	curSeqTokens  int
+
+	// track if EnsureWritable has been called for this forward pass
+	writableEnsured bool
+	writableError   error
+}
+
+// NewRecurrentCache builds a Recurrent cache from config. It wraps a causal KV
+// cache created with config.Shift, copies the state dimensions and log prefix,
+// allocates all bookkeeping maps, and applies the package's default checkpoint
+// settings. Init must still be called before the slot allocator is usable,
+// since refCount and freeSlots are populated there.
+func NewRecurrentCache(config RecurrentConfig) *Recurrent {
+	return &Recurrent{
+		kv:                  NewCausalCache(config.Shift),
+		convDim:             config.ConvDim,
+		convChannels:        config.ConvChannels,
+		recurrentStateSize:  config.RecurrentStateSize,
+		logPrefix:           config.CheckpointLogPrefix,
+		slotForSeq:          make(map[int]int),
+		seqCounts:           make(map[int]int),
+		convCtxs:            make(map[int]ml.Context),
+		convStates:          make(map[int]ml.Tensor),
+		recurrentCtxs:       make(map[int]ml.Context),
+		recurrentStates:     make(map[int]ml.Tensor),
+		checkpointCount:     DefaultCheckpointCount,
+		checkpointMinPos:    DefaultCheckpointMinPos,
+		checkpointInterval:  DefaultCheckpointInterval,
+		checkpoints:         make(map[int]*slotCheckpointStore),
+		pendingRestore:      make(map[int]checkpointRestore),
+		curCheckpointSlots:  make(map[int]int),
+		checkpointConvCtxs:  make(map[int]ml.Context),
+		checkpointRecurCtxs: make(map[int]ml.Context),
+		checkpointReserved:  make(map[int]struct{}),
+	}
+}
+
+// Init binds the cache to a backend and sizes it for maxSequences sequences.
+// It resets the checkpoint bookkeeping, rebuilds the slot free list so that
+// slot 0 is handed out first, and finally initializes the wrapped causal KV
+// cache with the same arguments.
+func (c *Recurrent) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {
+	c.backend, c.dtype, c.maxSequences = backend, dtype, maxSequences
+
+	// Start from empty checkpoint state.
+	c.checkpoints = make(map[int]*slotCheckpointStore)
+	c.pendingRestore = make(map[int]checkpointRestore)
+	c.curCheckpointPos = c.curCheckpointPos[:0]
+	c.curCheckpointSlots = make(map[int]int)
+	c.checkpointReserved = make(map[int]struct{})
+	c.checkpointCtxSize = max(c.checkpointCount*c.maxSequences, 8)
+
+	// Every slot starts unreferenced. allocSlot pops from the tail of
+	// freeSlots, so the indices are pushed in descending order.
+	c.refCount = make([]int, maxSequences)
+	c.freeSlots = c.freeSlots[:0]
+	for remaining := maxSequences; remaining > 0; remaining-- {
+		c.freeSlots = append(c.freeSlots, remaining-1)
+	}
+
+	c.kv.Init(backend, dtype, maxSequences, capacity, maxBatch)
+}
+
+// Close closes every per-layer context owned by the cache (conv state,
+// recurrent state, and both checkpoint context sets) and then closes the
+// wrapped causal KV cache.
+func (c *Recurrent) Close() {
+	owned := []map[int]ml.Context{
+		c.convCtxs,
+		c.recurrentCtxs,
+		c.checkpointConvCtxs,
+		c.checkpointRecurCtxs,
+	}
+	for _, ctxs := range owned {
+		for _, layerCtx := range ctxs {
+			layerCtx.Close()
+		}
+	}
+
+	c.kv.Close()
+}
+
+// SetConfig forwards config to the wrapped causal KV cache.
+func (c *Recurrent) SetConfig(config ml.CacheConfig) {
+	c.kv.SetConfig(config)
+}
+
+// SetLayer forwards the layer selection to the wrapped causal KV cache.
+func (c *Recurrent) SetLayer(layer int) {
+	c.kv.SetLayer(layer)
+}
+
+// Get returns whatever the wrapped causal KV cache's Get returns for ctx,
+// unchanged.
+func (c *Recurrent) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
+	return c.kv.Get(ctx)
+}
+
+// Put stores key and value in the wrapped causal KV cache.
+func (c *Recurrent) Put(ctx ml.Context, key, value ml.Tensor) {
+	c.kv.Put(ctx, key, value)
+}
+
+// StartForward prepares the cache for a forward pass over batch.
+//
+// The wrapped causal cache is started first. The batch's sequence IDs are then
+// reduced to an ordered list of unique sequences, each of which must
+// contribute the same number of tokens; otherwise ErrNotSupported is returned.
+// Unless reserve is set, every sequence that has no recurrent slot yet is
+// given a freshly zeroed one. When reserve is set, slots 0..n-1 are used as
+// placeholders and nothing is allocated.
+//
+// If slot allocation fails part-way through the batch, the slots handed out
+// earlier in the same call are released again, so no sequence is left mapped
+// to a slot that was never zeroed.
+func (c *Recurrent) StartForward(ctx ml.Context, batch input.Batch, reserve bool) error {
+	if err := c.kv.StartForward(ctx, batch, reserve); err != nil {
+		return err
+	}
+
+	nTokens := len(batch.Sequences)
+	if nTokens == 0 {
+		c.curSeqs = c.curSeqs[:0]
+		c.curSlots = c.curSlots[:0]
+		c.curSlotsInput = nil
+		c.curSeqTokens = 0
+		c.reserveCheckpoints = false
+		c.writableEnsured = false
+		c.writableError = nil
+		return nil
+	}
+
+	// Fast path for single-sequence batches (common during decode and prefill).
+	firstSeq := batch.Sequences[0]
+	singleSeq := true
+	for _, s := range batch.Sequences[1:] {
+		if s != firstSeq {
+			singleSeq = false
+			break
+		}
+	}
+	if singleSeq {
+		return c.startForwardSingleSeq(ctx, firstSeq, nTokens, batch, reserve)
+	}
+
+	// Derive equal-length sequence layout for recurrent layers. seqCounts is a
+	// scratch map reused across calls.
+	seqCounts := c.seqCounts
+	clear(seqCounts)
+
+	c.curSeqs = c.curSeqs[:0]
+	for _, s := range batch.Sequences {
+		if seqCounts[s] == 0 {
+			c.curSeqs = append(c.curSeqs, s)
+		}
+		seqCounts[s]++
+	}
+
+	nSeqs := len(c.curSeqs)
+	want := nTokens / nSeqs
+	for _, s := range c.curSeqs {
+		if seqCounts[s] != want {
+			return ErrNotSupported
+		}
+	}
+
+	c.curSeqTokens = want
+
+	if reserve {
+		c.curSlots = c.curSlots[:0]
+		for i := range nSeqs {
+			c.curSlots = append(c.curSlots, i)
+		}
+		c.finalizeStartForward(ctx, batch, true)
+		return nil
+	}
+
+	// Ensure slots exist for sequences in this batch.
+	c.curSlots = c.curSlots[:0]
+	var newSeqs, newSlots []int
+	for _, s := range c.curSeqs {
+		slot, ok := c.slotForSeq[s]
+		if !ok {
+			var err error
+			slot, err = c.allocSlot()
+			if err != nil {
+				// Undo the allocations made so far in this call. They have not
+				// been zeroed yet, so leaving them mapped would make a later
+				// pass treat stale slot contents as valid state. Release in
+				// reverse order to restore the free list as it was.
+				for i := len(newSlots) - 1; i >= 0; i-- {
+					delete(c.slotForSeq, newSeqs[i])
+					c.refCount[newSlots[i]] = 0
+					c.freeSlot(newSlots[i])
+				}
+				c.curSeqs = c.curSeqs[:0]
+				c.curSlots = c.curSlots[:0]
+				return err
+			}
+			c.slotForSeq[s] = slot
+			c.refCount[slot] = 1
+			newSeqs = append(newSeqs, s)
+			newSlots = append(newSlots, slot)
+		}
+		c.curSlots = append(c.curSlots, slot)
+	}
+
+	if len(newSlots) > 0 {
+		c.zeroSlots(ctx, newSlots)
+	}
+
+	c.finalizeStartForward(ctx, batch, false)
+
+	return nil
+}
+
+func (c *Recurrent) startForwardSingleSeq(ctx ml.Context, seq, seqTokens int, batch input.Batch, reserve bool) error {
+	c.curSeqs = append(c.curSeqs[:0], seq)
+	c.curSeqTokens = seqTokens
+
+	if reserve {
+		c.curSlots = append(c.curSlots[:0], 0)
+		c.finalizeStartForward(ctx, batch, true)
+		return nil
+	}
+
+	slot, ok := c.slotForSeq[seq]
+	if !ok {
+		var err error
+		slot, err = c.allocSlot()
+		if err != nil {
+			return err
+		}
+
+		c.slotForSeq[seq] = slot
+		c.refCount[slot] = 1
+		slotList := [1]int{slot}
+		c.zeroSlots(ctx, slotList[:])
+	}
+
+	c.curSlots = append(c.curSlots[:0], slot)
+	c.finalizeStartForward(ctx, batch, false)
+
+	return nil
+}
+
+// finalizeStartForward completes StartForward once curSeqs and curSlots are
+// set: it resets the per-pass writable flags, records whether this is a
+// reserve pass, builds the slot index tensor, and plans checkpoints for batch.
+func (c *Recurrent) finalizeStartForward(ctx ml.Context, batch input.Batch, reserve bool) {
+	c.reserveCheckpoints = reserve
+	c.writableError = nil
+	c.writableEnsured = false
+
+	c.curSlotsInput = c.slotsInput(ctx, c.curSlots)
+
+	// Checkpoint planning runs last, after all per-pass state is in place.
+	c.planCheckpoints(batch)
+}
+
+// setCurSlotsInput rebuilds curSlotsInput from the current curSlots list.
+func (c *Recurrent) setCurSlotsInput(ctx ml.Context) {
+	c.curSlotsInput = c.slotsInput(ctx, c.curSlots)
+}
+
+// slotsInput converts slots into an int32 input tensor on ctx. It returns nil
+// for an empty list. The single-slot case reuses c.slotScratch as the source
+// buffer instead of allocating a new slice.
+func (c *Recurrent) slotsInput(ctx ml.Context, slots []int) ml.Tensor {
+	n := len(slots)
+	if n == 0 {
+		return nil
+	}
+
+	if n == 1 {
+		c.slotScratch[0] = int32(slots[0])
+		return ctx.Input().FromInts(c.slotScratch[:], 1)
+	}
+
+	indices := make([]int32, 0, n)
+	for _, slot := range slots {
+		indices = append(indices, int32(slot))
+	}
+	return ctx.Input().FromInts(indices, n)
+}
+
+// allocSlot pops the most recently freed slot off the free list. It returns
+// ErrKvCacheFull when no slot is available.
+func (c *Recurrent) allocSlot() (int, error) {
+	last := len(c.freeSlots) - 1
+	if last < 0 {
+		return 0, ErrKvCacheFull
+	}
+
+	slot := c.freeSlots[last]
+	c.freeSlots = c.freeSlots[:last]
+	return slot, nil
+}
+
+// freeSlot returns slot to the free list. Indices outside [0, maxSequences)
+// are ignored.
+func (c *Recurrent) freeSlot(slot int) {
+	if slot < 0 || slot >= c.maxSequences {
+		return
+	}
+	c.freeSlots = append(c.freeSlots, slot)
+}
+
+// zeroSlots schedules writes of zero rows into the given slots of every conv
+// and recurrent state buffer that has been allocated so far. Layers whose
+// buffers do not exist yet are not touched.
+func (c *Recurrent) zeroSlots(ctx ml.Context, slots []int) {
+	if len(slots) == 0 {
+		return
+	}
+
+	inputCtx := ctx.Input()
+	slotsTensor := c.slotsInput(ctx, slots)
+
+	// clearRows zeroes the selected rows of each buffer in bufs; one zero
+	// tensor is shared by all buffers of the same row size.
+	clearRows := func(bufs map[int]ml.Tensor, rowSize int) {
+		if len(bufs) == 0 {
+			return
+		}
+		zeros := inputCtx.Zeros(ml.DTypeF32, rowSize, len(slots))
+		for _, buf := range bufs {
+			ctx.Forward(buf.SetRows(ctx, zeros, slotsTensor))
+		}
+	}
+
+	clearRows(c.convStates, c.convDim*c.convChannels)
+	clearRows(c.recurrentStates, c.recurrentStateSize)
+}
+
+// EnsureWritable gives every sequence in the current batch a private slot.
+// A sequence whose slot has a reference count above one is moved to a newly
+// allocated slot, and the shared slot's recurrent state and checkpoints are
+// copied into it. The slot index tensor is rebuilt afterwards. An allocation
+// failure is returned immediately.
+func (c *Recurrent) EnsureWritable(ctx ml.Context) error {
+	for i, seq := range c.curSeqs {
+		shared, ok := c.slotForSeq[seq]
+		if !ok || !c.validSlot(shared) || c.refCount[shared] <= 1 {
+			// Unmapped, out of range, or already exclusively owned.
+			continue
+		}
+
+		private, err := c.allocSlot()
+		if err != nil {
+			return err
+		}
+
+		c.refCount[shared]--
+		c.refCount[private] = 1
+		c.slotForSeq[seq] = private
+		c.curSlots[i] = private
+
+		c.copyRecurrentState(ctx, shared, private)
+		c.copyCheckpoints(ctx, shared, private)
+	}
+
+	c.setCurSlotsInput(ctx)
+
+	return nil
+}
+
+// copyRecurrentState schedules a copy of row srcSlot into row dstSlot for
+// every allocated conv and recurrent state buffer. Rows that are not already
+// F32 are cast to F32 before being written.
+func (c *Recurrent) copyRecurrentState(ctx ml.Context, srcSlot, dstSlot int) {
+	srcIdx := ctx.Input().FromInts([]int32{int32(srcSlot)}, 1)
+	dstIdx := ctx.Input().FromInts([]int32{int32(dstSlot)}, 1)
+
+	// Conv buffers first, then recurrent buffers.
+	for _, states := range []map[int]ml.Tensor{c.convStates, c.recurrentStates} {
+		for _, buf := range states {
+			row := buf.Rows(ctx, srcIdx)
+			if row.DType() != ml.DTypeF32 {
+				row = row.Cast(ctx, ml.DTypeF32)
+			}
+			ctx.Forward(buf.SetRows(ctx, row, dstIdx))
+		}
+	}
+}
+
+// CopyPrefix makes dstSeq share srcSeq's cached state. The wrapped causal
+// cache copies the first prefixLen positions. For recurrent state, dstSeq is
+// detached from any slot it held and pointed at srcSeq's slot, whose reference
+// count is incremented; EnsureWritable later gives a sequence its own copy
+// when the slot is shared.
+//
+// If dstSeq was the last user of its previous slot, that slot's checkpoints
+// are cleared before it is returned to the free list, as Remove does, so that
+// the next sequence given the slot cannot see checkpoints from the old one.
+func (c *Recurrent) CopyPrefix(srcSeq, dstSeq int, prefixLen int32) {
+	c.kv.CopyPrefix(srcSeq, dstSeq, prefixLen)
+
+	// Release whatever slot dstSeq currently references.
+	if dstSlot, ok := c.slotForSeq[dstSeq]; ok {
+		if c.validSlot(dstSlot) {
+			c.refCount[dstSlot]--
+			if c.refCount[dstSlot] <= 0 {
+				c.refCount[dstSlot] = 0
+				c.clearCheckpoints(dstSlot)
+				c.freeSlot(dstSlot)
+			}
+		}
+		delete(c.slotForSeq, dstSeq)
+	}
+
+	srcSlot, ok := c.slotForSeq[srcSeq]
+	if !ok {
+		// The source has no recurrent state yet; nothing to share.
+		return
+	}
+
+	if c.validSlot(srcSlot) {
+		c.slotForSeq[dstSeq] = srcSlot
+		c.refCount[srcSlot]++
+	}
+}
+
+// CanResume reports whether seq can be resumed at pos. The wrapped causal
+// cache must agree, and for any pos other than 0 a checkpoint must also exist
+// for seq at pos.
+func (c *Recurrent) CanResume(seq int, pos int32) bool {
+	return c.kv.CanResume(seq, pos) && (pos == 0 || c.hasCheckpoint(seq, pos))
+}
+
+// Remove deletes positions [beginIndex, endIndex) of seq. Three cases are
+// handled:
+//
+//   - beginIndex > 0 and endIndex != math.MaxInt32: the range is removed from
+//     the causal cache, any pending restore for seq is dropped, and the
+//     sequence's checkpoints are shifted to account for the removed range.
+//   - beginIndex > 0 and endIndex == math.MaxInt32: this is only supported
+//     when a pending restore exists for seq whose position is beginIndex-1 and
+//     whose checkpoint is complete; otherwise ErrNotSupported is returned.
+//     After the causal cache is trimmed the checkpoint is applied.
+//   - beginIndex <= 0: the causal cache is trimmed and the sequence gives up
+//     its recurrent slot, which is cleared and freed once unreferenced.
+//
+// In the first two cases a slot shared with other sequences is first copied to
+// a private slot so the other sequences are not affected.
+func (c *Recurrent) Remove(seq int, beginIndex, endIndex int32) error {
+	if beginIndex > 0 && endIndex != math.MaxInt32 {
+		if err := c.kv.Remove(seq, beginIndex, endIndex); err != nil {
+			return err
+		}
+		delete(c.pendingRestore, seq)
+
+		slot, ok := c.slotForSeq[seq]
+		if !ok || !c.validSlot(slot) {
+			return nil
+		}
+
+		// Detach shared recurrent state/checkpoints before mutating checkpoint positions.
+		if c.refCount[slot] > 1 {
+			newSlot, err := c.allocSlot()
+			if err != nil {
+				return err
+			}
+			ctx := c.backend.NewContext()
+			c.copyRecurrentState(ctx, slot, newSlot)
+			c.copyCheckpoints(ctx, slot, newSlot)
+			// Only run the graph when there are state buffers to copy.
+			if len(c.convStates) > 0 || len(c.recurrentStates) > 0 {
+				ctx.Compute()
+			}
+			ctx.Close()
+
+			c.refCount[slot]--
+			c.refCount[newSlot] = 1
+			c.slotForSeq[seq] = newSlot
+			slot = newSlot
+		}
+
+		c.shiftCheckpoints(slot, beginIndex, endIndex)
+		return nil
+	}
+
+	if beginIndex > 0 {
+		// Truncation to beginIndex: validate the pending restore before the
+		// causal cache is modified.
+		restore, ok := c.pendingRestore[seq]
+		if !ok || restore.pos+1 != beginIndex {
+			return ErrNotSupported
+		}
+		if !c.restoreComplete(restore) {
+			return ErrNotSupported
+		}
+		if slot, ok := c.slotForSeq[seq]; ok && c.validSlot(slot) && c.refCount[slot] > 1 {
+			newSlot, err := c.allocSlot()
+			if err != nil {
+				return err
+			}
+			ctx := c.backend.NewContext()
+			c.copyRecurrentState(ctx, slot, newSlot)
+			c.copyCheckpoints(ctx, slot, newSlot)
+			if len(c.convStates) > 0 || len(c.recurrentStates) > 0 {
+				ctx.Compute()
+			}
+			ctx.Close()
+
+			c.refCount[slot]--
+			c.refCount[newSlot] = 1
+			c.slotForSeq[seq] = newSlot
+
+			// Point the pending restore at the private copy.
+			restore.slot = newSlot
+			c.pendingRestore[seq] = restore
+		}
+	}
+
+	if err := c.kv.Remove(seq, beginIndex, endIndex); err != nil {
+		return err
+	}
+
+	if beginIndex > 0 {
+		// Re-read the restore: its slot may have been updated above.
+		restore := c.pendingRestore[seq]
+		delete(c.pendingRestore, seq)
+		return c.applyCheckpointRestore(restore)
+	}
+
+	// beginIndex <= 0: release the sequence's recurrent slot.
+	slot, ok := c.slotForSeq[seq]
+	delete(c.pendingRestore, seq)
+	if !ok {
+		return nil
+	}
+
+	if !c.validSlot(slot) {
+		delete(c.slotForSeq, seq)
+		return nil
+	}
+
+	c.refCount[slot]--
+	if c.refCount[slot] <= 0 {
+		c.refCount[slot] = 0
+		c.clearCheckpoints(slot)
+		c.freeSlot(slot)
+	}
+	delete(c.slotForSeq, seq)
+
+	return nil
+}
+
+// validSlot reports whether slot is a valid index into refCount.
+func (c *Recurrent) validSlot(slot int) bool {
+	return slot >= 0 && slot < len(c.refCount)
+}
+
+// SlotsTensor returns the slot index tensor built for the current forward
+// pass, or nil if none has been built.
+func (c *Recurrent) SlotsTensor() ml.Tensor {
+	return c.curSlotsInput
+}
+
+// contiguousSlots reports whether the current slots form an ascending run
+// first, first+1, first+2, ... and, if so, returns first. It returns
+// (0, false) when there are no current slots or the run is broken.
+func (c *Recurrent) contiguousSlots() (int, bool) {
+	if len(c.curSlots) == 0 {
+		return 0, false
+	}
+
+	first := c.curSlots[0]
+	for offset := 1; offset < len(c.curSlots); offset++ {
+		if c.curSlots[offset] != first+offset {
+			return 0, false
+		}
+	}
+	return first, true
+}
+
+// SeqTokens returns the number of tokens each sequence contributes to the
+// current forward pass, as computed by StartForward.
+func (c *Recurrent) SeqTokens() int {
+	return c.curSeqTokens
+}
+
+// NumSeqs returns the number of unique sequences in the current forward pass.
+func (c *Recurrent) NumSeqs() int {
+	return len(c.curSeqs)
+}
+
+// convBuffer returns the conv state buffer for layer, creating it on first
+// use as an F32 tensor of zeros with convDim*convChannels values per row and
+// one row per possible sequence.
+func (c *Recurrent) convBuffer(layer int) ml.Tensor {
+	buf, ok := c.convStates[layer]
+	if ok {
+		return buf
+	}
+
+	layerCtx, ok := c.convCtxs[layer]
+	if !ok {
+		layerCtx = c.backend.NewContextSize(1).Layer(layer)
+		c.convCtxs[layer] = layerCtx
+	}
+
+	buf = layerCtx.Zeros(ml.DTypeF32, c.convDim*c.convChannels, c.maxSequences)
+	c.convStates[layer] = buf
+	return buf
+}
+
+// recurrentBuffer returns the recurrent state buffer for layer, creating it
+// on first use as an F32 tensor of zeros with recurrentStateSize values per
+// row and one row per possible sequence.
+func (c *Recurrent) recurrentBuffer(layer int) ml.Tensor {
+	buf, ok := c.recurrentStates[layer]
+	if ok {
+		return buf
+	}
+
+	layerCtx, ok := c.recurrentCtxs[layer]
+	if !ok {
+		layerCtx = c.backend.NewContextSize(1).Layer(layer)
+		c.recurrentCtxs[layer] = layerCtx
+	}
+
+	buf = layerCtx.Zeros(ml.DTypeF32, c.recurrentStateSize, c.maxSequences)
+	c.recurrentStates[layer] = buf
+	return buf
+}
+
+// ensureWritable runs the once-per-pass writable check and returns the error
+// it recorded, if any. Repeated calls in the same pass return the same error.
+func (c *Recurrent) ensureWritable(ctx ml.Context) error {
+	c.ensureWritableOnce(ctx)
+	return c.writableError
+}
+
+// currentSlotRows returns the rows of buf that belong to the current batch's
+// slots. When the slots are contiguous a strided view into buf is returned;
+// otherwise the rows are selected through the slot index tensor.
+func (c *Recurrent) currentSlotRows(ctx ml.Context, buf ml.Tensor, rowSize int) ml.Tensor {
+	first, contiguous := c.contiguousSlots()
+	if !contiguous {
+		return buf.Rows(ctx, c.SlotsTensor())
+	}
+
+	rowStride := buf.Stride(1)
+	return buf.View(ctx, first*rowStride, rowSize, rowStride, c.NumSeqs())
+}
+
+// writeCurrentSlotRows schedules a write of src into the rows of buf that
+// belong to the current batch's slots. Contiguous slots are written with a
+// copy into a strided view; otherwise SetRows is used with the slot index
+// tensor.
+func (c *Recurrent) writeCurrentSlotRows(ctx ml.Context, buf ml.Tensor, rowSize int, src ml.Tensor) {
+	first, contiguous := c.contiguousSlots()
+	if !contiguous {
+		ctx.Forward(buf.SetRows(ctx, src, c.SlotsTensor()))
+		return
+	}
+
+	rowStride := buf.Stride(1)
+	dst := buf.View(ctx, first*rowStride, rowSize, rowStride, c.NumSeqs())
+	ctx.Forward(src.Copy(ctx, dst))
+}
+
+// ensureWritableOnce performs the copy-on-write check at most once per
+// forward pass. EnsureWritable is only invoked when some sequence in the batch
+// is mapped to a slot with a reference count above one; any error it returns
+// is stored in writableError.
+func (c *Recurrent) ensureWritableOnce(ctx ml.Context) {
+	if c.writableEnsured {
+		return
+	}
+
+	sharesSlot := slices.ContainsFunc(c.curSeqs, func(seq int) bool {
+		slot, ok := c.slotForSeq[seq]
+		return ok && slot >= 0 && slot < len(c.refCount) && c.refCount[slot] > 1
+	})
+
+	if sharesSlot {
+		if err := c.EnsureWritable(ctx); err != nil {
+			c.writableError = err
+		}
+	}
+
+	c.writableEnsured = true
+}
+
+// ConvState returns the conv state of the current batch's sequences for layer,
+// reshaped to [convDim, convChannels, nSeqs]. Shared slots are made private
+// first; a failure to do so is returned as the error.
+func (c *Recurrent) ConvState(ctx ml.Context, layer int) (ml.Tensor, error) {
+	if err := c.ensureWritable(ctx); err != nil {
+		return nil, err
+	}
+
+	rowSize := c.convDim * c.convChannels
+	rows := c.currentSlotRows(ctx, c.convBuffer(layer), rowSize)
+	return rows.Reshape(ctx, c.convDim, c.convChannels, c.NumSeqs()), nil
+}
+
+// UpdateConvState stores newState as the conv state of the current batch's
+// sequences for layer. The state is flattened to one row per sequence and
+// cast to F32 if needed before it is written, and the same F32 rows are
+// offered to the conv checkpoint capture.
+func (c *Recurrent) UpdateConvState(ctx ml.Context, layer int, newState ml.Tensor) {
+	rowSize := c.convDim * c.convChannels
+	buf := c.convBuffer(layer)
+
+	rows := newState.Reshape(ctx, rowSize, c.NumSeqs())
+	if rows.DType() != ml.DTypeF32 {
+		rows = rows.Cast(ctx, ml.DTypeF32)
+	}
+
+	c.writeCurrentSlotRows(ctx, buf, rowSize, rows)
+	c.captureConvCheckpoint(ctx, layer, rows)
+}
+
+// RecurrentState returns the recurrent state of the current batch's sequences
+// for layer, reshaped to [dims..., nSeqs]. dims must be non-empty, all
+// positive, and multiply out to the configured recurrent state size;
+// otherwise an error wrapping ErrInvalidRecurrentShape is returned.
+func (c *Recurrent) RecurrentState(ctx ml.Context, layer int, dims ...int) (ml.Tensor, error) {
+	if err := c.ensureWritable(ctx); err != nil {
+		return nil, err
+	}
+
+	if len(dims) == 0 {
+		return nil, ErrInvalidRecurrentShape
+	}
+	total := 1
+	for _, dim := range dims {
+		if dim <= 0 {
+			return nil, ErrInvalidRecurrentShape
+		}
+		total *= dim
+	}
+	if total != c.recurrentStateSize {
+		return nil, fmt.Errorf("%w: got %v (size %d), want size %d", ErrInvalidRecurrentShape, dims, total, c.recurrentStateSize)
+	}
+
+	rows := c.currentSlotRows(ctx, c.recurrentBuffer(layer), c.recurrentStateSize)
+
+	// Target shape is the caller's dims followed by the sequence count.
+	shape := append(make([]int, 0, len(dims)+1), dims...)
+	shape = append(shape, c.NumSeqs())
+	return rows.Reshape(ctx, shape...), nil
+}
+
+// RecurrentState4D returns the recurrent state of the current batch's
+// sequences for layer, reshaped to [dim0, dim1, dim2, nSeqs]. All three
+// dimensions must be positive and multiply out to the configured recurrent
+// state size; otherwise an error wrapping ErrInvalidRecurrentShape is returned.
+func (c *Recurrent) RecurrentState4D(ctx ml.Context, layer int, dim0, dim1, dim2 int) (ml.Tensor, error) {
+	if err := c.ensureWritable(ctx); err != nil {
+		return nil, err
+	}
+
+	if min(dim0, dim1, dim2) <= 0 {
+		return nil, ErrInvalidRecurrentShape
+	}
+	if total := dim0 * dim1 * dim2; total != c.recurrentStateSize {
+		return nil, fmt.Errorf("%w: got [%d %d %d] (size %d), want size %d", ErrInvalidRecurrentShape, dim0, dim1, dim2, total, c.recurrentStateSize)
+	}
+
+	rows := c.currentSlotRows(ctx, c.recurrentBuffer(layer), c.recurrentStateSize)
+	return rows.Reshape(ctx, dim0, dim1, dim2, c.NumSeqs()), nil
+}
+
+// UpdateRecurrentState stores newState as the recurrent state of the current
+// batch's sequences for layer. The state is flattened to one row per sequence
+// and cast to F32 if needed before it is written, and the same F32 rows are
+// offered to the recurrent checkpoint capture.
+func (c *Recurrent) UpdateRecurrentState(ctx ml.Context, layer int, newState ml.Tensor) {
+	buf := c.recurrentBuffer(layer)
+
+	rows := newState.Reshape(ctx, c.recurrentStateSize, c.NumSeqs())
+	if rows.DType() != ml.DTypeF32 {
+		rows = rows.Cast(ctx, ml.DTypeF32)
+	}
+
+	c.writeCurrentSlotRows(ctx, buf, c.recurrentStateSize, rows)
+	c.captureRecurrentCheckpoint(ctx, layer, rows)
+}
+
+// IsSupportedForBatch returns true if the current batch layout supports recurrent layers,
+// i.e. the current forward pass has at least one sequence and a positive
+// per-sequence token count.
+func (c *Recurrent) IsSupportedForBatch() bool {
+	return c.curSeqTokens > 0 && len(c.curSeqs) > 0
+}
+
+// Seqs returns the ordered unique sequences for the current forward pass.
+// The result is a copy, so callers may modify it freely.
+func (c *Recurrent) Seqs() []int {
+	return slices.Clone(c.curSeqs)
+}
--- a/model/models/qwen3next/checkpoints.go
+++ b/model/models/qwen3next/checkpoints.go
@@ -1,27 +1,20 @@
-package qwen3next
+package kvcache

 import (
 	"log/slog"
 	"math"

-	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model/input"
 )

-const (
-	checkpointCountDefault    = 32
-	checkpointMinPosDefault   = int32(16)
-	checkpointIntervalDefault = int32(1280)
-)
-
 // TODO(jmorganca): Add byte-serialized host-RAM checkpoints to reduce GPU
 // memory usage while preserving prefix reuse for recurrent state.

 type checkpointEntry struct {
-	pos   int32
-	conv  map[int]ml.Tensor
-	delta map[int]ml.Tensor
+	pos       int32
+	conv      map[int]ml.Tensor
+	recurrent map[int]ml.Tensor
 }

 type slotCheckpointStore struct {
@@ -132,6 +125,63 @@ func (s *slotCheckpointStore) pruneAfter(pos int32) {
 	s.lastPos = pos
 }

+// shiftRange updates checkpoint positions after positions
+// [beginIndex, endIndex) have been removed. Checkpoints inside the removed
+// range are invalidated (pos set to -1), checkpoints at or beyond endIndex are
+// moved down by the width of the range, and earlier checkpoints are left
+// alone. size, next and lastPos are then recomputed from the surviving
+// entries.
+func (s *slotCheckpointStore) shiftRange(beginIndex, endIndex int32) {
+	if len(s.entries) == 0 {
+		s.size, s.next, s.lastPos = 0, 0, -1
+		return
+	}
+
+	shift := beginIndex - endIndex
+
+	live := 0
+	firstFree := -1
+	oldestIdx := 0
+	oldest := int32(math.MaxInt32)
+	newest := int32(-1)
+
+	for i := range s.entries {
+		entry := &s.entries[i]
+
+		switch {
+		case entry.pos < 0:
+			// Already empty.
+		case entry.pos >= endIndex:
+			entry.pos += shift
+		case entry.pos >= beginIndex:
+			entry.pos = -1
+		}
+
+		if entry.pos < 0 {
+			if firstFree == -1 {
+				firstFree = i
+			}
+			continue
+		}
+
+		live++
+		if entry.pos < oldest {
+			oldest = entry.pos
+			oldestIdx = i
+		}
+		if entry.pos > newest {
+			newest = entry.pos
+		}
+	}
+
+	s.size = live
+	if live == 0 {
+		s.next = 0
+		s.lastPos = -1
+		return
+	}
+
+	// Reuse the first empty entry if there is one; with a full ring, overwrite
+	// the oldest checkpoint next.
+	s.next = oldestIdx
+	if firstFree != -1 {
+		s.next = firstFree
+	}
+	s.lastPos = newest
+}
+
 func (s *slotCheckpointStore) window() (size int, minPos, maxPos, lastPos int32) {
 	minPos = int32(math.MaxInt32)
 	maxPos = int32(-1)
@@ -155,7 +205,14 @@ func (s *slotCheckpointStore) window() (size int, minPos, maxPos, lastPos int32)
 	return size, minPos, maxPos, s.lastPos
 }

-func (c *HybridCache) planCheckpoints(batch input.Batch) {
+// checkpointTag returns the prefix used in checkpoint log messages: the
+// configured log prefix, or "kvcache.recurrent" when none was set.
+func (c *Recurrent) checkpointTag() string {
+	if tag := c.logPrefix; tag != "" {
+		return tag
+	}
+	return "kvcache.recurrent"
+}
+
+func (c *Recurrent) planCheckpoints(batch input.Batch) {
 	if c.checkpointCount == 0 || len(c.curSeqs) == 0 {
 		c.curCheckpointPos = c.curCheckpointPos[:0]
 		for k := range c.curCheckpointSlots {
@@ -201,7 +258,7 @@ func (c *HybridCache) planCheckpoints(batch input.Batch) {
 	}
 }

-func (c *HybridCache) checkpointStore(slot int) *slotCheckpointStore {
+func (c *Recurrent) checkpointStore(slot int) *slotCheckpointStore {
 	store, ok := c.checkpoints[slot]
 	if ok {
 		return store
@@ -211,7 +268,7 @@ func (c *HybridCache) checkpointStore(slot int) *slotCheckpointStore {
 	return store
 }

-func (c *HybridCache) checkpointIndexForSlot(slot int, pos int32) int {
+func (c *Recurrent) checkpointIndexForSlot(slot int, pos int32) int {
 	if c.checkpointCount == 0 {
 		return -1
 	}
@@ -226,7 +283,7 @@ func (c *HybridCache) checkpointIndexForSlot(slot int, pos int32) int {
 	return idx
 }

-func (c *HybridCache) hasCheckpoint(seq int, pos int32) bool {
+func (c *Recurrent) hasCheckpoint(seq int, pos int32) bool {
 	if pos <= 0 {
 		return false
 	}
@@ -242,7 +299,7 @@ func (c *HybridCache) hasCheckpoint(seq int, pos int32) bool {
 	return ok
 }

-func (c *HybridCache) PrepareRestore(seq int, targetPos int32) (int32, bool) {
+func (c *Recurrent) PrepareRestore(seq int, targetPos int32) (int32, bool) {
 	if targetPos <= 0 {
 		return 0, false
 	}
@@ -252,13 +309,13 @@ func (c *HybridCache) PrepareRestore(seq int, targetPos int32) (int32, bool) {
 	}
 	store, ok := c.checkpoints[slot]
 	if !ok {
-		slog.Debug("qwen3next: checkpoint miss", "seq", seq, "slot", slot, "target", targetPos, "size", 0)
+		slog.Debug(c.checkpointTag()+": checkpoint miss", "seq", seq, "slot", slot, "target", targetPos, "size", 0)
 		return 0, false
 	}
 	idx, pos, ok := store.bestIndex(targetPos)
 	if !ok {
 		size, minPos, maxPos, lastPos := store.window()
-		slog.Debug("qwen3next: checkpoint miss", "seq", seq, "slot", slot, "target", targetPos, "size", size,
+		slog.Debug(c.checkpointTag()+": checkpoint miss", "seq", seq, "slot", slot, "target", targetPos, "size", size,
 			"min", minPos, "max", maxPos, "last", lastPos)
 		return 0, false
 	}
@@ -270,10 +327,10 @@ func (c *HybridCache) PrepareRestore(seq int, targetPos int32) (int32, bool) {
 	return pos + 1, true
 }

-func (c *HybridCache) applyCheckpointRestore(restore checkpointRestore) error {
+func (c *Recurrent) applyCheckpointRestore(restore checkpointRestore) error {
 	entry, ok := c.restoreEntry(restore)
 	if !ok {
-		return kvcache.ErrNotSupported
+		return ErrNotSupported
 	}

 	ctx := c.backend.NewContext()
@@ -281,15 +338,15 @@ func (c *HybridCache) applyCheckpointRestore(restore checkpointRestore) error {

 	slotIdx := ctx.Input().FromInts([]int32{int32(restore.slot)}, 1)
 	for layer, src := range entry.conv {
-		buf := c.convBuffer(ctx, layer)
+		buf := c.convBuffer(layer)
 		ctx.Forward(buf.SetRows(ctx, src, slotIdx))
 	}
-	for layer, src := range entry.delta {
-		buf := c.deltaBuffer(ctx, layer)
+	for layer, src := range entry.recurrent {
+		buf := c.recurrentBuffer(layer)
 		ctx.Forward(buf.SetRows(ctx, src, slotIdx))
 	}

-	if len(entry.conv) > 0 || len(entry.delta) > 0 {
+	if len(entry.conv) > 0 || len(entry.recurrent) > 0 {
 		ctx.Compute()
 	}
 	store := c.checkpoints[restore.slot]
@@ -297,12 +354,12 @@ func (c *HybridCache) applyCheckpointRestore(restore checkpointRestore) error {
 	return nil
 }

-func (c *HybridCache) restoreComplete(restore checkpointRestore) bool {
+func (c *Recurrent) restoreComplete(restore checkpointRestore) bool {
 	_, ok := c.restoreEntry(restore)
 	return ok
 }

-func (c *HybridCache) restoreEntry(restore checkpointRestore) (*checkpointEntry, bool) {
+func (c *Recurrent) restoreEntry(restore checkpointRestore) (*checkpointEntry, bool) {
 	store, ok := c.checkpoints[restore.slot]
 	if !ok || restore.idx < 0 || restore.idx >= len(store.entries) {
 		return nil, false
@@ -317,27 +374,33 @@ func (c *HybridCache) restoreEntry(restore checkpointRestore) (*checkpointEntry,
 	return entry, true
 }

-func (c *HybridCache) entryComplete(entry *checkpointEntry) bool {
+func (c *Recurrent) entryComplete(entry *checkpointEntry) bool {
 	for layer := range c.convStates {
 		if entry.conv == nil || entry.conv[layer] == nil {
 			return false
 		}
 	}
-	for layer := range c.deltaStates {
-		if entry.delta == nil || entry.delta[layer] == nil {
+	for layer := range c.recurrentStates {
+		if entry.recurrent == nil || entry.recurrent[layer] == nil {
 			return false
 		}
 	}
 	return true
 }

-func (c *HybridCache) clearCheckpoints(slot int) {
+func (c *Recurrent) clearCheckpoints(slot int) {
 	if store, ok := c.checkpoints[slot]; ok {
 		store.reset()
 	}
 }

-func (c *HybridCache) copyCheckpoints(ctx ml.Context, srcSlot, dstSlot int) {
+// shiftCheckpoints applies shiftRange to slot's checkpoint store, if the slot
+// has one. Slots without a store are left untouched.
+func (c *Recurrent) shiftCheckpoints(slot int, beginIndex, endIndex int32) {
+	store, ok := c.checkpoints[slot]
+	if !ok {
+		return
+	}
+	store.shiftRange(beginIndex, endIndex)
+}
+
+func (c *Recurrent) copyCheckpoints(ctx ml.Context, srcSlot, dstSlot int) {
 	if c.checkpointCount == 0 {
 		return
 	}
@@ -363,19 +426,19 @@ func (c *HybridCache) copyCheckpoints(ctx ml.Context, srcSlot, dstSlot int) {
 				ctx.Forward(src.Copy(ctx, dst))
 			}
 		}
-		if srcEntry.delta != nil {
-			if dstEntry.delta == nil {
-				dstEntry.delta = make(map[int]ml.Tensor)
+		if srcEntry.recurrent != nil {
+			if dstEntry.recurrent == nil {
+				dstEntry.recurrent = make(map[int]ml.Tensor)
 			}
-			for layer, src := range srcEntry.delta {
-				dst := c.ensureCheckpointDelta(layer, dstEntry)
+			for layer, src := range srcEntry.recurrent {
+				dst := c.ensureCheckpointRecurrent(layer, dstEntry)
 				ctx.Forward(src.Copy(ctx, dst))
 			}
 		}
 	}
 }

-func (c *HybridCache) captureConvCheckpoint(ctx ml.Context, layer int, src ml.Tensor) {
+func (c *Recurrent) captureConvCheckpoint(ctx ml.Context, layer int, src ml.Tensor) {
 	if c.checkpointCount == 0 {
 		return
 	}
@@ -402,12 +465,12 @@ func (c *HybridCache) captureConvCheckpoint(ctx ml.Context, layer int, src ml.Te
 	}
 }

-func (c *HybridCache) captureDeltaCheckpoint(ctx ml.Context, layer int, src ml.Tensor) {
+func (c *Recurrent) captureRecurrentCheckpoint(ctx ml.Context, layer int, src ml.Tensor) {
 	if c.checkpointCount == 0 {
 		return
 	}
 	if c.reserveCheckpoints {
-		c.reserveCheckpointDelta(layer)
+		c.reserveCheckpointRecurrent(layer)
 		return
 	}
 	if len(c.curCheckpointPos) == 0 {
@@ -423,13 +486,13 @@ func (c *HybridCache) captureDeltaCheckpoint(ctx ml.Context, layer int, src ml.T
 			continue
 		}
 		entry := &c.checkpoints[slot].entries[idx]
-		dst := c.ensureCheckpointDelta(layer, entry)
+		dst := c.ensureCheckpointRecurrent(layer, entry)
 		seqSlice := src.Slice(ctx, 1, i, i+1, 1)
 		ctx.Forward(seqSlice.Copy(ctx, dst))
 	}
 }

-func (c *HybridCache) ensureCheckpointConv(layer int, entry *checkpointEntry) ml.Tensor {
+func (c *Recurrent) ensureCheckpointConv(layer int, entry *checkpointEntry) ml.Tensor {
 	if entry.conv == nil {
 		entry.conv = make(map[int]ml.Tensor)
 	}
@@ -446,24 +509,24 @@ func (c *HybridCache) ensureCheckpointConv(layer int, entry *checkpointEntry) ml
 	return t
 }

-func (c *HybridCache) ensureCheckpointDelta(layer int, entry *checkpointEntry) ml.Tensor {
-	if entry.delta == nil {
-		entry.delta = make(map[int]ml.Tensor)
+func (c *Recurrent) ensureCheckpointRecurrent(layer int, entry *checkpointEntry) ml.Tensor {
+	if entry.recurrent == nil {
+		entry.recurrent = make(map[int]ml.Tensor)
 	}
-	if t, ok := entry.delta[layer]; ok {
+	if t, ok := entry.recurrent[layer]; ok {
 		return t
 	}
-	ctx, ok := c.checkpointDeltaCtxs[layer]
+	ctx, ok := c.checkpointRecurCtxs[layer]
 	if !ok {
 		ctx = c.backend.NewContextSize(c.checkpointCtxSize).Layer(layer)
-		c.checkpointDeltaCtxs[layer] = ctx
+		c.checkpointRecurCtxs[layer] = ctx
 	}
-	t := ctx.Zeros(ml.DTypeF32, c.deltaStateSize, 1)
-	entry.delta[layer] = t
+	t := ctx.Zeros(ml.DTypeF32, c.recurrentStateSize, 1)
+	entry.recurrent[layer] = t
 	return t
 }

-func (c *HybridCache) reserveCheckpointConv(layer int) {
+func (c *Recurrent) reserveCheckpointConv(layer int) {
 	key := checkpointReserveKey(layer, 0)
 	if _, ok := c.checkpointReserved[key]; ok {
 		return
@@ -478,7 +541,7 @@ func (c *HybridCache) reserveCheckpointConv(layer int) {
 	c.checkpointReserved[key] = struct{}{}
 }

-func (c *HybridCache) reserveCheckpointDelta(layer int) {
+func (c *Recurrent) reserveCheckpointRecurrent(layer int) {
 	key := checkpointReserveKey(layer, 1)
 	if _, ok := c.checkpointReserved[key]; ok {
 		return
@@ -487,7 +550,7 @@ func (c *HybridCache) reserveCheckpointDelta(layer int) {
 		store := c.checkpointStore(slot)
 		for i := range store.entries {
 			entry := &store.entries[i]
-			_ = c.ensureCheckpointDelta(layer, entry)
+			_ = c.ensureCheckpointRecurrent(layer, entry)
 		}
 	}
 	c.checkpointReserved[key] = struct{}{}
--- a/model/models/qwen3next/checkpoints_test.go
+++ b/model/models/qwen3next/checkpoints_test.go
@@ -1,40 +1,16 @@
-package qwen3next
+package kvcache

 import (
 	"errors"
 	"math"
-	"os"
+	"slices"
 	"testing"

-	"github.com/ollama/ollama/fs/ggml"
-	"github.com/ollama/ollama/kvcache"
 	"github.com/ollama/ollama/ml"
 )

-func newTestBackend(tb testing.TB) ml.Backend {
-	tb.Helper()
-
-	f, err := os.CreateTemp(tb.TempDir(), "*.gguf")
-	if err != nil {
-		tb.Fatal(err)
-	}
-	if err := ggml.WriteGGUF(f, ggml.KV{"general.architecture": "test"}, nil); err != nil {
-		_ = f.Close()
-		tb.Fatal(err)
-	}
-	if err := f.Close(); err != nil {
-		tb.Fatal(err)
-	}
-
-	b, err := ml.NewBackend(f.Name(), ml.BackendParams{AllocMemory: true})
-	if err != nil {
-		tb.Fatal(err)
-	}
-	tb.Cleanup(func() {
-		b.Close()
-	})
-
-	return b
+// newTestCache returns a small Recurrent cache for tests, with a 1x2 conv
+// state and a recurrent state of size 2. Init is not called, so no backend is
+// attached.
+func newTestCache() *Recurrent {
+	return NewRecurrentCache(RecurrentConfig{ConvDim: 1, ConvChannels: 2, RecurrentStateSize: 2})
 }

 func TestSlotCheckpointStoreBestIndex(t *testing.T) {
@@ -59,8 +35,8 @@ func TestSlotCheckpointStoreBestIndex(t *testing.T) {
 	}
 }

-func TestHybridCachePrepareRestore(t *testing.T) {
-	cache := NewHybridCache(nil, 1, 1, 1)
+func TestCachePrepareRestore(t *testing.T) {
+	cache := newTestCache()
 	cache.checkpointCount = 3
 	cache.checkpoints = make(map[int]*slotCheckpointStore)
 	cache.pendingRestore = make(map[int]checkpointRestore)
@@ -110,45 +86,8 @@ func TestSlotCheckpointStorePruneAfter(t *testing.T) {
 	}
 }

-func TestHybridCacheRestoreDetachesSharedSlot(t *testing.T) {
-	backend := newTestBackend(t)
-
-	cache := NewHybridCache(nil, 1, 2, 2)
-	cache.Init(backend, ml.DTypeF16, 2, 8, 2)
-
-	cache.slotForSeq[1] = 0
-	cache.slotForSeq[2] = 0
-	cache.refCount[0] = 2
-	cache.refCount[1] = 0
-	cache.freeSlots = []int{1}
-
-	store := cache.checkpointStore(0)
-	idx := store.record(9)
-	cache.pendingRestore[1] = checkpointRestore{slot: 0, idx: idx, pos: 9}
-
-	if err := cache.Remove(1, 10, math.MaxInt32); err != nil {
-		t.Fatalf("Remove failed: %v", err)
-	}
-
-	if cache.slotForSeq[1] == cache.slotForSeq[2] {
-		t.Fatalf("expected restore to detach shared slot, got same slot %d", cache.slotForSeq[1])
-	}
-	if cache.slotForSeq[1] != 1 {
-		t.Fatalf("expected seq 1 to move to slot 1, got %d", cache.slotForSeq[1])
-	}
-	if cache.slotForSeq[2] != 0 {
-		t.Fatalf("expected seq 2 to remain on slot 0, got %d", cache.slotForSeq[2])
-	}
-	if cache.refCount[0] != 1 || cache.refCount[1] != 1 {
-		t.Fatalf("unexpected refCounts: slot0=%d slot1=%d", cache.refCount[0], cache.refCount[1])
-	}
-	if _, ok := cache.pendingRestore[1]; ok {
-		t.Fatalf("expected pending restore to be cleared")
-	}
-}
-
-func TestHybridCacheRestoreRejectsIncompleteCheckpoint(t *testing.T) {
-	cache := NewHybridCache(nil, 1, 2, 2)
+func TestCacheRestoreRejectsIncompleteCheckpoint(t *testing.T) {
+	cache := newTestCache()
 	cache.checkpointCount = 3
 	cache.checkpoints = make(map[int]*slotCheckpointStore)
 	cache.pendingRestore = make(map[int]checkpointRestore)
@@ -157,27 +96,26 @@ func TestHybridCacheRestoreRejectsIncompleteCheckpoint(t *testing.T) {
 	cache.refCount = []int{1}
 	cache.freeSlots = nil

-	// Simulate that layer 0 has both conv and delta state (so entryComplete expects both)
-	cache.convStates[0] = nil  // placeholder to indicate layer 0 exists
-	cache.deltaStates[0] = nil // placeholder to indicate layer 0 exists
+	// Simulate layer 0 requires both conv and recurrent checkpoints.
+	cache.convStates[0] = nil
+	cache.recurrentStates[0] = nil

 	store := cache.checkpointStore(0)
 	idx := store.record(9)
 	entry := &store.entries[idx]
-	// Only set conv checkpoint, not delta - making it incomplete
 	entry.conv = map[int]ml.Tensor{0: nil}
-	// entry.delta is not set, so checkpoint is incomplete
+	// entry.recurrent intentionally missing

 	cache.pendingRestore[1] = checkpointRestore{slot: 0, idx: idx, pos: 9}

 	err := cache.Remove(1, 10, math.MaxInt32)
-	if !errors.Is(err, kvcache.ErrNotSupported) {
+	if !errors.Is(err, ErrNotSupported) {
 		t.Fatalf("expected ErrNotSupported for incomplete checkpoint, got %v", err)
 	}
 }

-func TestHybridCacheRestoreAcceptsCompleteCheckpoint(t *testing.T) {
-	cache := NewHybridCache(nil, 1, 2, 2)
+func TestCacheRestoreAcceptsCompleteCheckpoint(t *testing.T) {
+	cache := newTestCache()
 	cache.checkpointCount = 3
 	cache.checkpoints = make(map[int]*slotCheckpointStore)
 	cache.pendingRestore = make(map[int]checkpointRestore)
@@ -186,55 +124,111 @@ func TestHybridCacheRestoreAcceptsCompleteCheckpoint(t *testing.T) {
 	cache.refCount = []int{1}
 	cache.freeSlots = nil

-	// Don't set convStates/deltaStates - with no layers to check,
-	// entryComplete will return true as long as entry.pos >= 0
-
 	store := cache.checkpointStore(0)
 	idx := store.record(9)

 	cache.pendingRestore[1] = checkpointRestore{slot: 0, idx: idx, pos: 9}

-	// Test that restoreComplete returns true when no layers need checkpoints
 	restore := cache.pendingRestore[1]
 	if !cache.restoreComplete(restore) {
 		t.Fatalf("expected restoreComplete to return true for complete checkpoint")
 	}
 }

+func TestCacheRecurrentStateShapeValidation(t *testing.T) {
+	cache := newTestCache()
+	_, err := cache.RecurrentState(nil, 0, 3)
+	if !errors.Is(err, ErrInvalidRecurrentShape) {
+		t.Fatalf("expected ErrInvalidRecurrentShape, got %v", err)
+	}
+}
+
+func TestSlotCheckpointStoreShiftRange(t *testing.T) {
+	store := newSlotCheckpointStore(5)
+	store.record(1)
+	store.record(4)
+	store.record(7)
+	store.record(10)
+
+	store.shiftRange(2, 6)
+
+	var positions []int32
+	for i := range store.entries {
+		if store.entries[i].pos >= 0 {
+			positions = append(positions, store.entries[i].pos)
+		}
+	}
+	slices.Sort(positions)
+
+	want := []int32{1, 3, 6}
+	if !slices.Equal(positions, want) {
+		t.Fatalf("unexpected shifted positions: got=%v want=%v", positions, want)
+	}
+	if store.lastPos != 6 {
+		t.Fatalf("expected lastPos 6, got %d", store.lastPos)
+	}
+}
+
+func TestCacheRemoveMiddleShiftsCheckpoints(t *testing.T) {
+	cache := newTestCache()
+	cache.slotForSeq[1] = 0
+	cache.refCount = []int{1}
+	cache.pendingRestore[1] = checkpointRestore{slot: 0, idx: 0, pos: 1}
+
+	store := cache.checkpointStore(0)
+	store.record(1)
+	store.record(4)
+	store.record(7)
+	store.record(10)
+
+	if err := cache.Remove(1, 2, 6); err != nil {
+		t.Fatalf("expected middle remove to succeed, got %v", err)
+	}
+
+	if _, ok := cache.pendingRestore[1]; ok {
+		t.Fatalf("expected pending restore to be cleared after middle remove")
+	}
+
+	var positions []int32
+	for i := range store.entries {
+		if store.entries[i].pos >= 0 {
+			positions = append(positions, store.entries[i].pos)
+		}
+	}
+	slices.Sort(positions)
+
+	want := []int32{1, 3, 6}
+	if !slices.Equal(positions, want) {
+		t.Fatalf("unexpected checkpoint positions after remove: got=%v want=%v", positions, want)
+	}
+}
+
 func TestSlotCheckpointStoreRingBufferWrapAround(t *testing.T) {
-	// Test that ring buffer wrap-around reuses entries without clearing maps.
 	store := newSlotCheckpointStore(3)

-	// Fill the buffer
 	store.record(10)
 	store.record(20)
 	store.record(30)

-	// Create fake tensor data in the first entry's maps
 	store.entries[0].conv = make(map[int]ml.Tensor)
-	store.entries[0].conv[0] = nil // Simulated tensor reference
-	store.entries[0].delta = make(map[int]ml.Tensor)
-	store.entries[0].delta[0] = nil // Simulated tensor reference
+	store.entries[0].conv[0] = nil
+	store.entries[0].recurrent = make(map[int]ml.Tensor)
+	store.entries[0].recurrent[0] = nil

-	// Record another entry, which should wrap around and overwrite entry 0
 	store.record(40)

-	// Verify the maps are still present (we reuse tensors)
 	if store.entries[0].conv == nil {
 		t.Fatalf("expected conv map to be preserved on reuse")
 	}
-	if store.entries[0].delta == nil {
-		t.Fatalf("expected delta map to be preserved on reuse")
+	if store.entries[0].recurrent == nil {
+		t.Fatalf("expected recurrent map to be preserved on reuse")
 	}
-
-	// Verify the new position was recorded
 	if store.entries[0].pos != 40 {
 		t.Fatalf("expected entry 0 pos to be 40, got %d", store.entries[0].pos)
 	}
 }

 func TestSlotCheckpointStoreFullCapacity(t *testing.T) {
-	// Test behavior when buffer is exactly at capacity
 	store := newSlotCheckpointStore(2)

 	idx1 := store.record(10)
@@ -243,12 +237,10 @@ func TestSlotCheckpointStoreFullCapacity(t *testing.T) {
 	if idx1 != 0 || idx2 != 1 {
 		t.Fatalf("expected indices 0, 1, got %d, %d", idx1, idx2)
 	}
-
 	if store.size != 2 {
 		t.Fatalf("expected size 2, got %d", store.size)
 	}

-	// Verify both checkpoints are accessible
 	_, pos1, ok1 := store.bestIndex(15)
 	_, pos2, ok2 := store.bestIndex(25)

@@ -261,7 +253,6 @@ func TestSlotCheckpointStoreFullCapacity(t *testing.T) {
 }

 func TestSlotCheckpointStoreEmptyBuffer(t *testing.T) {
-	// Test behavior with zero-size buffer
 	store := newSlotCheckpointStore(0)

 	idx := store.record(10)
@@ -276,19 +267,16 @@ func TestSlotCheckpointStoreEmptyBuffer(t *testing.T) {
 }

 func TestSlotCheckpointStorePruneAfterAll(t *testing.T) {
-	// Test pruning that removes all checkpoints
 	store := newSlotCheckpointStore(3)
 	store.record(10)
 	store.record(20)
 	store.record(30)

-	// Prune everything by setting threshold below all positions
 	store.pruneAfter(5)

 	if store.size != 0 {
 		t.Fatalf("expected size 0 after pruning all, got %d", store.size)
 	}
-	// When all checkpoints are pruned, lastPos is reset to -1
 	if store.lastPos != -1 {
 		t.Fatalf("expected lastPos -1 after pruning all, got %d", store.lastPos)
 	}
--- a/llama/patches/0034-ggml-metal-guard-mul_mat_id-map0-and-add-ne20-22-spe.patch
+++ b/llama/patches/0034-ggml-metal-guard-mul_mat_id-map0-and-add-ne20-22-spe.patch
@@ -0,0 +1,37 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Sun, 22 Feb 2026 14:12:30 -0800
+Subject: [PATCH] ggml-metal: guard mul_mat_id map0 and add ne20=22
+ specialization
+
+---
+ ggml/src/ggml-metal/ggml-metal-ops.cpp | 3 ++-
+ ggml/src/ggml-metal/ggml-metal.metal   | 1 +
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp
+index 4ac135603..ac5ad53db 100644
+--- a/ggml/src/ggml-metal/ggml-metal-ops.cpp
+++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp
+@@ -1961,7 +1961,8 @@ int ggml_metal_op_mul_mat_id(ggml_metal_op_t ctx, int idx) {
+     // ne21 = n_rows (batch size)
+     const int ne21_mm_id_min = 32;
+ 
+-    if (props_dev->has_simdgroup_mm && ne00 >= 64 && (ne21 >= ne21_mm_id_min)) {
++    if (props_dev->has_simdgroup_mm && ne00 >= 64 && (ne21 >= ne21_mm_id_min) &&
++            (ne20 == 1 || ne20 == 2 || ne20 == 4 || ne20 == 6 || ne20 == 8 || ne20 == 10 || ne20 == 16 || ne20 == 22)) {
+         // some Metal matrix data types require aligned pointers
+         // ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf (Table 2.5)
+         //switch (op->src[0]->type) {
+diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
+index c37447a10..4f338aa13 100644
+--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
+@@ -9427,6 +9427,7 @@ template [[host_name("kernel_mul_mm_id_map0_ne20_6" )]] kernel kernel_mul_mm_id_
+ template [[host_name("kernel_mul_mm_id_map0_ne20_8" )]] kernel kernel_mul_mm_id_map0_t kernel_mul_mm_id_map0<8>;
+ template [[host_name("kernel_mul_mm_id_map0_ne20_10")]] kernel kernel_mul_mm_id_map0_t kernel_mul_mm_id_map0<10>;
+ template [[host_name("kernel_mul_mm_id_map0_ne20_16")]] kernel kernel_mul_mm_id_map0_t kernel_mul_mm_id_map0<16>;
++template [[host_name("kernel_mul_mm_id_map0_ne20_22")]] kernel kernel_mul_mm_id_map0_t kernel_mul_mm_id_map0<22>;
+ 
+ template<typename S0, typename S0_4x4, typename S0_8x8, typename S1, typename S1_2x4, typename S1_8x8, typename block_q, short nl, void (*dequantize_func)(device const block_q *, short, thread S0_4x4 &), typename T0, typename T0_4x4, typename T1, typename T1_2x4>
+ kernel void kernel_mul_mm_id(
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Eva H	d69ddc1edc	fix: window app crash on startup when update is pending (#14451 )	2026-02-26 16:47:12 -05:00
Eva H	9bf41969f0	app: fix first update check delayed by 1 hour (#14427 )	2026-02-25 18:29:55 -05:00
Jesse Gross	0f23b7bff5	mlxrunner: Cancel in-flight requests when the client disconnects Currently, a canceled request can result in computation continuing in the background to completion. It can also trigger a deadlock when there is nobody to read the output tokens and the pipeline cannot continue to the next request.	2026-02-25 14:00:42 -08:00
Jesse Gross	4e57d2094e	mlxrunner: Simplify pipeline memory and cache management Particularly in error cases, it can be difficult to ensure that all pinned memory is unpinned, MLX buffers are released and cache state is consistent. This encapsulates those pieces and sets up proper deferrals so that this happens automatically on exit.	2026-02-25 14:00:42 -08:00
Jeffrey Morgan	7f9efd53df	model: add support for qwen3.5-27b model (#14415 )	2026-02-25 01:09:58 -08:00
Jeffrey Morgan	da70c3222e	model: support for qwen3.5 architecture (#14378 )	2026-02-24 20:08:05 -08:00
Bruce MacDonald	9d902d63ce	ggml: ensure tensor size is valid (#14406 ) When quantizing tensors during model creation validate that the resulting sizes match what is expected based on the shape.	2026-02-24 21:52:44 -04:00
Daniel Hiltgen	f4f0a4a471	update mlx-c bindings to 0.5.0 (#14380 ) * chore: update mlx-c bindings to 0.5.0 (#14303) * linux: use gcc 11 --------- Co-authored-by: Patrick Devine <patrick@infrahq.com>	2026-02-23 16:44:29 -08:00
Eva H	3323c1d319	app: add upgrade configuration to settings page (#13512 )	2026-02-23 18:08:52 -05:00
Jesse Gross	f20dc6b698	mlx: don't default to affine quantization for unquantized models Otherwise the BF16 version of models trigger segfaults when they call into quantized kernels.	2026-02-23 15:03:53 -08:00
Jeffrey Morgan	4b2ac1f369	model: improvements to LFM architectures (#14368 )	2026-02-23 14:38:10 -08:00
Jesse Gross	8daf47fb3a	mlxrunner: Fix duplicate log prefixes and reduce log noise Pass subprocess stdout/stderr through to the parent's stderr directly instead of re-wrapping each line with slog. The subprocess already writes structured slog output, so the re-wrapping produced nested timestamps, levels, and message fields that were hard to read. Also downgrade verbose KV cache debug logs to trace level.	2026-02-23 14:09:20 -08:00
Eva H	6c980579cd	ui: use capability-based detection for web search (#14336 )	2026-02-23 15:00:09 -05:00
Jesse Gross	5c73c4e2ee	mlxrunner: Simplify KV cache to single-entry prefix matching The KV cache previously used a tree structure which could store multiple divergent sequences, which is good for cache reuse. However, this is typically used in conjunction with paged attention so each node in the tree can store just a chunk of the KV cache and they can be stitched together later. We don't currently do this, so the cache was storing copies of the full cache for each past sequence. This redundancy plus the lack of resource limits, caused significant memory use as a conversation grew. Instead, this changes to store a single entry for the cache, which can be prefix matched. Although it is less ideal for multiple users, it largely matches Ollama's current behavior. It can be improved as additional pieces are fleshed out.	2026-02-23 09:50:07 -08:00
Jesse Gross	5daf59cc66	mlxrunner: Fix memory leaks with pin/sweep lifecycle management The previous approach tracked array lifecycles through reference counting, where each array recorded its inputs and a reference count that was decremented as dependents were freed. This is not really necessary as MLX tracks references internally. It is also error prone as it is easy to create new arrays and forget to free them when the Go variable goes out of scope. Instead, we can pin just the arrays we want (typically outputs and specific intermediates, like the cache). All other arrays are freed by default when we run sweep. This avoids most causes of memory leaks while still giving the freedom to save what we want.	2026-02-23 09:50:07 -08:00
Jeffrey Morgan	0ade9205cc	models: add nemotronh architecture support (#14356 )	2026-02-22 15:09:14 -08:00
Parth Sareen	06edabdde1	cmd/config: install web search plugin to user-level extensions dir (#14362 )	2026-02-22 02:17:03 -08:00
Jeffrey Morgan	8b4e5a82a8	mlx: remove noisy error output from dynamic library loading (#14346 ) The recent change in #14322 added tryLoadByName() which attempts to load libmlxc.dylib via rpath before searching directories. This is an optimization for Homebrew installations where rpath is correctly set. However, when rpath isn't set (which is the common case for app bundle installations), dlopen fails and the CHECK macro prints an error to stderr: ERROR - dynamic.c:21 - CHECK failed: handle->ctx != NULL This error is misleading because it's an expected failure path - the code correctly falls back to searching the executable directory and loads the library successfully. The error message causes user confusion and makes it appear that something is broken. Replace the CHECK macro with a simple return code so the C code fails silently. The Go code already handles error logging appropriately: tryLoadByName() fails silently (intentional fallback), while tryLoadFromDir() logs via slog.Error() when explicit path loading fails.	2026-02-20 23:46:07 -08:00
Parth Sareen	3445223311	cmd: openclaw onboarding (#14344 )	2026-02-20 19:08:38 -08:00
Jeffrey Morgan	fa6c0127e6	app: expose server's default context length to UI (#14037 ) Parse the default_num_ctx from the server's "vram-based default context" log line and expose it through the inference compute API. This eliminates duplicate VRAM tier calculation logic in the frontend. - Add InferenceInfo struct with Computes and DefaultContextLength - Rename GetInferenceComputer to GetInferenceInfo - Handle missing default context line gracefully (older servers) - Add DefaultContextLength to InferenceComputeResponse - Update Settings UI to use server's default, disable slider while loading - Add disabled prop to Slider component (grays out + hides handle) - Migrate existing users with context_length=4096 to 0 (auto mode)	2026-02-20 18:56:30 -08:00
Patrick Devine	97323d1c68	consolidate the tokenizer (#14327 ) This change adds a new x/tokenizer package which includes: * New BPE and SentencePiece tokenizers * Removing the dependency on the imagegen tokenizers * Fixes to multibyte decoding in the pipeline * Various correctness and benchmark tests Not included in this PR is the WordPiece tokenizer for BERT models which will be added when we add embedding models. The imagegen tokenizers will also be removed in a follow-up PR.	2026-02-19 15:55:45 -08:00
natl-set	458dd1b9d9	mlx: try loading library via rpath before searching directories (#14322 ) The existing code manually searches directories for libmlxc.* and passes full paths to dlopen, bypassing the binary's rpath. This means MLX libraries installed via package managers (e.g., Homebrew) aren't found even when rpath is correctly set at link time. This change adds a fallback that tries loading via rpath first (using just the library name), before falling back to the existing directory search. This follows standard Unix/macOS conventions and works with any installation that sets rpath. Fixes library loading on macOS with Homebrew-installed mlx-c without requiring OLLAMA_LIBRARY_PATH environment variable. Co-authored-by: Natl <nat@MacBook-Pro.local>	2026-02-19 10:55:02 -08:00
Bruce MacDonald	9d02d1d767	install: prevent partial download script execution (#14311 ) Wrap script in main function so that a truncated partial download doesn't end up executing half a script.	2026-02-18 18:32:45 -08:00
Bruce MacDonald	1a636fb47a	cmd: set codex env vars on launch and handle zstd request bodies (#14122 ) The Codex runner was not setting OPENAI_BASE_URL or OPENAI_API_KEY, this prevents Codex from sending requests to api.openai.com instead of the local Ollama server. This mirrors the approach used by the Claude runner. Codex v0.98.0 sends zstd-compressed request bodies to the /v1/responses endpoint. Add decompression support in ResponsesMiddleware with an 8MB max decompressed size limit to prevent resource exhaustion.	2026-02-18 17:19:36 -08:00
Patrick Devine	0759fface9	Revert "chore: update mlx-c bindings to 0.5.0 (#14303 )" (#14316 ) This reverts commit `f01a9a7859`.	2026-02-18 17:01:25 -08:00
Parth Sareen	325b72bc31	cmd/tui: default to single-select for editor integrations (#14302 )	2026-02-17 18:17:27 -08:00
Patrick Devine	f01a9a7859	chore: update mlx-c bindings to 0.5.0 (#14303 )	2026-02-17 16:48:16 -08:00
Patrick Devine	9aefd2dfee	model: add qwen3 support to mlxrunner (#14293 )	2026-02-17 13:58:49 -08:00
Patrick Devine	d07e4a1dd3	bugfix: better mlx model scheduling (#14290 ) This fixes a bug with current MLX based models which don't get loaded/unloaded correctly. The first model currently gets loaded and then subsequent model starts get shunted to the first runner which results in the wrong model being run.	2026-02-17 13:57:05 -08:00
Parth Sareen	8a257ec00a	docs: make integrations more discoverable (#14301 ) * docs: add Pi integration page * docs: flatten integration sidebar with expanded subheadings * docs: add OpenClaw and Claude Code to quickstart	2026-02-17 13:27:25 -08:00
Parth Sareen	2f4de1acf7	cmd: ollama launch always show model picker (#14299 )	2026-02-17 12:02:14 -08:00
Parth Sareen	ec95c45f70	cmd/config: ollama launch cline CLI (#14294 )	2026-02-17 11:37:53 -08:00
Patrick Devine	3a88f7eb20	bugfix: add missing linear layer factory (#14289 )	2026-02-16 17:22:20 -08:00
Patrick Devine	0d5da826d4	bugfix: display the parameter count correctly in mlx for ollama show (#14285 )	2026-02-16 13:03:34 -08:00
Patrick Devine	9b795698b8	model: add llama3 architecture to mlxrunner (#14277 )	2026-02-15 23:06:28 -08:00
Patrick Devine	041fb77639	model: add gemma3 to the mlxrunner (#14276 ) This change adds the gemma3 model to the mlxrunner and simplifies some of the quantization code for loading weights.	2026-02-15 22:47:59 -08:00
Saumil Shah	8224cce583	readme: update download link for macOS (#1 ) (#14271 )	2026-02-15 15:25:15 -08:00
Patrick Devine	d18dcd7775	mlxrunner fixes (#14247 ) * load glm4_moe_lite from the mlxrunner * fix loading diffusion models * remove log lines * fix --imagegen flag	2026-02-13 22:30:42 -08:00
Parth Sareen	5f5ef20131	anthropic: enable websearch (#14246 )	2026-02-13 19:20:46 -08:00
Parth Sareen	f0a07a353b	cmd/tui: fix powershell search (#14242 )	2026-02-13 15:53:11 -08:00
Devon Rifkin	948de6bbd2	add ability to disable cloud (#14221 ) * add ability to disable cloud Users can now easily opt-out of cloud inference and web search by setting ``` "disable_ollama_cloud": true ``` in their `~/.ollama/server.json` settings file. After a setting update, the server must be restarted. Alternatively, setting the environment variable `OLLAMA_NO_CLOUD=1` will also disable cloud features. While users previously were able to avoid cloud models by not pulling or `ollama run`ing them, this gives them an easy way to enforce that decision. Any attempt to run a cloud model when cloud is disabled will fail. The app's old "airplane mode" setting, which did a similar thing for hiding cloud models within the app is now unified with this new cloud disabled mode. That setting has been replaced with a "Cloud" toggle, which behind the scenes edits `server.json` and then restarts the server. * gate cloud models across TUI and launch flows when cloud is disabled Block cloud models from being selected, launched, or written to integration configs when cloud mode is turned off: - TUI main menu: open model picker instead of launching with a disabled cloud model - cmd.go: add IsCloudModelDisabled checks for all Selection* paths - LaunchCmd: filter cloud models from saved Editor configs before launch, fall through to picker if none remain - Editor Run() methods (droid, opencode, openclaw): filter cloud models before calling Edit() and persist the cleaned list - Export SaveIntegration, remove SaveIntegrationModel wrapper that was accumulating models instead of replacing them * rename saveIntegration to SaveIntegration in config.go and tests * cmd/config: add --model guarding and empty model list fixes * Update docs/faq.mdx Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com> * Update internal/cloud/policy.go Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com> * Update internal/cloud/policy.go Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com> * Update server/routes.go 
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com> * Revert "Update internal/cloud/policy.go" This reverts commit `8bff8615f9`. Since this error shows up in other integrations, we want it to be prefixed with Ollama * rename cloud status * more status renaming * fix tests that weren't updated after rename --------- Co-authored-by: ParthSareen <parth.sareen@ollama.com> Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>	2026-02-12 15:47:00 -08:00