Compare commits

..

95 Commits

Author SHA1 Message Date
jmorganca
d132315276 wip
api: expose usage data
2026-01-16 00:24:07 -08:00
Parth Sareen
12e2b3514a x: agent loop ux improvements (#13635) 2026-01-07 01:27:15 -08:00
Devon Rifkin
626af2d809 template: fix args-as-json rendering (#13636)
In #13525, I accidentally broke templates' ability to automatically
render tool call function arguments as JSON.

We do need these to be proper maps because we need templates to be able
to call range, which can't be done on custom types.
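
As a quick illustration (a minimal sketch, not the repo's actual template code): Go's text/template can `range` over a plain `map[string]any`, but not over an arbitrary custom type, which is why the arguments must stay proper maps for templates. Note also that `range` over a map iterates in sorted key order, so order is not preserved on this path.

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// range works on plain maps; a custom ordered type would not be iterable here.
	tmpl := template.Must(template.New("args").Parse(
		`{{range $k, $v := .}}{{$k}}={{$v}} {{end}}`))
	args := map[string]any{"city": "Toronto", "unit": "celsius"}
	_ = tmpl.Execute(os.Stdout, args) // city=Toronto unit=celsius (keys sorted)
}
```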
2026-01-06 18:33:57 -08:00
Parth Sareen
76912c062a x: add experimental agent loop (#13628) 2026-01-05 23:38:40 -08:00
Devon Rifkin
6c3faafed2 olmo3: fix flaky test (#13629)
I introduced this in <https://github.com/ollama/ollama/pull/13525>
2026-01-05 22:37:20 -08:00
Devon Rifkin
e51dead636 preserve tool definition and call JSON ordering (#13525)
* preserve tool definition and call JSON ordering

This is another iteration of
<https://github.com/ollama/ollama/pull/12518>, but this time we've
simplified things by relaxing the competing requirements of being
compatible AND order-preserving with templates (vs. renderers). We
maintain backwards compatibility at the cost of not guaranteeing order
for templates. We plan on moving more and more models to renderers,
which have been updated to use these new data types, and additionally
we could add an opt-in way of templates getting an order-preserved list
(e.g., via sibling template vars)

* orderedmap_test: remove testify
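
A short sketch of the order-preserving types this introduces (the full implementation is in the api/types.go diff further down; the import path is assumed to match the repo layout):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	// Arguments marshal in insertion order, not Go's randomized map order.
	args := api.NewToolCallFunctionArguments()
	args.Set("zebra", "z")
	args.Set("apple", "a")
	b, _ := json.Marshal(args)
	fmt.Println(string(b)) // {"zebra":"z","apple":"a"}
}
```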
2026-01-05 18:03:36 -08:00
Harry V. Kiselev
d087e46bd1 docs/capabilities/vision: fix curl related code snippet (#13615) 2026-01-03 17:27:46 -05:00
lif
37f6f3af24 server: return error when embedding contains NaN or Inf values (#13599)
The normalize function now checks for NaN and Inf values in the
embedding vector before processing. This prevents JSON encoding
failures when models produce invalid floating-point values.

Fixes #13572

Signed-off-by: majiayu000 <1835304752@qq.com>
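
A minimal sketch of the kind of guard described above (hypothetical helper, not the actual normalize code): reject NaN/Inf before the vector reaches the JSON encoder, which cannot represent those values.

```go
package main

import (
	"fmt"
	"math"
)

func validateEmbedding(v []float32) error {
	for i, x := range v {
		if math.IsNaN(float64(x)) || math.IsInf(float64(x), 0) {
			return fmt.Errorf("embedding contains invalid value at index %d: %v", i, x)
		}
	}
	return nil
}

func main() {
	fmt.Println(validateEmbedding([]float32{0.1, float32(math.NaN())}))
}
```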
2026-01-03 02:20:12 -05:00
Nhan Nguyen
e1bdc23dd2 docs: fix tool name mismatch and trailing commas in api.md example (#13559)
The tool calling example used "get_temperature" for tool_calls but
defined the tool as "get_weather". Also removed trailing commas that
made the JSON invalid.

Fixes #13031
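
For reference, the corrected shape (abridged and illustrative, not the full api.md example): the `name` in `tool_calls` must match the defined tool, and strict JSON forbids trailing commas.

```json
{
  "tools": [
    {"type": "function", "function": {"name": "get_weather"}}
  ],
  "message": {
    "tool_calls": [
      {"function": {"name": "get_weather", "arguments": {"location": "Toronto"}}}
    ]
  }
}
```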
2026-01-03 02:14:53 -05:00
lif
2e78653ff9 app/ui: add swift syntax highlighting support (#13574)
Fixes #13476

Signed-off-by: majiayu000 <1835304752@qq.com>
2026-01-03 02:12:08 -05:00
lif
f5f74e12c1 docs: add version note for /v1/responses API (#13596)
Signed-off-by: majiayu000 <1835304752@qq.com>
2026-01-03 01:58:20 -05:00
Vallabh Mahajan
18fdcc94e5 docs: fix broken .md links and render issues (#13550) 2025-12-23 12:44:55 -05:00
Daniel Hiltgen
7ad036992f amd: use GTT on iGPUs on linux (#13196)
On Linux, look at the GTT memory information for iGPUs.
2025-12-23 09:30:05 -08:00
Jesse Gross
172b5924af llm: Avoid integer underflow on llama engine memory layout
On the llama engine, when we compute the memory layout, we reserve
a buffer to allow for some flexibility for incorrect estimates.
This is subtracted from GPU free memory and on GPUs with limited
memory, it may underflow.

Fixes #13494
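
The underflow pattern is easy to sketch (hypothetical helper, not the engine's actual code): free memory is unsigned, so subtracting a reserve larger than the free amount must clamp to zero rather than wrapping around.

```go
package main

import "fmt"

func subtractReserve(free, reserve uint64) uint64 {
	if reserve > free {
		return 0 // clamp instead of wrapping to a huge value
	}
	return free - reserve
}

func main() {
	fmt.Println(subtractReserve(512<<20, 1<<30)) // 0, not ~1.8e19
}
```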
2025-12-19 15:48:15 -08:00
Jeffrey Morgan
8852220f59 add REQUIRES command to Modelfile (#13361) 2025-12-18 13:21:29 -08:00
Parth Sareen
7325791599 parsers/renderers: functiongemma (#13521) 2025-12-18 07:55:37 -08:00
Grace
522c11a763 Revert "Omit args and params in tool function def and calls (#13516)" (#13518)
This reverts commit 0fadeffaee.
2025-12-17 19:06:56 -08:00
Grace
0fadeffaee Omit args and params in tool function def and calls (#13516) 2025-12-17 18:42:21 -08:00
Daniel Hiltgen
49a9c9ba6a GGML update to ec98e2002 (#13451)
* Revert "add support for NVIDIA Nemotron 3 Nano"

This reverts commit e7d2ae9d69.

* GGML update to 380b4c984

Remove MaskBatchPadding as GGML_KQ_MASK_PAD is no longer present (no
padding required)

* update to c45f89d55

* ec98e2002

solar pro needed more adjusting - needs verification

* review comments
2025-12-17 13:13:55 -08:00
Parth Sareen
1c094038bc types: add nested property support for tool definitions (#13508) 2025-12-17 11:54:09 -08:00
Grace
a013693f80 DeepseekV3 Family Parser (#13484) 2025-12-16 18:56:30 -08:00
Michael Yang
f6a016f49d revert granite-embedding (#13505) 2025-12-16 15:44:52 -08:00
Bruce MacDonald
45c4739374 types: ConfigV2 and RootFS (#13504)
Refactored the ConfigV2 and RootFS types from server/images.go into a new types/model/config.go file under the model package, and updated all references to use model.ConfigV2 and model.RootFS. This allows other projects to use these types without compiling the C code in the llama package.
2025-12-16 15:18:17 -08:00
Michael Yang
2dd029de12 remove unnecessary code (#13502)
slog is already lazily evaluated so this code is completely redundant
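
A hedged guess at the pattern in question (the removed code itself isn't shown here): slog's Logger consults the handler's Enabled check internally and returns early, so wrapping a log call in an explicit level guard adds nothing.

```go
package main

import (
	"context"
	"log/slog"
)

func main() {
	logger := slog.Default()
	// Redundant: slog already performs this Enabled check before emitting.
	if logger.Enabled(context.Background(), slog.LevelDebug) {
		logger.Debug("details")
	}
	// Equivalent:
	logger.Debug("details")
}
```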
2025-12-16 15:11:26 -08:00
Michael Yang
903b1fc97f use ollama engine for bert models (#13501)
register bpe tokenizer which enables granite-embedding
2025-12-16 11:29:19 -08:00
Parth Sareen
89eb795293 parsers/renderers: use think from user for nemotron (#13492) 2025-12-15 18:55:17 -08:00
Parth Sareen
7e3ea813c1 llama/parsers/renderers: nemotron 3 nano (#13489)
---------

Co-authored-by: Daniel Hiltgen <daniel@ollama.com>
2025-12-15 18:00:08 -08:00
Grace
7b95087b9d Adding tool definitions to DeepseekV3 renderer (#13491) 2025-12-15 17:57:06 -08:00
Michael Yang
971d62595a fix: qwen2.5 vl rope (#13486)
* qwen25vl: bump max pixels

* qwen25vl: mrope

fix qwen2.5vl window

* qwen25vl: vision rope
2025-12-15 17:30:33 -08:00
Parth Sareen
ffbe8e076d model: add olmo3 and olmo3.1 (#13415) 2025-12-15 15:20:04 -08:00
Grace
2c639431b1 DeepseekV3 family renderer (#13180) 2025-12-15 14:50:52 -08:00
Nhan Nguyen
aacd1cb394 fix: define GGML_VERSION variables for proper SOVERSION expansion (#13469)
The ggml/src/CMakeLists.txt uses GGML_VERSION_MAJOR for the shared
library SOVERSION property, but these variables were not defined when
building from ollama's CMakeLists.txt.

This caused libggml-base.so to be named with a literal "SOVERSION"
suffix (libggml-base.so.SOVERSION) instead of the actual version
number (libggml-base.so.0).

The fix adds the required GGML_VERSION_* variables before including
the ggml subdirectory.

Fixes #13436
2025-12-15 14:42:15 -08:00
Parth Sareen
e3731fb160 renderers: add olmo3.1 and olmo3 fixes (#13447) 2025-12-15 11:26:43 -08:00
Eva H
8dbc9e7b68 app/ui: handle unspecified bind addresses and wait for server in ollama proxy (#13159) 2025-12-15 13:33:09 -05:00
Daniel Hiltgen
abe67acf8a Revert "Enable Ollama engine by default" (#13481)
This reverts commit 56f754f46b.
2025-12-15 09:55:45 -08:00
Jeffrey Morgan
4ff8a691bc model: default gemma 3 rope scale to 1.0, apply corrections based on layer counts (#13453) 2025-12-12 17:51:56 -08:00
Jeffrey Morgan
1b308e1d2a model: fix global layer rope scale values for gemma 3 (#13452) 2025-12-12 16:29:01 -08:00
Daniel Hiltgen
bd6c1d6b49 flash attn: add auto mode for llama engine (#13052)
* flash attn: add auto mode for llama engine

If the user does not specify fa in the environment, use auto-mode.

* review comments

* ensure kv cache quantized types have FA explicitly enabled

additional review comments
2025-12-12 13:27:19 -08:00
Jeffrey Morgan
3af5d3b738 model: force rope factor 1.0 for Gemma 3 (#13445) 2025-12-12 13:27:08 -08:00
Daniel Hiltgen
7730895158 Enable Ollama engine by default (#13443)
This changes the default behavior to use the Ollama engine for supported
models, while retaining the ability to disable the Ollama engine and
fall back to the Llama engine.  Models in the OllamaEngineRequired list
will always run on the Ollama engine.
2025-12-12 11:48:43 -08:00
Eva H
de9ecfd01c tidy up lint warnings on windows (#13430) 2025-12-12 11:43:35 -05:00
Eva H
95fdd8d619 fix: select and update models folder in settings (#13412) 2025-12-12 11:09:37 -05:00
Devon Rifkin
9f7822851c docs: add docs for v1/responses and rework openai compat section (#13416)
* docs: add docs for v1/responses and rework openai compat section

I reworked the examples to be separated by topic and to be fully
runnable (i.e., they now log output instead of just suggesting how a
call might be made).

We now use `<CodeGroup>`s so that each example has a dropdown on the
docs site for users to choose, which makes the examples a lot more
digestible (since you only see approx 1/3 of the code you used to).

I also added a new tool to extract code examples into files so that it's
easier to actually run them and check that they work.

## Example

```shell
go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx
```

Output:

```
Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368

  - 01_basic.py
  - 01_basic.js
  - 01_basic.sh
  - 02_responses.py
  - 02_responses.js
  - 02_responses.sh
  - 03_vision.py
  - 03_vision.js
  - 03_vision.sh

Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368

To run examples:

  cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
  npm install   # for JS examples

then run individual files with `node file.js`, `python file.py`, `bash file.sh`
```

In the future we should consider actually running the examples in CI and
having some sort of acceptance test so we can automatically detect when
our examples break. So this is just a start in that direction.

* Update docs/api/openai-compatibility.mdx

Co-authored-by: Parth Sareen <parth.sareen@ollama.com>

* Update docs/api/openai-compatibility.mdx

Co-authored-by: Parth Sareen <parth.sareen@ollama.com>

---------

Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
2025-12-11 17:39:40 -08:00
Parth Sareen
9b2035d194 openai: add tool call appending to previous assistant message (#13434)
* openai: add tool call appending to previous asst message

* add tests for thinking appending
2025-12-11 17:30:12 -08:00
Alexander Gusak
93d45d7a04 docs: fix link to modelfile.mdx (#13220) 2025-12-11 16:14:45 -08:00
JJ
709f842457 Update README.md (#13373)
Correct Markdown syntax for Swollama GitHub and DocC documentation links
2025-12-11 16:08:57 -08:00
Jeffrey Morgan
2dfb74410d model: fix rotary embeddings for ministral 3 (#13432) 2025-12-11 16:02:05 -08:00
Devon Rifkin
1eb5e75972 openai: add v1/responses support (#13351)
Only supporting the stateless part of the API.

Doc updates to come once this is shipped.

Closes: #9659
2025-12-11 15:37:10 -08:00
nicole pardal
3475d915cb embeddings: modified batch size (#13429)
This PR detects embedding models and sets batch_size = context_size so the full input fits in a single batch.
Previously, if batch size was smaller than the input, tokens could be split across batches and cause a SIGTRAP crash.
This change ensures all tokens stay in one batch and prevents crashes.
Fixes: #12938 #13054

Co-authored-by: Jesse Gross <jesse@ollama.com>
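
The sizing rule reduces to a one-liner (names hypothetical, not the actual server code):

```go
package main

import "fmt"

// Embedding models get batch_size == context_size so the whole prompt is
// processed in a single batch and tokens are never split across batches.
func effectiveBatchSize(isEmbedding bool, batchSize, contextSize int) int {
	if isEmbedding {
		return contextSize
	}
	return batchSize
}

func main() {
	fmt.Println(effectiveBatchSize(true, 512, 8192)) // 8192
}
```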
2025-12-11 15:36:31 -08:00
Jeffrey Morgan
48e78e9be1 template: add yesterdayDate helper function (#13431) 2025-12-11 14:47:55 -08:00
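A sketch of how such a helper could be wired into Go templates (the exact date format Ollama's templates use is an assumption here):

```go
package main

import (
	"os"
	"text/template"
	"time"
)

func main() {
	funcs := template.FuncMap{
		"yesterdayDate": func() string {
			return time.Now().AddDate(0, 0, -1).Format("2006-01-02")
		},
	}
	t := template.Must(template.New("t").Funcs(funcs).Parse(`{{ yesterdayDate }}`))
	_ = t.Execute(os.Stdout, nil)
}
```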
Jeffrey Morgan
a838421ea3 model: conversion and hyperparameter fixes for ministral and devstral (#13424) 2025-12-11 13:04:00 -08:00
EasonLin
1c4e85b4df routes: add logprobs in tool calls (#13238) 2025-12-10 17:28:41 -08:00
Eloi Torrents
dac4f17fea cmd/bench: fix binary name in README (#13276) 2025-12-10 14:16:58 -08:00
Julia Scheaffer
56b8fb024c cmd/bench: fix options table in cmd/bench/README.md (#13216) 2025-12-10 14:07:48 -08:00
Gabe Goodhart
b95693056c feat: llama.cpp bump (17f7f4) for SSM performance improvements (#13408)
* feat: Bump llama.cpp to the latest master (17f7f4b)

This brings in significant improvements to prefill performance for all
models using the SSM_CONV and SSM_SCAN ops (granite4, jamba, falcon-h,
nemotron-h, Qwen3 Next) on Apple Metal.

See https://github.com/ggml-org/llama.cpp/pull/17876

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

* feat: Update patches 1-4

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

* fix: Update patches 5-12

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

* feat: Update patches 13-18

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

* feat: Update patch 20

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

* feat: Update patches 21-31

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

* feat: Sync vendored code

The two files I'm not sure about here are the swap from gemma3-iswa.cpp to
gemma3.cpp (I chose to include this because I think it's required) and
`ggml-zendnn.h`, which I chose to omit.

Branch: LlamaCPPMetalSSMImprovements

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

---------

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
2025-12-10 12:59:27 -08:00
Eva H
c34fc64688 app/ui: use requestAnimationFrame to prevent bottom line cutoff in streaming thinking display (#13137) 2025-12-10 15:29:48 -05:00
Eva H
7cf6f18c1f app/ui: refactor to use Ollama endpoints for user auth and health checks (#13081) 2025-12-10 15:24:31 -05:00
Eva H
bbbb6b2a01 app/ui: fix model capabilities not updating after download completion (#13179) 2025-12-10 14:40:02 -05:00
nicole pardal
76f88caf43 nomic-embed-text:v2: model implementation (#13162) 2025-12-09 14:24:51 -08:00
Parth Sareen
2bccf8c624 renderers/parsers: olmo3 instruct (#13383) 2025-12-09 11:12:27 -08:00
Parth Sareen
0c5e5f6630 parsers/renderers: olmo3 think (#13290) 2025-12-09 10:41:47 -08:00
Michael Yang
d475d1f081 fix: qwen2.5vl metal argsort 2025-12-08 17:18:24 -08:00
Jeffrey Morgan
d2f334c1f7 model: add rnj-1 inference support (#13354) 2025-12-08 16:49:17 -08:00
Michael Yang
603ceefaa6 refactor rope
change to a flatter directory structure and group the options with the
function

update models to call rope in one place
2025-12-08 14:42:22 -08:00
nicole pardal
e082d60a24 truncation: fixed runner truncation logic + removed server truncation (#12839)
This PR consolidates all embedding prompt-length checking, truncation, and prompt token counting into the runner to ensure a single source of truth.
2025-12-08 11:20:28 -08:00
Daniel Hiltgen
5dae738067 CI: use vendor base commit in cache keys (#13348)
Prevent CGO from accidentally reusing old object files from the cache
across vendor updates
2025-12-08 09:48:49 -08:00
JJ
0c78723174 readme: fix broken Swollama link in community integrations (#13370) 2025-12-07 21:49:52 -08:00
Jeffrey Morgan
5a41d69b2a fs/ggml: write int32 and int64 values to gguf files (#13335) 2025-12-07 21:49:14 -08:00
Daniel Hiltgen
c146a138e3 ggml: handle all streams (#13350)
Follow up from #12992

Free all streams, and keep the alloc logic aligned across streams.
2025-12-05 16:10:33 -08:00
Sos Pogosyan
31b8c6a214 fix(api): correct Content-Type header for /api/chat and /api/generate when using cloud models (#13279)
---------

Co-authored-by: Pogosyan Sos <sos_pogosyan@MacBook-Pro-Sos.local>
Co-authored-by: Patrick Devine <patrick@infrahq.com>
2025-12-04 21:33:07 -08:00
Jesse Gross
9191dfaf05 llm: Enable flash attention for mistral3 by default 2025-12-04 15:19:06 -08:00
Jesse Gross
1108d8b34e ggml: Enable flash attention for vision encoders
Although the vision component of multimodal models typically already
calls the optimized nn.Attention, it is converted into non-fused
operations. That is because the backend-specific fused kernels may
have requirements, such as padding, that are handled by the cache,
which vision encoders don't use.

This implements a fallback path in the backend, softening the
requirements into optimizations. In turn, this allows flash attention
to be used for vision encoders, saving a significant amount of VRAM
and improving performance.
2025-12-04 15:19:06 -08:00
Jesse Gross
7837a5bc7e ggml: Always set cache padding to 256
We currently use cache padding of 32 when not using flash attention
and 256 with flash attention, which is based on the historic alignment
requirements of these kernels. The restrictions have since been
loosened but there are still performance benefits, such as better
CUDA graph reuse.

Since the requirement is no longer kernel-specific, set the padding
uniformly to 256, as llama.cpp has.
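
Concretely, the cache size is rounded up to a multiple of the padding; a quick sketch of the arithmetic:

```go
package main

import "fmt"

// pad rounds n up to the next multiple of align (align must be a power of two).
func pad(n, align int) int {
	return (n + align - 1) &^ (align - 1)
}

func main() {
	fmt.Println(pad(1000, 32))  // 1024 (old non-flash-attention padding)
	fmt.Println(pad(1100, 256)) // 1280 (uniform padding after this change)
}
```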
2025-12-04 15:19:06 -08:00
Patrick Devine
0a844f8e96 convert: add deepseek converter (#12980)
This change adds the ability for `ollama create` to convert models that use
the DeepSeek2 architecture (specifically DeepSeekV3 and DeepSeek-R1).
2025-12-04 13:49:30 -08:00
Eloi Torrents
a03223b86f cmd/bench: support writing benchmark output to file (#13263)
* cmd/bench: support writing benchmark output to file

This changes Ollama to allow the bench command to write benchmark
results to a user-specified output file instead of stdout when the
--output flag is provided.

---------

Co-authored-by: Patrick Devine <patrick@infrahq.com>
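
Hedged usage sketch (the flag name comes from the message; the invocation path is an assumption):

```shell
go run ./cmd/bench --output results.txt
```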
2025-12-04 13:22:41 -08:00
Daniel Hiltgen
0cf7794b16 ggml update to b7108 (#12992)
* Revert "vulkan: temporary cary of vulkan fixes (#12971)"

This reverts commit 3a9e8e9fd4.

* ggml update to b7087

* fix argsort on metal

* update to b7108

* fix bakllava regression

This model lacks the metadata for the projector type.

* update to b7209

* fix TopK perf

* only build arm code on arm
2025-12-03 19:43:29 -08:00
Jeffrey Morgan
854d40edc5 ci: restore previous linter rules (#13322) 2025-12-03 18:55:02 -08:00
Bruce MacDonald
84a2cedf18 app: relay thinking false to server (#13319)
This fixes a bug where disabling thinking on deepseek-v3.1 did not stop the model from thinking.

When thinking is not defined, it should not be sent to the server, since this causes error responses in some cases where the model does not support thinking. However, if it is defined as false, it should still be sent.
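
This tri-state maps naturally onto a `*bool` with `omitempty` in Go; a minimal sketch (not the app's actual request struct):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type chatRequest struct {
	// nil = unset (field omitted); pointer to false = explicitly disabled (field sent).
	Think *bool `json:"think,omitempty"`
}

func main() {
	unset, _ := json.Marshal(chatRequest{}) // {}
	f := false
	disabled, _ := json.Marshal(chatRequest{Think: &f}) // {"think":false}
	fmt.Println(string(unset), string(disabled))
}
```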
2025-12-03 15:06:55 -08:00
Daniel Hiltgen
3f30836734 CUDA: filter devices on secondary discovery (#13317)
We now do a deeper probe of CUDA devices to verify the library version has
the correct compute capability coverage for the device. Because ROCm also
interprets the CUDA env var to filter AMD devices, we try to avoid setting
it, since doing so leads to problems in mixed-vendor systems. However, without
setting it for this deeper probe, each CUDA library subprocess discovers all
CUDA GPUs, and on systems with many GPUs this can hit timeouts. The fix is
to turn on the CUDA visibility env var just for this deeper-probe use case.
2025-12-03 12:58:16 -08:00
Nathan Hook
cc9555aff0 Update user message format for temperature query (#13256) 2025-12-02 15:08:39 -08:00
hello_world
20aee96706 Add Vulkan GPU support instructions in development.md (#13265)
Added Vulkan SDK installation instructions and environment variable setup for building with Vulkan support.
2025-12-02 13:37:32 -08:00
Daniel Hiltgen
18b5958d46 test: avoid ministral tools test on low vram (#13302)
Avoid hitting test timeouts
2025-12-02 13:18:55 -08:00
Jesse Gross
5317202c38 llm: Don't always evict models on CPU-only systems
Model eviction happens when we have at least one other model
loaded and are unable to load all layers into VRAM. However, on
CPU-only systems we can never load layers into VRAM, so this
constantly triggered eviction.

Fixes #13227
2025-12-02 10:58:08 -08:00
Daniel Hiltgen
d771043e88 test: add ministral-3 (#13300) 2025-12-02 09:52:16 -08:00
Daniel Hiltgen
f8f1071818 CUDA: verify CC is supported by target library (#13298) 2025-12-02 09:28:41 -08:00
Patrick Devine
d3e0a0dee4 model: ministral w/ llama4 scaling (#13292)
This change:

* fixes rope scaling in the mistral converter
* updates ministral to include llama4 scaling
* includes a new ministral parser for parsing reasoning and tool calling

---------

Co-authored-by: jmorganca <jmorganca@gmail.com>
2025-12-01 23:20:14 -08:00
Daniel Hiltgen
554172759c win: warn if ggml-base detected in PATH (#13289)
If the user has somehow installed another GGML-based app that places a
ggml-base lib somewhere in their PATH, we can experience runtime problems
due to incompatibilities. This change adds a warning message if we detect
a ggml-base outside of our install location, to aid in troubleshooting.
2025-12-01 15:36:47 -08:00
Bruce MacDonald
5b6a8e6001 api/client: handle non-json streaming errors (#13007)
While processing the response stream during a chat or generation, if an error occurs it is parsed and returned to the user. The issue with the existing code is that it assumed the response would be valid JSON, which is not a safe assumption and caused cryptic error messages to be displayed due to parsing failures:
`invalid character 'i' looking for beginning of value`

This change updates the stream function to return the raw error string if it can't be parsed as JSON. This should help with debugging by making sure the actual error reaches the user.
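
A sketch of the fallback described here (the real change is in the api/client.go diff below): try the JSON error field first, then surface the raw body.

```go
package main

import (
	"encoding/json"
	"errors"
	"fmt"
)

func parseStreamError(bts []byte) error {
	var er struct {
		Error string `json:"error"`
	}
	if err := json.Unmarshal(bts, &er); err == nil && er.Error != "" {
		return errors.New(er.Error)
	}
	// Not JSON (e.g. an HTML error page): return the raw body so the real
	// message reaches the user instead of a JSON parse failure.
	return errors.New(string(bts))
}

func main() {
	fmt.Println(parseStreamError([]byte(`{"error":"bad request"}`)))
	fmt.Println(parseStreamError([]byte(`<html>404 Not Found</html>`)))
}
```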
2025-12-01 15:10:16 -08:00
Daniel Hiltgen
467bbc0dd5 jetpack: require exact match or skip cuda_jetpack* (#13288)
The cuda_jetpack libs will enumerate discrete GPUs on SBSA systems
which leads to runtime failures of missing kernels.  This fix
requires an exact match to enable jetpacks instead of relying on
enumeration to filter out supported libraries.
2025-12-01 12:48:16 -08:00
Jeffrey Morgan
6d9f9323c5 .gitattributes: add app/webview to linguist-vendored (#13274) 2025-11-29 23:46:10 -05:00
Ondrej Kokes
0c2489605d docs: fix output formatting in faq.mdx (#13231)
There were a few Markdown typos in one FAQ answer. It now renders as a proper ASCII table.
2025-11-28 19:19:21 -05:00
EntropyYue
8b1b89a984 docs: remove deprecated parameters (#13237) 2025-11-26 11:03:09 +09:00
Eva H
47e272c35a app/cmd: update ollama help to navigate to ollama doc instead of github page (#13174) 2025-11-20 16:30:35 -05:00
Jeffrey Morgan
417a81fda3 app: open app instead of always navigating to / on connect (#13164) 2025-11-20 12:59:17 -08:00
Daniel Hiltgen
dba62ff3a5 discovery: fix cuda overlap case (#13176)
Recent refactoring introduced a regression in the CUDA overlap filtering that favors the newest supported version.
2025-11-20 12:15:37 -08:00
616 changed files with 65734 additions and 32644 deletions

.gitattributes vendored
View File

@@ -19,6 +19,8 @@ ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored
app/webview linguist-vendored
llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

View File

@@ -16,13 +16,15 @@ jobs:
outputs:
GOFLAGS: ${{ steps.goflags.outputs.GOFLAGS }}
VERSION: ${{ steps.goflags.outputs.VERSION }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
- name: Set environment
id: goflags
run: |
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_OUTPUT
echo VERSION="${GITHUB_REF_NAME#v}" >>$GITHUB_OUTPUT
echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" | tee -a $GITHUB_OUTPUT
echo VERSION="${GITHUB_REF_NAME#v}" | tee -a $GITHUB_OUTPUT
echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT
darwin-build:
runs-on: macos-14-xlarge
@@ -53,6 +55,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- run: |
./scripts/build_darwin.sh
- name: Log build results
@@ -185,7 +190,7 @@ jobs:
- uses: actions/cache@v4
with:
path: ${{ github.workspace }}\.ccache
key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}
key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}-${{ needs.setup-environment.outputs.vendorsha }}
- name: Build target "${{ matrix.preset }}"
run: |
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
@@ -249,6 +254,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- name: Verify gcc is actually clang
run: |
$ErrorActionPreference='Continue'
@@ -302,6 +310,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache-dependency-path: |
go.sum
Makefile.sync
- uses: actions/download-artifact@v4
with:
pattern: depends-windows*

View File

@@ -22,6 +22,7 @@ jobs:
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.changes.outputs.changed }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
with:
@@ -37,6 +38,7 @@ jobs:
}
echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT
echo vendorsha=$(make -f Makefile.sync print-base) | tee -a $GITHUB_OUTPUT
linux:
needs: [changes]
@@ -83,7 +85,7 @@ jobs:
- uses: actions/cache@v4
with:
path: /github/home/.cache/ccache
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
- run: |
cmake --preset ${{ matrix.preset }} ${{ matrix.flags }}
cmake --build --preset ${{ matrix.preset }} --parallel
@@ -178,7 +180,7 @@ jobs:
- uses: actions/cache@v4
with:
path: ${{ github.workspace }}\.ccache
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
- run: |
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
@@ -206,6 +208,9 @@ jobs:
- uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
cache-dependency-path: |
go.sum
Makefile.sync
- uses: actions/setup-node@v4
with:
node-version: '20'

View File

@@ -1,81 +1,51 @@
version: "2"
linters:
default: none
enable:
- asasalint
- bidichk
- bodyclose
- containedctx
- copyloopvar
- errcheck
- errorlint
- exptostd
- gocheckcompilerdirectives
- gocritic
- govet
- ineffassign
- intrange
- makezero
- misspell
- modernize
- nilerr
- nilnil
- nolintlint
- nosprintfhostport
- perfsprint
- prealloc
- sloglint
- staticcheck
- unconvert
- unused
- usestdlibvars
- usetesting
- wastedassign
- whitespace
disable:
- errcheck
- usestdlibvars
settings:
errcheck:
exclude-functions:
- fmt.Fprintf
gocritic:
disabled-checks:
# Detects suspicious duplicated sub-expressions.
# Prone to false positives when used on cgo code
# https://github.com/go-critic/go-critic/issues/897#issuecomment-568892104
- dupSubExpr
perfsprint:
strconcat: false
concat-loop: false
govet:
disable:
- unusedresult
staticcheck:
checks:
- all
# Using a deprecated function, variable, constant or field.
# https://staticcheck.dev/docs/checks/#SA1019
- -QF* # disable quick fix suggestions
- -SA1019
# Poorly chosen identifier.
# https://staticcheck.dev/docs/checks/#ST1003
- -ST1003
usestdlibvars:
http-method: false
http-status-code: false
exclusions:
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
rules:
- path: _test\.go
linters:
- prealloc
- -ST1000 # package comment format
- -ST1003 # underscores in package names
- -ST1005 # error strings should not be capitalized
- -ST1012 # error var naming (ErrFoo)
- -ST1016 # receiver name consistency
- -ST1020 # comment on exported function format
- -ST1021 # comment on exported type format
- -ST1022 # comment on exported var format
- -ST1023 # omit type from declaration
severity:
default: error
rules:
- linters:
- gofmt
- goimports
- intrange
severity: info
formatters:
enable:
- gci
- gofmt
- gofumpt
settings:
gci:
sections:
- standard
- default
- localmodule

View File

@@ -54,6 +54,13 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cp
add_compile_definitions(NDEBUG GGML_VERSION=0x0 GGML_COMMIT=0x0)
# Define GGML version variables for shared library SOVERSION
# These are required by ggml/src/CMakeLists.txt for proper library versioning
set(GGML_VERSION_MAJOR 0)
set(GGML_VERSION_MINOR 0)
set(GGML_VERSION_PATCH 0)
set(GGML_VERSION "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)

View File

@@ -1,6 +1,6 @@
UPSTREAM=https://github.com/ggml-org/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=3cfa9c3f125763305b4226bc032f1954f08990dc
FETCH_HEAD=ec98e2002
.PHONY: help
help:
@@ -57,7 +57,7 @@ checkout: $(WORKDIR)
$(WORKDIR):
git clone $(UPSTREAM) $(WORKDIR)
.PHONE: format-patches
.PHONY: format-patches
format-patches: llama/patches
git -C $(WORKDIR) format-patch \
--no-signature \
@@ -66,7 +66,11 @@ format-patches: llama/patches
-o $(realpath $<) \
$(FETCH_HEAD)
.PHONE: clean
.PHONY: clean
clean: checkout
@git -C $(WORKDIR) am --abort || true
$(RM) llama/patches/.*.patched
.PHONY: print-base
print-base:
@echo $(FETCH_HEAD)

View File

@@ -555,7 +555,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Parakeet](https://github.com/parakeet-nest/parakeet) is a GoLang library, made to simplify the development of small generative AI applications with Ollama.
- [Haverscript](https://github.com/andygill/haverscript) with [examples](https://github.com/andygill/haverscript/tree/main/examples)
- [Ollama for Swift](https://github.com/mattt/ollama-swift)
- [Swollama for Swift](https://github.com/marcusziade/Swollama) with [DocC](https://marcusziade.github.io/Swollama/documentation/swollama/)
- [Swollama for Swift](https://github.com/guitaripod/Swollama) with [DocC](https://guitaripod.github.io/Swollama/documentation/swollama)
- [GoLamify](https://github.com/prasad89/golamify)
- [Ollama for Haskell](https://github.com/tusharad/ollama-haskell)
- [multi-llm-ts](https://github.com/nbonamy/multi-llm-ts) (A Typescript/JavaScript library allowing access to different LLM in a unified API)

View File

@@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
bts := scanner.Bytes()
if err := json.Unmarshal(bts, &errorResponse); err != nil {
return fmt.Errorf("unmarshal: %w", err)
if response.StatusCode >= http.StatusBadRequest {
return StatusError{
StatusCode: response.StatusCode,
Status: response.Status,
ErrorMessage: string(bts),
}
}
return errors.New(string(bts))
}
if response.StatusCode == http.StatusUnauthorized {
@@ -340,7 +347,7 @@ type CreateProgressFunc func(ProgressResponse) error
// Create creates a model from a [Modelfile]. fn is a progress function that
// behaves similarly to other methods (see [Client.Pull]).
//
// [Modelfile]: https://github.com/ollama/ollama/blob/main/docs/modelfile.md
// [Modelfile]: https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx
func (c *Client) Create(ctx context.Context, req *CreateRequest, fn CreateProgressFunc) error {
return c.stream(ctx, http.MethodPost, "/api/create", req, func(bts []byte) error {
var resp ProgressResponse
@@ -370,6 +377,15 @@ func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
return &lr, nil
}
// Usage returns usage statistics and system info.
func (c *Client) Usage(ctx context.Context) (*UsageResponse, error) {
var ur UsageResponse
if err := c.do(ctx, http.MethodGet, "/api/usage", nil, &ur); err != nil {
return nil, err
}
return &ur, nil
}
// Copy copies a model - creating a model with another name from an existing
// model.
func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {

View File

@@ -2,7 +2,6 @@ package api
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
@@ -40,7 +39,7 @@ func TestClientFromEnvironment(t *testing.T) {
t.Setenv("OLLAMA_HOST", v.value)
client, err := ClientFromEnvironment()
if !errors.Is(err, v.err) {
if err != v.err {
t.Fatalf("expected %s, got %s", v.err, err)
}
@@ -56,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
type testError struct {
message string
statusCode int
raw bool // if true, write message as-is instead of JSON encoding
}
func (e testError) Error() string {
@@ -112,6 +112,20 @@ func TestClientStream(t *testing.T) {
},
},
},
{
name: "plain text error response",
responses: []any{
"internal server error",
},
wantErr: "internal server error",
},
{
name: "HTML error page",
responses: []any{
"<html><body>404 Not Found</body></html>",
},
wantErr: "404 Not Found",
},
}
for _, tc := range testCases {
@@ -136,6 +150,12 @@ func TestClientStream(t *testing.T) {
return
}
if str, ok := resp.(string); ok {
fmt.Fprintln(w, str)
flusher.Flush()
continue
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Fatalf("failed to encode response: %v", err)
}
@@ -174,9 +194,10 @@ func TestClientStream(t *testing.T) {
func TestClientDo(t *testing.T) {
testCases := []struct {
name string
response any
wantErr string
name string
response any
wantErr string
wantStatusCode int
}{
{
name: "immediate error response",
@@ -184,7 +205,8 @@ func TestClientDo(t *testing.T) {
message: "test error message",
statusCode: http.StatusBadRequest,
},
wantErr: "test error message",
wantErr: "test error message",
wantStatusCode: http.StatusBadRequest,
},
{
name: "server error response",
@@ -192,7 +214,8 @@ func TestClientDo(t *testing.T) {
message: "internal error",
statusCode: http.StatusInternalServerError,
},
wantErr: "internal error",
wantErr: "internal error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "successful response",
@@ -204,6 +227,26 @@ func TestClientDo(t *testing.T) {
Success: true,
},
},
{
name: "plain text error response",
response: testError{
message: "internal server error",
statusCode: http.StatusInternalServerError,
raw: true,
},
wantErr: "internal server error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "HTML error page",
response: testError{
message: "<html><body>404 Not Found</body></html>",
statusCode: http.StatusNotFound,
raw: true,
},
wantErr: "<html><body>404 Not Found</body></html>",
wantStatusCode: http.StatusNotFound,
},
}
for _, tc := range testCases {
@@ -211,11 +254,16 @@ func TestClientDo(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if errResp, ok := tc.response.(testError); ok {
w.WriteHeader(errResp.statusCode)
err := json.NewEncoder(w).Encode(map[string]string{
"error": errResp.message,
})
if err != nil {
t.Fatal("failed to encode error response:", err)
if !errResp.raw {
err := json.NewEncoder(w).Encode(map[string]string{
"error": errResp.message,
})
if err != nil {
t.Fatal("failed to encode error response:", err)
}
} else {
// Write raw message (simulates non-JSON error responses)
fmt.Fprint(w, errResp.message)
}
return
}
@@ -242,6 +290,15 @@ func TestClientDo(t *testing.T) {
if err.Error() != tc.wantErr {
t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
}
if tc.wantStatusCode != 0 {
if statusErr, ok := err.(StatusError); ok {
if statusErr.StatusCode != tc.wantStatusCode {
t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
}
} else {
t.Errorf("expected StatusError, got %T", err)
}
}
return
}

View File

@@ -15,19 +15,19 @@ func main() {
}
messages := []api.Message{
api.Message{
{
Role: "system",
Content: "Provide very brief, concise responses",
},
api.Message{
{
Role: "user",
Content: "Name some unusual animals",
},
api.Message{
{
Role: "assistant",
Content: "Monotreme, platypus, echidna",
},
api.Message{
{
Role: "user",
Content: "which of these is the most dangerous?",
},

View File

@@ -2,8 +2,8 @@ package api
import (
"encoding/json"
"errors"
"fmt"
"iter"
"log/slog"
"math"
"os"
@@ -15,6 +15,7 @@ import (
"github.com/google/uuid"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/internal/orderedmap"
"github.com/ollama/ollama/types/model"
)
@@ -228,13 +229,79 @@ type ToolCallFunction struct {
Arguments ToolCallFunctionArguments `json:"arguments"`
}
type ToolCallFunctionArguments map[string]any
// ToolCallFunctionArguments holds tool call arguments in insertion order.
type ToolCallFunctionArguments struct {
om *orderedmap.Map[string, any]
}
// NewToolCallFunctionArguments creates a new empty ToolCallFunctionArguments.
func NewToolCallFunctionArguments() ToolCallFunctionArguments {
return ToolCallFunctionArguments{om: orderedmap.New[string, any]()}
}
// Get retrieves a value by key.
func (t *ToolCallFunctionArguments) Get(key string) (any, bool) {
if t == nil || t.om == nil {
return nil, false
}
return t.om.Get(key)
}
// Set sets a key-value pair, preserving insertion order.
func (t *ToolCallFunctionArguments) Set(key string, value any) {
if t == nil {
return
}
if t.om == nil {
t.om = orderedmap.New[string, any]()
}
t.om.Set(key, value)
}
// Len returns the number of arguments.
func (t *ToolCallFunctionArguments) Len() int {
if t == nil || t.om == nil {
return 0
}
return t.om.Len()
}
// All returns an iterator over all key-value pairs in insertion order.
func (t *ToolCallFunctionArguments) All() iter.Seq2[string, any] {
if t == nil || t.om == nil {
return func(yield func(string, any) bool) {}
}
return t.om.All()
}
// ToMap returns a regular map (order not preserved).
func (t *ToolCallFunctionArguments) ToMap() map[string]any {
if t == nil || t.om == nil {
return nil
}
return t.om.ToMap()
}
func (t *ToolCallFunctionArguments) String() string {
bts, _ := json.Marshal(t)
if t == nil || t.om == nil {
return "{}"
}
bts, _ := json.Marshal(t.om)
return string(bts)
}
func (t *ToolCallFunctionArguments) UnmarshalJSON(data []byte) error {
t.om = orderedmap.New[string, any]()
return json.Unmarshal(data, t.om)
}
func (t ToolCallFunctionArguments) MarshalJSON() ([]byte, error) {
if t.om == nil {
return []byte("{}"), nil
}
return json.Marshal(t.om)
}
type Tool struct {
Type string `json:"type"`
Items any `json:"items,omitempty"`
@@ -283,12 +350,78 @@ func (pt PropertyType) String() string {
return fmt.Sprintf("%v", []string(pt))
}
// ToolPropertiesMap holds tool properties in insertion order.
type ToolPropertiesMap struct {
om *orderedmap.Map[string, ToolProperty]
}
// NewToolPropertiesMap creates a new empty ToolPropertiesMap.
func NewToolPropertiesMap() *ToolPropertiesMap {
return &ToolPropertiesMap{om: orderedmap.New[string, ToolProperty]()}
}
// Get retrieves a property by name.
func (t *ToolPropertiesMap) Get(key string) (ToolProperty, bool) {
if t == nil || t.om == nil {
return ToolProperty{}, false
}
return t.om.Get(key)
}
// Set sets a property, preserving insertion order.
func (t *ToolPropertiesMap) Set(key string, value ToolProperty) {
if t == nil {
return
}
if t.om == nil {
t.om = orderedmap.New[string, ToolProperty]()
}
t.om.Set(key, value)
}
// Len returns the number of properties.
func (t *ToolPropertiesMap) Len() int {
if t == nil || t.om == nil {
return 0
}
return t.om.Len()
}
// All returns an iterator over all properties in insertion order.
func (t *ToolPropertiesMap) All() iter.Seq2[string, ToolProperty] {
if t == nil || t.om == nil {
return func(yield func(string, ToolProperty) bool) {}
}
return t.om.All()
}
// ToMap returns a regular map (order not preserved).
func (t *ToolPropertiesMap) ToMap() map[string]ToolProperty {
if t == nil || t.om == nil {
return nil
}
return t.om.ToMap()
}
func (t ToolPropertiesMap) MarshalJSON() ([]byte, error) {
if t.om == nil {
return []byte("null"), nil
}
return json.Marshal(t.om)
}
func (t *ToolPropertiesMap) UnmarshalJSON(data []byte) error {
t.om = orderedmap.New[string, ToolProperty]()
return json.Unmarshal(data, t.om)
}
type ToolProperty struct {
AnyOf []ToolProperty `json:"anyOf,omitempty"`
Type PropertyType `json:"type,omitempty"`
Items any `json:"items,omitempty"`
Description string `json:"description,omitempty"`
Enum []any `json:"enum,omitempty"`
AnyOf []ToolProperty `json:"anyOf,omitempty"`
Type PropertyType `json:"type,omitempty"`
Items any `json:"items,omitempty"`
Description string `json:"description,omitempty"`
Enum []any `json:"enum,omitempty"`
Properties *ToolPropertiesMap `json:"properties,omitempty"`
}
// ToTypeScriptType converts a ToolProperty to a TypeScript type string
@@ -309,9 +442,9 @@ func (tp ToolProperty) ToTypeScriptType() string {
return mapToTypeScriptType(tp.Type[0])
}
types := make([]string, len(tp.Type))
for i, t := range tp.Type {
types[i] = mapToTypeScriptType(t)
var types []string
for _, t := range tp.Type {
types = append(types, mapToTypeScriptType(t))
}
return strings.Join(types, " | ")
}
@@ -337,11 +470,11 @@ func mapToTypeScriptType(jsonType string) string {
}
type ToolFunctionParameters struct {
Type string `json:"type"`
Defs any `json:"$defs,omitempty"`
Items any `json:"items,omitempty"`
Required []string `json:"required,omitempty"`
Properties map[string]ToolProperty `json:"properties"`
Type string `json:"type"`
Defs any `json:"$defs,omitempty"`
Items any `json:"items,omitempty"`
Required []string `json:"required,omitempty"`
Properties *ToolPropertiesMap `json:"properties"`
}
func (t *ToolFunctionParameters) String() string {
@@ -554,6 +687,9 @@ type CreateRequest struct {
Renderer string `json:"renderer,omitempty"`
Parser string `json:"parser,omitempty"`
// Requires is the minimum version of Ollama required by the model.
Requires string `json:"requires,omitempty"`
// Info is a map of additional information for the model
Info map[string]any `json:"info,omitempty"`
@@ -604,6 +740,7 @@ type ShowResponse struct {
Tensors []Tensor `json:"tensors,omitempty"`
Capabilities []model.Capability `json:"capabilities,omitempty"`
ModifiedAt time.Time `json:"modified_at,omitempty"`
Requires string `json:"requires,omitempty"`
}
// CopyRequest is the request passed to [Client.Copy].
@@ -655,6 +792,33 @@ type ProcessResponse struct {
Models []ProcessModelResponse `json:"models"`
}
// UsageResponse is the response from [Client.Usage].
type UsageResponse struct {
GPUs []GPUUsage `json:"gpus,omitempty"`
}
// GPUUsage contains GPU/device memory usage breakdown.
type GPUUsage struct {
Name string `json:"name"` // Device name (e.g., "Apple M2 Max", "NVIDIA GeForce RTX 4090")
Backend string `json:"backend"` // CUDA, ROCm, Metal, etc.
Total uint64 `json:"total"`
Free uint64 `json:"free"`
Used uint64 `json:"used"` // Memory used by Ollama
Other uint64 `json:"other"` // Memory used by other processes
}
// UsageStats contains usage statistics.
type UsageStats struct {
Requests int64 `json:"requests"`
TokensInput int64 `json:"tokens_input"`
TokensOutput int64 `json:"tokens_output"`
TotalTokens int64 `json:"total_tokens"`
Models map[string]int64 `json:"models,omitempty"`
Sources map[string]int64 `json:"sources,omitempty"`
ToolCalls int64 `json:"tool_calls,omitempty"`
StructuredOutput int64 `json:"structured_output,omitempty"`
}
// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Name string `json:"name"`
@@ -784,7 +948,7 @@ func (m *Metrics) Summary() {
func (opts *Options) FromMap(m map[string]any) error {
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
typeOpts := reflect.TypeFor[Options]() // types of the fields in the options struct
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
// build map of json struct tags to their types
jsonOpts := make(map[string]reflect.StructField)
@@ -855,7 +1019,8 @@ func (opts *Options) FromMap(m map[string]any) error {
}
field.Set(reflect.ValueOf(slice))
case reflect.Pointer:
if field.Type() == reflect.TypeFor[*bool]() {
var b bool
if field.Type() == reflect.TypeOf(&b) {
val, ok := val.(bool)
if !ok {
return fmt.Errorf("option %q must be of type boolean", key)
@@ -906,7 +1071,7 @@ func DefaultOptions() Options {
// ThinkValue represents a value that can be a boolean or a string ("high", "medium", "low")
type ThinkValue struct {
// Value can be a bool or string
Value any
Value interface{}
}
// IsValid checks if the ThinkValue is valid
@@ -999,7 +1164,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
return nil
}
return errors.New("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
}
// MarshalJSON implements json.Marshaler
@@ -1018,7 +1183,7 @@ func (d Duration) MarshalJSON() ([]byte, error) {
if d.Duration < 0 {
return []byte("-1"), nil
}
return []byte("\"" + d.String() + "\""), nil
return []byte("\"" + d.Duration.String() + "\""), nil
}
func (d *Duration) UnmarshalJSON(b []byte) (err error) {
@@ -1045,7 +1210,7 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
d.Duration = time.Duration(math.MaxInt64)
}
default:
return fmt.Errorf("unsupported type: '%s'", reflect.TypeOf(v))
return fmt.Errorf("Unsupported type: '%s'", reflect.TypeOf(v))
}
return nil
@@ -1055,7 +1220,7 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
func FormatParams(params map[string][]string) (map[string]any, error) {
opts := Options{}
valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
typeOpts := reflect.TypeFor[Options]() // types of the fields in the options struct
typeOpts := reflect.TypeOf(opts) // types of the fields in the options struct
// build map of json struct tags to their types
jsonOpts := make(map[string]reflect.StructField)
@@ -1102,7 +1267,8 @@ func FormatParams(params map[string][]string) (map[string]any, error) {
// TODO: only string slices are supported right now
out[key] = vals
case reflect.Pointer:
if field.Type() == reflect.TypeFor[*bool]() {
var b bool
if field.Type() == reflect.TypeOf(&b) {
boolVal, err := strconv.ParseBool(vals[0])
if err != nil {
return nil, fmt.Errorf("invalid bool value %s", vals)

View File

@@ -11,6 +11,24 @@ import (
"github.com/stretchr/testify/require"
)
// testPropsMap creates a ToolPropertiesMap from a map (convenience function for tests, order not preserved)
func testPropsMap(m map[string]ToolProperty) *ToolPropertiesMap {
props := NewToolPropertiesMap()
for k, v := range m {
props.Set(k, v)
}
return props
}
// testArgs creates ToolCallFunctionArguments from a map (convenience function for tests, order not preserved)
func testArgs(m map[string]any) ToolCallFunctionArguments {
args := NewToolCallFunctionArguments()
for k, v := range m {
args.Set(k, v)
}
return args
}
func TestKeepAliveParsingFromJSON(t *testing.T) {
tests := []struct {
name string
@@ -309,9 +327,9 @@ func TestToolFunctionParameters_MarshalJSON(t *testing.T) {
input: ToolFunctionParameters{
Type: "object",
Required: []string{"name"},
Properties: map[string]ToolProperty{
Properties: testPropsMap(map[string]ToolProperty{
"name": {Type: PropertyType{"string"}},
},
}),
},
expected: `{"type":"object","required":["name"],"properties":{"name":{"type":"string"}}}`,
},
@@ -319,9 +337,9 @@ func TestToolFunctionParameters_MarshalJSON(t *testing.T) {
name: "no required",
input: ToolFunctionParameters{
Type: "object",
Properties: map[string]ToolProperty{
Properties: testPropsMap(map[string]ToolProperty{
"name": {Type: PropertyType{"string"}},
},
}),
},
expected: `{"type":"object","properties":{"name":{"type":"string"}}}`,
},
@@ -339,7 +357,7 @@ func TestToolFunctionParameters_MarshalJSON(t *testing.T) {
func TestToolCallFunction_IndexAlwaysMarshals(t *testing.T) {
fn := ToolCallFunction{
Name: "echo",
Arguments: ToolCallFunctionArguments{"message": "hi"},
Arguments: testArgs(map[string]any{"message": "hi"}),
}
data, err := json.Marshal(fn)
@@ -504,6 +522,116 @@ func TestThinking_UnmarshalJSON(t *testing.T) {
}
}
func TestToolPropertyNestedProperties(t *testing.T) {
tests := []struct {
name string
input string
expected ToolProperty
}{
{
name: "nested object properties",
input: `{
"type": "object",
"description": "Location details",
"properties": {
"address": {
"type": "string",
"description": "Street address"
},
"city": {
"type": "string",
"description": "City name"
}
}
}`,
expected: ToolProperty{
Type: PropertyType{"object"},
Description: "Location details",
Properties: testPropsMap(map[string]ToolProperty{
"address": {
Type: PropertyType{"string"},
Description: "Street address",
},
"city": {
Type: PropertyType{"string"},
Description: "City name",
},
}),
},
},
{
name: "deeply nested properties",
input: `{
"type": "object",
"description": "Event",
"properties": {
"location": {
"type": "object",
"description": "Location",
"properties": {
"coordinates": {
"type": "object",
"description": "GPS coordinates",
"properties": {
"lat": {"type": "number", "description": "Latitude"},
"lng": {"type": "number", "description": "Longitude"}
}
}
}
}
}
}`,
expected: ToolProperty{
Type: PropertyType{"object"},
Description: "Event",
Properties: testPropsMap(map[string]ToolProperty{
"location": {
Type: PropertyType{"object"},
Description: "Location",
Properties: testPropsMap(map[string]ToolProperty{
"coordinates": {
Type: PropertyType{"object"},
Description: "GPS coordinates",
Properties: testPropsMap(map[string]ToolProperty{
"lat": {Type: PropertyType{"number"}, Description: "Latitude"},
"lng": {Type: PropertyType{"number"}, Description: "Longitude"},
}),
},
}),
},
}),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var prop ToolProperty
err := json.Unmarshal([]byte(tt.input), &prop)
require.NoError(t, err)
// Compare JSON representations since pointer comparison doesn't work
expectedJSON, err := json.Marshal(tt.expected)
require.NoError(t, err)
actualJSON, err := json.Marshal(prop)
require.NoError(t, err)
assert.JSONEq(t, string(expectedJSON), string(actualJSON))
// Round-trip test: marshal and unmarshal again
data, err := json.Marshal(prop)
require.NoError(t, err)
var prop2 ToolProperty
err = json.Unmarshal(data, &prop2)
require.NoError(t, err)
prop2JSON, err := json.Marshal(prop2)
require.NoError(t, err)
assert.JSONEq(t, string(expectedJSON), string(prop2JSON))
})
}
}
func TestToolFunctionParameters_String(t *testing.T) {
tests := []struct {
name string
@@ -515,12 +643,12 @@ func TestToolFunctionParameters_String(t *testing.T) {
params: ToolFunctionParameters{
Type: "object",
Required: []string{"name"},
Properties: map[string]ToolProperty{
Properties: testPropsMap(map[string]ToolProperty{
"name": {
Type: PropertyType{"string"},
Description: "The name of the person",
},
},
}),
},
expected: `{"type":"object","required":["name"],"properties":{"name":{"type":"string","description":"The name of the person"}}}`,
},
@@ -537,7 +665,7 @@ func TestToolFunctionParameters_String(t *testing.T) {
s.Self = s
return s
}(),
Properties: map[string]ToolProperty{},
Properties: testPropsMap(map[string]ToolProperty{}),
},
expected: "",
},
@@ -550,3 +678,235 @@ func TestToolFunctionParameters_String(t *testing.T) {
})
}
}
func TestToolCallFunctionArguments_OrderPreservation(t *testing.T) {
t.Run("marshal preserves insertion order", func(t *testing.T) {
args := NewToolCallFunctionArguments()
args.Set("zebra", "z")
args.Set("apple", "a")
args.Set("mango", "m")
data, err := json.Marshal(args)
require.NoError(t, err)
// Should preserve insertion order, not alphabetical
assert.Equal(t, `{"zebra":"z","apple":"a","mango":"m"}`, string(data))
})
t.Run("unmarshal preserves JSON order", func(t *testing.T) {
jsonData := `{"zebra":"z","apple":"a","mango":"m"}`
var args ToolCallFunctionArguments
err := json.Unmarshal([]byte(jsonData), &args)
require.NoError(t, err)
// Verify iteration order matches JSON order
var keys []string
for k := range args.All() {
keys = append(keys, k)
}
assert.Equal(t, []string{"zebra", "apple", "mango"}, keys)
})
t.Run("round trip preserves order", func(t *testing.T) {
original := `{"z":1,"a":2,"m":3,"b":4}`
var args ToolCallFunctionArguments
err := json.Unmarshal([]byte(original), &args)
require.NoError(t, err)
data, err := json.Marshal(args)
require.NoError(t, err)
assert.Equal(t, original, string(data))
})
t.Run("String method returns ordered JSON", func(t *testing.T) {
args := NewToolCallFunctionArguments()
args.Set("c", 3)
args.Set("a", 1)
args.Set("b", 2)
assert.Equal(t, `{"c":3,"a":1,"b":2}`, args.String())
})
t.Run("Get retrieves correct values", func(t *testing.T) {
args := NewToolCallFunctionArguments()
args.Set("key1", "value1")
args.Set("key2", 42)
v, ok := args.Get("key1")
assert.True(t, ok)
assert.Equal(t, "value1", v)
v, ok = args.Get("key2")
assert.True(t, ok)
assert.Equal(t, 42, v)
_, ok = args.Get("nonexistent")
assert.False(t, ok)
})
t.Run("Len returns correct count", func(t *testing.T) {
args := NewToolCallFunctionArguments()
assert.Equal(t, 0, args.Len())
args.Set("a", 1)
assert.Equal(t, 1, args.Len())
args.Set("b", 2)
assert.Equal(t, 2, args.Len())
})
t.Run("empty args marshal to empty object", func(t *testing.T) {
args := NewToolCallFunctionArguments()
data, err := json.Marshal(args)
require.NoError(t, err)
assert.Equal(t, `{}`, string(data))
})
t.Run("zero value args marshal to empty object", func(t *testing.T) {
var args ToolCallFunctionArguments
assert.Equal(t, "{}", args.String())
})
}
func TestToolPropertiesMap_OrderPreservation(t *testing.T) {
t.Run("marshal preserves insertion order", func(t *testing.T) {
props := NewToolPropertiesMap()
props.Set("zebra", ToolProperty{Type: PropertyType{"string"}})
props.Set("apple", ToolProperty{Type: PropertyType{"number"}})
props.Set("mango", ToolProperty{Type: PropertyType{"boolean"}})
data, err := json.Marshal(props)
require.NoError(t, err)
// Should preserve insertion order, not alphabetical
expected := `{"zebra":{"type":"string"},"apple":{"type":"number"},"mango":{"type":"boolean"}}`
assert.Equal(t, expected, string(data))
})
t.Run("unmarshal preserves JSON order", func(t *testing.T) {
jsonData := `{"zebra":{"type":"string"},"apple":{"type":"number"},"mango":{"type":"boolean"}}`
var props ToolPropertiesMap
err := json.Unmarshal([]byte(jsonData), &props)
require.NoError(t, err)
// Verify iteration order matches JSON order
var keys []string
for k := range props.All() {
keys = append(keys, k)
}
assert.Equal(t, []string{"zebra", "apple", "mango"}, keys)
})
t.Run("round trip preserves order", func(t *testing.T) {
original := `{"z":{"type":"string"},"a":{"type":"number"},"m":{"type":"boolean"}}`
var props ToolPropertiesMap
err := json.Unmarshal([]byte(original), &props)
require.NoError(t, err)
data, err := json.Marshal(props)
require.NoError(t, err)
assert.Equal(t, original, string(data))
})
t.Run("Get retrieves correct values", func(t *testing.T) {
props := NewToolPropertiesMap()
props.Set("name", ToolProperty{Type: PropertyType{"string"}, Description: "The name"})
props.Set("age", ToolProperty{Type: PropertyType{"integer"}, Description: "The age"})
v, ok := props.Get("name")
assert.True(t, ok)
assert.Equal(t, "The name", v.Description)
v, ok = props.Get("age")
assert.True(t, ok)
assert.Equal(t, "The age", v.Description)
_, ok = props.Get("nonexistent")
assert.False(t, ok)
})
t.Run("Len returns correct count", func(t *testing.T) {
props := NewToolPropertiesMap()
assert.Equal(t, 0, props.Len())
props.Set("a", ToolProperty{})
assert.Equal(t, 1, props.Len())
props.Set("b", ToolProperty{})
assert.Equal(t, 2, props.Len())
})
t.Run("nil props marshal to null", func(t *testing.T) {
var props *ToolPropertiesMap
data, err := json.Marshal(props)
require.NoError(t, err)
assert.Equal(t, `null`, string(data))
})
t.Run("ToMap returns regular map", func(t *testing.T) {
props := NewToolPropertiesMap()
props.Set("a", ToolProperty{Type: PropertyType{"string"}})
props.Set("b", ToolProperty{Type: PropertyType{"number"}})
m := props.ToMap()
assert.Equal(t, 2, len(m))
assert.Equal(t, PropertyType{"string"}, m["a"].Type)
assert.Equal(t, PropertyType{"number"}, m["b"].Type)
})
}
func TestToolCallFunctionArguments_ComplexValues(t *testing.T) {
t.Run("nested objects preserve order", func(t *testing.T) {
jsonData := `{"outer":{"z":1,"a":2},"simple":"value"}`
var args ToolCallFunctionArguments
err := json.Unmarshal([]byte(jsonData), &args)
require.NoError(t, err)
// Outer keys should be in order
var keys []string
for k := range args.All() {
keys = append(keys, k)
}
assert.Equal(t, []string{"outer", "simple"}, keys)
})
t.Run("arrays as values", func(t *testing.T) {
args := NewToolCallFunctionArguments()
args.Set("items", []string{"a", "b", "c"})
args.Set("numbers", []int{1, 2, 3})
data, err := json.Marshal(args)
require.NoError(t, err)
assert.Equal(t, `{"items":["a","b","c"],"numbers":[1,2,3]}`, string(data))
})
}
func TestToolPropertiesMap_NestedProperties(t *testing.T) {
t.Run("nested properties preserve order", func(t *testing.T) {
props := NewToolPropertiesMap()
nestedProps := NewToolPropertiesMap()
nestedProps.Set("z_field", ToolProperty{Type: PropertyType{"string"}})
nestedProps.Set("a_field", ToolProperty{Type: PropertyType{"number"}})
props.Set("outer", ToolProperty{
Type: PropertyType{"object"},
Properties: nestedProps,
})
data, err := json.Marshal(props)
require.NoError(t, err)
// Both outer and inner should preserve order
expected := `{"outer":{"type":"object","properties":{"z_field":{"type":"string"},"a_field":{"type":"number"}}}}`
assert.Equal(t, expected, string(data))
})
}
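
For orientation, here is a minimal sketch of the caller-side usage these tests exercise. It assumes only names confirmed elsewhere in this diff (`api.NewToolPropertiesMap`, `api.ToolProperty`, `api.PropertyType`); treat it as an illustration of the order-preserving behavior, not documented API.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	// Build tool parameters whose JSON output keeps insertion order,
	// matching the behavior asserted in the tests above.
	props := api.NewToolPropertiesMap()
	props.Set("location", api.ToolProperty{
		Type:        api.PropertyType{"string"},
		Description: "The city to look up",
	})
	props.Set("unit", api.ToolProperty{
		Type:        api.PropertyType{"string"},
		Description: "celsius or fahrenheit",
	})

	data, err := json.Marshal(props)
	if err != nil {
		panic(err)
	}
	// Prints "location" before "unit": insertion order, not alphabetical.
	fmt.Println(string(data))
}
```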


@@ -273,10 +273,6 @@ func main() {
Handler: uiServer.Handler(),
}
if _, err := uiServer.UserData(ctx); err != nil {
slog.Warn("failed to load user data", "error", err)
}
// Start the UI server
slog.Info("starting ui server", "port", port)
go func() {
@@ -320,6 +316,17 @@ func main() {
slog.Debug("no URL scheme request to handle")
}
go func() {
slog.Debug("waiting for ollama server to be ready")
if err := ui.WaitForServer(ctx, 10*time.Second); err != nil {
slog.Warn("ollama server not ready, continuing anyway", "error", err)
}
if _, err := uiServer.UserData(ctx); err != nil {
slog.Warn("failed to load user data", "error", err)
}
}()
osRun(cancel, hasCompletedFirstRun, startHidden)
slog.Info("shutting down desktop server")
@@ -361,7 +368,7 @@ func checkUserLoggedIn(uiServerPort int) bool {
return false
}
resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/api/v1/me", uiServerPort))
resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/api/me", uiServerPort), "application/json", nil)
if err != nil {
slog.Debug("failed to call local auth endpoint", "error", err)
return false
@@ -397,8 +404,8 @@ func checkUserLoggedIn(uiServerPort int) bool {
// handleConnectURLScheme fetches the connect URL and opens it in the browser
func handleConnectURLScheme() {
if checkUserLoggedIn(uiServerPort) {
slog.Info("user is already logged in, opening settings instead")
sendUIRequestMessage("/")
slog.Info("user is already logged in, opening app instead")
showWindow(wv.webview.Window())
return
}
@@ -466,6 +473,8 @@ func handleURLSchemeInCurrentInstance(urlSchemeRequest string) {
if isConnect {
handleConnectURLScheme()
} else {
sendUIRequestMessage("/")
if wv.webview != nil {
showWindow(wv.webview.Window())
}
}
}


@@ -191,13 +191,6 @@ func LaunchNewApp() {
C.launchApp(appName)
}
// Send a request to the main app thread to load a UI page
func sendUIRequestMessage(path string) {
p := C.CString(path)
defer C.free(unsafe.Pointer(p))
C.uiRequest(p)
}
func registerLaunchAgent(hasCompletedFirstRun bool) {
// Remove any stale Login Item registrations
C.unregisterSelfFromLoginItem()


@@ -24,27 +24,14 @@ bool firstTimeRun,startHidden; // Set in run before initialization
for (NSURL *url in urls) {
if ([url.scheme isEqualToString:@"ollama"]) {
NSString *path = url.path;
if (!path || [path isEqualToString:@""]) {
// For URLs like ollama://settings (without triple slash),
// the "settings" part is parsed as the host, not the path.
// We need to convert it to a path by prepending "/"
if (url.host && ![url.host isEqualToString:@""]) {
path = [@"/" stringByAppendingString:url.host];
} else {
path = @"/";
}
}
if ([path isEqualToString:@"/connect"] || [url.host isEqualToString:@"connect"]) {
if (path && ([path isEqualToString:@"/connect"] || [url.host isEqualToString:@"connect"])) {
// Special case: handle connect by opening browser instead of app
handleConnectURL();
} else {
// Set app to be active and visible
[NSApp setActivationPolicy:NSApplicationActivationPolicyRegular];
[NSApp activateIgnoringOtherApps:YES];
// Open the path with the UI
[self uiRequest:path];
}
break;
@@ -260,7 +247,7 @@ bool firstTimeRun,startHidden; // Set in run before initialization
}
- (void)openHelp:(id)sender {
NSURL *url = [NSURL URLWithString:@"https://github.com/ollama/ollama/tree/main/docs"];
NSURL *url = [NSURL URLWithString:@"https://docs.ollama.com/"];
[[NSWorkspace sharedWorkspace] openURL:url];
}


@@ -147,7 +147,9 @@ func handleURLSchemeRequest(urlScheme string) {
if isConnect {
handleConnectURLScheme()
} else {
sendUIRequestMessage("/")
if wv.webview != nil {
showWindow(wv.webview.Window())
}
}
}
@@ -261,11 +263,6 @@ func createLoginShortcut() error {
return nil
}
// Send a request to the main app thread to load a UI page
func sendUIRequestMessage(path string) {
wintray.SendUIRequestMessage(path)
}
func LaunchNewApp() {
}


@@ -169,37 +169,47 @@ DlgResult fileDlg(FileDlgParams* params) {
}
NSArray* urls = [panel URLs];
if(self->params->allowMultiple && [urls count] >= 1) {
if([urls count] == 0) {
return DLG_CANCEL;
}
if(self->params->allowMultiple) {
// For multiple files, we need to return all paths separated by null bytes
char* bufPtr = self->params->buf;
int remainingBuf = self->params->nbuf;
// Calculate total required buffer size first
int totalSize = 0;
for(NSURL* url in urls) {
char tempBuf[PATH_MAX];
if(![url getFileSystemRepresentation:tempBuf maxLength:PATH_MAX]) {
return DLG_URLFAIL;
}
totalSize += strlen(tempBuf) + 1; // +1 for null terminator
}
totalSize += 1; // Final null terminator
// Calculate total required buffer size first
int totalSize = 0;
for(NSURL* url in urls) {
char tempBuf[PATH_MAX];
if(![url getFileSystemRepresentation:tempBuf maxLength:PATH_MAX]) {
return DLG_URLFAIL;
}
totalSize += strlen(tempBuf) + 1; // +1 for null terminator
}
totalSize += 1; // Final null terminator
if(totalSize > self->params->nbuf) {
// Not enough buffer space
return DLG_URLFAIL;
}
if(totalSize > self->params->nbuf) {
// Not enough buffer space
return DLG_URLFAIL;
}
// Now actually copy the paths (we know we have space)
bufPtr = self->params->buf;
for(NSURL* url in urls) {
char tempBuf[PATH_MAX];
[url getFileSystemRepresentation:tempBuf maxLength:PATH_MAX];
int pathLen = strlen(tempBuf);
strcpy(bufPtr, tempBuf);
bufPtr += pathLen + 1;
}
*bufPtr = '\0'; // Final null terminator
// Now actually copy the paths (we know we have space)
bufPtr = self->params->buf;
for(NSURL* url in urls) {
char tempBuf[PATH_MAX];
[url getFileSystemRepresentation:tempBuf maxLength:PATH_MAX];
int pathLen = strlen(tempBuf);
strcpy(bufPtr, tempBuf);
bufPtr += pathLen + 1;
}
*bufPtr = '\0'; // Final null terminator
} else {
// Single file/directory selection - write path to buffer
NSURL* url = [urls firstObject];
if(![url getFileSystemRepresentation:self->params->buf maxLength:self->params->nbuf]) {
return DLG_URLFAIL;
}
}
return DLG_OK;
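
The multi-select branch above packs every selected path into one buffer as NUL-terminated strings, with an extra NUL at the end. As a hypothetical sketch of consuming that layout from the Go side (the real cgo caller in this package may decode it differently):

```go
package main

import (
	"bytes"
	"fmt"
)

// decodePaths splits a buffer of NUL-terminated paths (with a final
// extra NUL) into a slice of strings. Hypothetical helper: the actual
// cgo bridge may decode the buffer differently.
func decodePaths(buf []byte) []string {
	var paths []string
	for _, p := range bytes.Split(buf, []byte{0}) {
		if len(p) > 0 {
			paths = append(paths, string(p))
		}
	}
	return paths
}

func main() {
	buf := []byte("/tmp/a.txt\x00/tmp/b.txt\x00\x00")
	fmt.Println(decodePaths(buf)) // [/tmp/a.txt /tmp/b.txt]
}
```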


@@ -22,7 +22,6 @@ import (
var ErrCancelled = errors.New("Cancelled")
// Cancelled refers to ErrCancelled.
//
// Deprecated: Use ErrCancelled instead.
var Cancelled = ErrCancelled
@@ -38,7 +37,7 @@ type MsgBuilder struct {
}
// Message initialises a MsgBuilder with the provided message.
func Message(format string, args ...any) *MsgBuilder {
func Message(format string, args ...interface{}) *MsgBuilder {
return &MsgBuilder{Msg: fmt.Sprintf(format, args...)}
}


@@ -15,7 +15,7 @@ const multiFileBufferSize = w32.MAX_PATH * 10
type WinDlgError int
func (e WinDlgError) Error() string {
return fmt.Sprintf("CommDlgExtendedError: %#x", e)
return fmt.Sprintf("CommDlgExtendedError: %#x", int(e))
}
func err() error {


@@ -224,9 +224,7 @@ func (s *Server) cmd(ctx context.Context) (*exec.Cmd, error) {
if _, err := os.Stat(settings.Models); err == nil {
env["OLLAMA_MODELS"] = settings.Models
} else {
slog.Warn("models path not accessible, clearing models setting", "path", settings.Models, "err", err)
settings.Models = ""
s.store.SetSettings(settings)
slog.Warn("models path not accessible, using default", "path", settings.Models, "err", err)
}
}
if settings.ContextLength > 0 {
@@ -319,7 +317,7 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
for {
select {
case <-ctx.Done():
return nil, errors.New("timeout scanning server log for inference compute details")
return nil, fmt.Errorf("timeout scanning server log for inference compute details")
default:
}
file, err := os.Open(serverLogPath)
@@ -345,9 +343,11 @@ func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
slog.Info("Matched", "inference compute", ic)
inference = append(inference, ic)
} else if len(inference) > 0 {
} else {
// Break out on first non matching line after we start matching
return inference, nil
if len(inference) > 0 {
return inference, nil
}
}
}
time.Sleep(100 * time.Millisecond)


@@ -31,7 +31,7 @@ func terminate(proc *os.Process) error {
func terminated(pid int) (bool, error) {
proc, err := os.FindProcess(pid)
if err != nil {
return false, fmt.Errorf("failed to find process: %w", err)
return false, fmt.Errorf("failed to find process: %v", err)
}
err = proc.Signal(syscall.Signal(0))
@@ -40,7 +40,7 @@ func terminated(pid int) (bool, error) {
return true, nil
}
return false, fmt.Errorf("error signaling process: %w", err)
return false, fmt.Errorf("error signaling process: %v", err)
}
return false, nil
@@ -67,7 +67,8 @@ func reapServers() error {
return nil
}
for pidStr := range strings.SplitSeq(pidsStr, "\n") {
pids := strings.Split(pidsStr, "\n")
for _, pidStr := range pids {
pidStr = strings.TrimSpace(pidStr)
if pidStr == "" {
continue
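
One side of this hunk uses `strings.SplitSeq`, added in Go 1.24, which yields substrings as an iterator instead of allocating the whole slice that `strings.Split` builds. A small sketch of the iterator form:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	pidsStr := "101\n102\n\n103\n"

	// strings.SplitSeq (Go 1.24+) yields substrings lazily, so no
	// intermediate []string is allocated for the whole input.
	for pidStr := range strings.SplitSeq(pidsStr, "\n") {
		pidStr = strings.TrimSpace(pidStr)
		if pidStr == "" {
			continue
		}
		fmt.Println(pidStr)
	}
}
```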


@@ -5,7 +5,6 @@ package store
import (
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
@@ -483,8 +482,7 @@ func (db *database) cleanupOrphanedData() error {
}
func duplicateColumnError(err error) bool {
var sqlite3Err sqlite3.Error
if errors.As(err, &sqlite3Err) {
if sqlite3Err, ok := err.(sqlite3.Error); ok {
return sqlite3Err.Code == sqlite3.ErrError &&
strings.Contains(sqlite3Err.Error(), "duplicate column name")
}
@@ -492,8 +490,7 @@ func duplicateColumnError(err error) bool {
}
func columnNotExists(err error) bool {
var sqlite3Err sqlite3.Error
if errors.As(err, &sqlite3Err) {
if sqlite3Err, ok := err.(sqlite3.Error); ok {
return sqlite3Err.Code == sqlite3.ErrError &&
strings.Contains(sqlite3Err.Error(), "no such column")
}
@@ -589,8 +586,8 @@ func (db *database) getChatWithOptions(id string, loadAttachmentData bool) (*Cha
&browserState,
)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, errors.New("chat not found")
if err == sql.ErrNoRows {
return nil, fmt.Errorf("chat not found")
}
return nil, fmt.Errorf("query chat: %w", err)
}
@@ -755,7 +752,7 @@ func (db *database) updateLastMessage(chatID string, msg Message) error {
return fmt.Errorf("get rows affected: %w", err)
}
if rowsAffected == 0 {
return errors.New("no message found to update")
return fmt.Errorf("no message found to update")
}
_, err = tx.Exec("DELETE FROM attachments WHERE message_id = ?", messageID)
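
Several comparisons in this hunk move between `errors.Is(err, sql.ErrNoRows)` and `err == sql.ErrNoRows`. The two are not interchangeable once errors get wrapped: `errors.Is` follows `%w` chains, while `==` matches only the exact value. A minimal demonstration:

```go
package main

import (
	"database/sql"
	"errors"
	"fmt"
)

func main() {
	wrapped := fmt.Errorf("query chat: %w", sql.ErrNoRows)

	fmt.Println(wrapped == sql.ErrNoRows)          // false: not the exact value
	fmt.Println(errors.Is(wrapped, sql.ErrNoRows)) // true: matches through the wrap
}
```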


@@ -282,7 +282,7 @@ func countRows(t *testing.T, db *database, table string) int {
return count
}
func countRowsWithCondition(t *testing.T, db *database, table, condition string, args ...any) int {
func countRowsWithCondition(t *testing.T, db *database, table, condition string, args ...interface{}) int {
t.Helper()
var count int
query := fmt.Sprintf("SELECT COUNT(*) FROM %s WHERE %s", table, condition)
@@ -296,7 +296,7 @@ func countRowsWithCondition(t *testing.T, db *database, table, condition string,
// Test helpers for schema migration testing
// schemaMap returns both tables/columns and indexes (ignoring order)
func schemaMap(db *database) map[string]any {
func schemaMap(db *database) map[string]interface{} {
result := make(map[string]any)
result["tables"] = columnMap(db)


@@ -5,7 +5,6 @@ package store
import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"os"
"path/filepath"
@@ -27,7 +26,7 @@ func (i *Image) Bytes() ([]byte, error) {
// ImgBytes reads image data from the specified file path
func ImgBytes(path string) ([]byte, error) {
if path == "" {
return nil, errors.New("empty image path")
return nil, fmt.Errorf("empty image path")
}
data, err := os.ReadFile(path)


@@ -4,7 +4,6 @@ package tools
import (
"context"
"errors"
"fmt"
"net/url"
"regexp"
@@ -131,7 +130,7 @@ func (b *BrowserSearch) Schema() map[string]any {
func (b *BrowserSearch) Execute(ctx context.Context, args map[string]any) (any, string, error) {
query, ok := args["query"].(string)
if !ok {
return nil, "", errors.New("query parameter is required")
return nil, "", fmt.Errorf("query parameter is required")
}
topn, ok := args["topn"].(int)
@@ -151,7 +150,7 @@ func (b *BrowserSearch) Execute(ctx context.Context, args map[string]any) (any,
searchResponse, ok := result.(*WebSearchResponse)
if !ok {
return nil, "", errors.New("invalid search results format")
return nil, "", fmt.Errorf("invalid search results format")
}
// Build main search results page that contains all search results
@@ -384,9 +383,15 @@ func wrapLines(text string, width int) []string {
wrapped = append(wrapped, "")
} else if len(line) <= width {
wrapped = append(wrapped, line)
} else if words := strings.Fields(line); len(words) == 0 {
wrapped = append(wrapped, line)
} else {
// Word wrapping while preserving whitespace structure
words := strings.Fields(line)
if len(words) == 0 {
// Line with only whitespace
wrapped = append(wrapped, line)
continue
}
currentLine := ""
for _, word := range words {
// Check if adding this word would exceed width
@@ -531,13 +536,15 @@ func (b *BrowserOpen) Execute(ctx context.Context, args map[string]any) (any, st
if err != nil {
return nil, "", fmt.Errorf("page not found for cursor %d: %w", cursor, err)
}
} else if len(b.state.Data.PageStack) != 0 {
} else {
// get last page
pageURL := b.state.Data.PageStack[len(b.state.Data.PageStack)-1]
var err error
page, err = b.getPageFromStack(pageURL)
if err != nil {
return nil, "", fmt.Errorf("page not found for cursor %d: %w", cursor, err)
if len(b.state.Data.PageStack) != 0 {
pageURL := b.state.Data.PageStack[len(b.state.Data.PageStack)-1]
var err error
page, err = b.getPageFromStack(pageURL)
if err != nil {
return nil, "", fmt.Errorf("page not found for cursor %d: %w", cursor, err)
}
}
}
@@ -587,7 +594,7 @@ func (b *BrowserOpen) Execute(ctx context.Context, args map[string]any) (any, st
// Try to get id as integer (link ID from current page)
if id, ok := args["id"].(float64); ok {
if page == nil {
return nil, "", errors.New("no current page to resolve link from")
return nil, "", fmt.Errorf("no current page to resolve link from")
}
idInt := int(id)
pageURL, ok := page.Links[idInt]
@@ -630,7 +637,7 @@ func (b *BrowserOpen) Execute(ctx context.Context, args map[string]any) (any, st
// If no id provided, just display current page
if page == nil {
return nil, "", errors.New("no current page to display")
return nil, "", fmt.Errorf("no current page to display")
}
// Only add to PageStack without updating URLToPage
b.state.Data.PageStack = append(b.state.Data.PageStack, page.URL)
@@ -735,7 +742,7 @@ func (b *BrowserFind) Schema() map[string]any {
func (b *BrowserFind) Execute(ctx context.Context, args map[string]any) (any, string, error) {
pattern, ok := args["pattern"].(string)
if !ok {
return nil, "", errors.New("pattern parameter is required")
return nil, "", fmt.Errorf("pattern parameter is required")
}
// Get cursor parameter if provided, default to current page
@@ -749,7 +756,7 @@ func (b *BrowserFind) Execute(ctx context.Context, args map[string]any) (any, st
if cursor == -1 {
// Use current page
if len(b.state.Data.PageStack) == 0 {
return nil, "", errors.New("no pages to search in")
return nil, "", fmt.Errorf("no pages to search in")
}
var err error
page, err = b.getPageFromStack(b.state.Data.PageStack[len(b.state.Data.PageStack)-1])
@@ -769,7 +776,7 @@ func (b *BrowserFind) Execute(ctx context.Context, args map[string]any) (any, st
}
if page == nil {
return nil, "", errors.New("page not found")
return nil, "", fmt.Errorf("page not found")
}
// Create find results page
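
This file (and several below) repeatedly swaps between `errors.New` and `fmt.Errorf` for constant messages. The two produce equivalent errors for a fixed string; `fmt.Errorf` only earns its keep when formatting or wrapping, which is why linters commonly flag it when no verbs are present. A quick check:

```go
package main

import (
	"errors"
	"fmt"
)

func main() {
	// Equivalent at runtime for a constant message:
	e1 := errors.New("query parameter is required")
	e2 := fmt.Errorf("query parameter is required")
	fmt.Println(e1.Error() == e2.Error()) // true

	// fmt.Errorf is the right tool when formatting or wrapping:
	e3 := fmt.Errorf("execute tool %q: %w", "web_search", e1)
	fmt.Println(errors.Is(e3, e1)) // true
}
```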


@@ -5,7 +5,6 @@ package tools
import (
"context"
"encoding/json"
"errors"
"fmt"
)
@@ -88,7 +87,7 @@ func (g *BrowserCrawler) Schema() map[string]any {
func (g *BrowserCrawler) Execute(ctx context.Context, args map[string]any) (*CrawlResponse, error) {
urlsRaw, ok := args["urls"].([]any)
if !ok {
return nil, errors.New("urls parameter is required and must be an array of strings")
return nil, fmt.Errorf("urls parameter is required and must be an array of strings")
}
urls := make([]string, 0, len(urlsRaw))
@@ -99,7 +98,7 @@ func (g *BrowserCrawler) Execute(ctx context.Context, args map[string]any) (*Cra
}
if len(urls) == 0 {
return nil, errors.New("at least one URL is required")
return nil, fmt.Errorf("at least one URL is required")
}
return g.performWebCrawl(ctx, urls)


@@ -5,7 +5,6 @@ package tools
import (
"context"
"encoding/json"
"errors"
"fmt"
"strconv"
"time"
@@ -85,7 +84,7 @@ func (w *BrowserWebSearch) Schema() map[string]any {
func (w *BrowserWebSearch) Execute(ctx context.Context, args map[string]any) (any, error) {
queriesRaw, ok := args["queries"].([]any)
if !ok {
return nil, errors.New("queries parameter is required and must be an array of strings")
return nil, fmt.Errorf("queries parameter is required and must be an array of strings")
}
queries := make([]string, 0, len(queriesRaw))
@@ -96,7 +95,7 @@ func (w *BrowserWebSearch) Execute(ctx context.Context, args map[string]any) (an
}
if len(queries) == 0 {
return nil, errors.New("at least one query is required")
return nil, fmt.Errorf("at least one query is required")
}
maxResults := 5


@@ -6,7 +6,6 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
@@ -37,7 +36,7 @@ func (w *WebFetch) Description() string {
return "Crawl and extract text content from web pages"
}
func (w *WebFetch) Schema() map[string]any {
func (g *WebFetch) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
@@ -62,11 +61,11 @@ func (w *WebFetch) Prompt() string {
func (w *WebFetch) Execute(ctx context.Context, args map[string]any) (any, string, error) {
urlRaw, ok := args["url"]
if !ok {
return nil, "", errors.New("url parameter is required")
return nil, "", fmt.Errorf("url parameter is required")
}
urlStr, ok := urlRaw.(string)
if !ok || strings.TrimSpace(urlStr) == "" {
return nil, "", errors.New("url must be a non-empty string")
return nil, "", fmt.Errorf("url must be a non-empty string")
}
result, err := performWebFetch(ctx, urlStr)


@@ -6,7 +6,6 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
@@ -46,7 +45,7 @@ func (w *WebSearch) Prompt() string {
return ""
}
func (w *WebSearch) Schema() map[string]any {
func (g *WebSearch) Schema() map[string]any {
schemaBytes := []byte(`{
"type": "object",
"properties": {
@@ -72,12 +71,12 @@ func (w *WebSearch) Schema() map[string]any {
func (w *WebSearch) Execute(ctx context.Context, args map[string]any) (any, string, error) {
rawQuery, ok := args["query"]
if !ok {
return nil, "", errors.New("query parameter is required")
return nil, "", fmt.Errorf("query parameter is required")
}
queryStr, ok := rawQuery.(string)
if !ok || strings.TrimSpace(queryStr) == "" {
return nil, "", errors.New("query must be a non-empty string")
return nil, "", fmt.Errorf("query must be a non-empty string")
}
maxResults := 5


@@ -19,12 +19,10 @@ import (
// Errors wrapping Found should provide additional context, e.g.
// fmt.Errorf("%w: %s", not.Found, key)
//
//nolint:staticcheck
//lint:ignore ST1012 This is a sentinel error intended to be read like not.Found.
var Found = errors.New("not found")
// Available is an error that indicates that a value is not available.
//
//nolint:staticcheck
//lint:ignore ST1012 This is a sentinel error intended to be read like not.Available.
var Available = errors.New("not available")
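
The doc comments above already name the intended pattern; for completeness, a short sketch of wrapping and testing these sentinels. The import path is assumed for illustration.

```go
package main

import (
	"errors"
	"fmt"

	"github.com/ollama/ollama/app/not" // assumed import path, shown for illustration
)

func lookup(key string) error {
	// Wrap the sentinel so callers keep both the context and the identity.
	return fmt.Errorf("%w: %s", not.Found, key)
}

func main() {
	err := lookup("chat-123")
	if errors.Is(err, not.Found) {
		fmt.Println(err) // "not found: chat-123"
	}
}
```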


@@ -4,7 +4,6 @@ package not
import (
"fmt"
"strings"
)
type ValidError struct {
@@ -45,12 +44,12 @@ func (b Valids) Error() string {
return ""
}
var sb strings.Builder
var result string
for i, err := range b {
if i > 0 {
sb.WriteString("; ")
result += "; "
}
sb.WriteString(err.Error())
result += err.Error()
}
return sb.String()
return result
}


@@ -469,26 +469,24 @@ export class HealthResponse {
}
export class User {
id: string;
name: string;
email: string;
avatarURL: string;
plan: string;
bio: string;
firstName: string;
lastName: string;
overThreshold: boolean;
name: string;
bio?: string;
avatarurl?: string;
firstname?: string;
lastname?: string;
plan?: string;
constructor(source: any = {}) {
if ('string' === typeof source) source = JSON.parse(source);
this.id = source["id"];
this.name = source["name"];
this.email = source["email"];
this.avatarURL = source["avatarURL"];
this.plan = source["plan"];
this.name = source["name"];
this.bio = source["bio"];
this.firstName = source["firstName"];
this.lastName = source["lastName"];
this.overThreshold = source["overThreshold"];
this.avatarurl = source["avatarurl"];
this.firstname = source["firstname"];
this.lastname = source["lastname"];
this.plan = source["plan"];
}
}
export class Attachment {


@@ -15,7 +15,7 @@ import {
import { parseJsonlFromResponse } from "./util/jsonl-parsing";
import { ollamaClient as ollama } from "./lib/ollama-client";
import type { ModelResponse } from "ollama/browser";
import { API_BASE } from "./lib/config";
import { API_BASE, OLLAMA_DOT_COM } from "./lib/config";
// Extend Model class with utility methods
declare module "@/gotypes" {
@@ -27,7 +27,6 @@ declare module "@/gotypes" {
Model.prototype.isCloud = function (): boolean {
return this.model.endsWith("cloud");
};
// Helper function to convert Uint8Array to base64
function uint8ArrayToBase64(uint8Array: Uint8Array): string {
const chunkSize = 0x8000; // 32KB chunks to avoid stack overflow
@@ -42,44 +41,50 @@ function uint8ArrayToBase64(uint8Array: Uint8Array): string {
}
export async function fetchUser(): Promise<User | null> {
try {
const response = await fetch(`${API_BASE}/api/v1/me`, {
method: "GET",
headers: {
"Content-Type": "application/json",
},
});
if (response.ok) {
const userData: User = await response.json();
return userData;
}
return null;
} catch (error) {
console.error("Error fetching user:", error);
return null;
}
}
export async function fetchConnectUrl(): Promise<string> {
const response = await fetch(`${API_BASE}/api/v1/connect`, {
method: "GET",
const response = await fetch(`${API_BASE}/api/me`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
});
if (!response.ok) {
throw new Error("Failed to fetch connect URL");
if (response.ok) {
const userData: User = await response.json();
if (userData.avatarurl && !userData.avatarurl.startsWith("http")) {
userData.avatarurl = `${OLLAMA_DOT_COM}${userData.avatarurl}`;
}
return userData;
}
const data = await response.json();
return data.connect_url;
if (response.status === 401 || response.status === 403) {
return null;
}
throw new Error(`Failed to fetch user: ${response.status}`);
}
export async function fetchConnectUrl(): Promise<string> {
const response = await fetch(`${API_BASE}/api/me`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
});
if (response.status === 401) {
const data = await response.json();
if (data.signin_url) {
return data.signin_url;
}
}
throw new Error("Failed to fetch connect URL");
}
export async function disconnectUser(): Promise<void> {
const response = await fetch(`${API_BASE}/api/v1/disconnect`, {
const response = await fetch(`${API_BASE}/api/signout`, {
method: "POST",
headers: {
"Content-Type": "application/json",
@@ -204,12 +209,10 @@ export async function* sendMessage(
data: uint8ArrayToBase64(att.data),
}));
// Only send think parameter when actually requesting thinking
// Don't send false as it causes issues with some providers
// Send think parameter when it's explicitly set (true, false, or a non-empty string).
const shouldSendThink =
think !== undefined &&
((typeof think === "boolean" && think) ||
(typeof think === "string" && think !== ""));
(typeof think === "boolean" || (typeof think === "string" && think !== ""));
const response = await fetch(`${API_BASE}/api/v1/chat/${chatId}`, {
method: "POST",
@@ -391,7 +394,8 @@ export async function getInferenceCompute(): Promise<InferenceCompute[]> {
export async function fetchHealth(): Promise<boolean> {
try {
const response = await fetch(`${API_BASE}/api/v1/health`, {
// Use the /api/version endpoint as a health check
const response = await fetch(`${API_BASE}/api/version`, {
method: "GET",
headers: {
"Content-Type": "application/json",
@@ -400,7 +404,8 @@ export async function fetchHealth(): Promise<boolean> {
if (response.ok) {
const data = await response.json();
return data.healthy || false;
// If we get a version back, the server is healthy
return !!data.version;
}
return false;
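
The health check above now treats a successful `/api/version` response as the liveness signal. A rough Go equivalent of the same probe, written as a hypothetical helper for clarity:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// healthy reports whether the Ollama server answers /api/version with a
// version string, the same signal the UI code above relies on.
func healthy(base string) bool {
	client := &http.Client{Timeout: 2 * time.Second}
	resp, err := client.Get(base + "/api/version")
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return false
	}
	var v struct {
		Version string `json:"version"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&v); err != nil {
		return false
	}
	return v.Version != ""
}

func main() {
	fmt.Println(healthy("http://127.0.0.1:11434"))
}
```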


@@ -299,9 +299,9 @@ export default function Settings() {
</Button>
</div>
</div>
{user?.avatarURL && (
{user?.avatarurl && (
<img
src={user.avatarURL}
src={user.avatarurl}
alt={user?.name}
className="h-10 w-10 rounded-full bg-neutral-200 dark:bg-neutral-700 flex-shrink-0"
onError={(e) => {


@@ -50,21 +50,33 @@ export default function Thinking({
// Position content to show bottom when collapsed
useEffect(() => {
if (isCollapsed && contentRef.current && wrapperRef.current) {
const contentHeight = contentRef.current.scrollHeight;
const wrapperHeight = wrapperRef.current.clientHeight;
if (contentHeight > wrapperHeight) {
const translateY = -(contentHeight - wrapperHeight);
contentRef.current.style.transform = `translateY(${translateY}px)`;
setHasOverflow(true);
} else {
setHasOverflow(false);
}
requestAnimationFrame(() => {
if (!contentRef.current || !wrapperRef.current) return;
const contentHeight = contentRef.current.scrollHeight;
const wrapperHeight = wrapperRef.current.clientHeight;
if (contentHeight > wrapperHeight) {
const translateY = -(contentHeight - wrapperHeight);
contentRef.current.style.transform = `translateY(${translateY}px)`;
setHasOverflow(true);
} else {
contentRef.current.style.transform = "translateY(0)";
setHasOverflow(false);
}
});
} else if (contentRef.current) {
contentRef.current.style.transform = "translateY(0)";
setHasOverflow(false);
}
}, [thinking, isCollapsed]);
useEffect(() => {
if (activelyThinking && wrapperRef.current && !isCollapsed) {
// When expanded and actively thinking, scroll to bottom
wrapperRef.current.scrollTop = wrapperRef.current.scrollHeight;
}
}, [thinking, activelyThinking, isCollapsed]);
const handleToggle = () => {
setIsCollapsed(!isCollapsed);
setHasUserInteracted(true);


@@ -7,6 +7,7 @@ import { createQueryBatcher } from "./useQueryBatcher";
import { useRefetchModels } from "./useModels";
import { useStreamingContext } from "@/contexts/StreamingContext";
import { useSettings } from "./useSettings";
import { getModelCapabilities } from "@/api";
export const useChats = () => {
return useQuery({
@@ -606,6 +607,24 @@ export const useSendMessage = (chatId: string) => {
queryClient.setQueryData(["staleModels"], newStaleMap);
queryClient.invalidateQueries({ queryKey: ["models"] });
// Fetch fresh capabilities for the downloaded model
getModelCapabilities(selectedModel.model)
.then((capabilities) => {
queryClient.setQueryData(
["modelCapabilities", selectedModel.model],
capabilities,
);
})
.catch((error) => {
console.error(
"Failed to fetch capabilities after download:",
error,
);
queryClient.invalidateQueries({
queryKey: ["modelCapabilities", selectedModel.model],
});
});
}
break;
}


@@ -1,114 +0,0 @@
import { useMutation, useQueryClient } from "@tanstack/react-query";
import { useState } from "react";
import { pullModel } from "@/api";
import { useSelectedModel } from "./useSelectedModel";
import { useSettings } from "./useSettings";
interface DownloadProgress {
status: string;
digest?: string;
total?: number;
completed?: number;
done?: boolean;
}
export function useDownloadModel(chatId?: string) {
const queryClient = useQueryClient();
const { selectedModel } = useSelectedModel(chatId);
const { setSettings } = useSettings();
const [downloadProgress, setDownloadProgress] =
useState<DownloadProgress | null>(null);
const [abortController, setAbortController] =
useState<AbortController | null>(null);
const [downloadingChatIds, setDownloadingChatIds] = useState<Set<string>>(
new Set(),
);
const mutation = useMutation({
mutationFn: async (modelName: string) => {
const controller = new AbortController();
setAbortController(controller);
setDownloadProgress({ status: "Starting download..." });
if (chatId) {
setDownloadingChatIds((prev) => new Set(prev).add(chatId));
}
try {
for await (const progress of pullModel(modelName, controller.signal)) {
setDownloadProgress(progress);
if (progress.status === "success") {
// Update selected model to indicate it's now available locally
if (selectedModel && selectedModel.model === modelName) {
setSettings({ SelectedModel: modelName });
}
// Invalidate models query to refresh the list
await queryClient.invalidateQueries({ queryKey: ["models"] });
break;
}
}
} finally {
setAbortController(null);
if (chatId) {
setDownloadingChatIds((prev) => {
const newSet = new Set(prev);
newSet.delete(chatId);
return newSet;
});
}
}
},
onSuccess: () => {
setDownloadProgress(null);
if (chatId) {
setDownloadingChatIds((prev) => {
const newSet = new Set(prev);
newSet.delete(chatId);
return newSet;
});
}
},
onError: (error: Error) => {
const status =
error.name === "AbortError" ? "Download cancelled" : "Download failed";
setDownloadProgress({ status, done: true });
// Clear error message after delay
const delay = error.name === "AbortError" ? 1500 : 3000;
setTimeout(() => {
setDownloadProgress(null);
if (chatId) {
setDownloadingChatIds((prev) => {
const newSet = new Set(prev);
newSet.delete(chatId);
return newSet;
});
}
}, delay);
},
});
const cancelDownload = () => {
if (abortController) {
abortController.abort();
setAbortController(null);
if (chatId) {
setDownloadingChatIds((prev) => {
const newSet = new Set(prev);
newSet.delete(chatId);
return newSet;
});
}
}
};
return {
downloadModel: mutation.mutate,
isDownloading:
mutation.isPending && chatId ? downloadingChatIds.has(chatId) : false,
downloadProgress:
chatId && downloadingChatIds.has(chatId) ? downloadProgress : null,
error: mutation.error,
cancelDownload,
};
}


@@ -1,29 +1,20 @@
import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query";
import { useEffect, useState } from "react";
import { fetchUser, fetchConnectUrl, disconnectUser } from "@/api";
export function useUser() {
const queryClient = useQueryClient();
const [initialDataLoaded, setInitialDataLoaded] = useState(false);
// Wait for initial data to be loaded
useEffect(() => {
const initialPromise = window.__initialUserDataPromise;
if (initialPromise) {
initialPromise.finally(() => {
setInitialDataLoaded(true);
});
} else {
setInitialDataLoaded(true);
}
}, []);
const userQuery = useQuery({
queryKey: ["user"],
queryFn: () => fetchUser(),
queryFn: async () => {
const result = await fetchUser();
return result;
},
staleTime: 5 * 60 * 1000, // Consider data stale after 5 minutes
gcTime: 10 * 60 * 1000, // Keep in cache for 10 minutes
initialData: null, // Start with null to prevent flashing
retry: 10,
retryDelay: (attemptIndex) => Math.min(500 * attemptIndex, 2000),
refetchOnMount: true, // Always fetch when component mounts
});
// Mutation to refresh user data
@@ -49,14 +40,15 @@ export function useUser() {
},
});
const isLoading = userQuery.isLoading || userQuery.isFetching;
const isAuthenticated = Boolean(userQuery.data?.name);
return {
user: userQuery.data,
isLoading:
!initialDataLoaded ||
(userQuery.isLoading && userQuery.data === undefined), // Show loading until initial data is loaded
isLoading,
isError: userQuery.isError,
error: userQuery.error,
isAuthenticated: Boolean(userQuery.data?.name),
isAuthenticated,
refreshUser: refreshUser.mutate,
isRefreshing: refreshUser.isPending,
refetchUser: userQuery.refetch,


@@ -8,3 +8,6 @@ export const API_BASE = import.meta.env.DEV ? DEV_API_URL : "";
export const OLLAMA_HOST = import.meta.env.DEV
? DEV_API_URL
: window.location.origin;
export const OLLAMA_DOT_COM =
import.meta.env.VITE_OLLAMA_DOT_COM_URL || "https://ollama.com";


@@ -147,6 +147,7 @@ export const highlighterPromise = createHighlighter({
"c",
"cpp",
"sql",
"swift",
"yaml",
"markdown",
],


@@ -5,13 +5,6 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { routeTree } from "./routeTree.gen";
import { fetchUser } from "./api";
import { StreamingProvider } from "./contexts/StreamingContext";
import { User } from "@/gotypes";
declare global {
interface Window {
__initialUserDataPromise?: Promise<User | null>;
}
}
const queryClient = new QueryClient({
defaultOptions: {
@@ -24,27 +17,11 @@ const queryClient = new QueryClient({
},
});
// Track initial user data fetch
let initialUserDataPromise: Promise<User | null> | null = null;
// Initialize user data on app startup
const initializeUserData = async () => {
try {
const userData = await fetchUser();
fetchUser().then((userData) => {
if (userData) {
queryClient.setQueryData(["user"], userData);
return userData;
} catch (error) {
console.error("Error initializing user data:", error);
queryClient.setQueryData(["user"], null);
return null;
}
};
// Start initialization immediately and track the promise
initialUserDataPromise = initializeUserData();
// Export the promise so hooks can await it
window.__initialUserDataPromise = initialUserDataPromise;
});
const router = createRouter({
routeTree,


@@ -7,7 +7,6 @@ import (
"fmt"
"path/filepath"
"slices"
"strconv"
"strings"
"unicode/utf8"
@@ -74,7 +73,7 @@ func extractPDFText(data []byte) (string, error) {
if strings.TrimSpace(text) != "" {
if textBuilder.Len() > 0 {
textBuilder.WriteString("\n\n--- Page ")
textBuilder.WriteString(strconv.Itoa(i))
textBuilder.WriteString(fmt.Sprintf("%d", i))
textBuilder.WriteString(" ---\n")
}
textBuilder.WriteString(text)


@@ -101,15 +101,14 @@ type HealthResponse struct {
}
type User struct {
ID string `json:"id"`
Name string `json:"name"`
Email string `json:"email"`
AvatarURL string `json:"avatarURL"`
Plan string `json:"plan"`
Bio string `json:"bio"`
FirstName string `json:"firstName"`
LastName string `json:"lastName"`
OverThreshold bool `json:"overThreshold"`
ID string `json:"id"`
Email string `json:"email"`
Name string `json:"name"`
Bio string `json:"bio,omitempty"`
AvatarURL string `json:"avatarurl,omitempty"`
FirstName string `json:"firstname,omitempty"`
LastName string `json:"lastname,omitempty"`
Plan string `json:"plan,omitempty"`
}
type Attachment struct {
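
The `User` struct above pairs with the TypeScript `User` class earlier in the diff: fields tagged `omitempty` drop out of the JSON when they hold the zero value, which is why the TS side now marks them optional. A quick illustration with a trimmed-down copy of the struct:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// User is a trimmed-down copy of the struct above, for demonstration.
type User struct {
	ID    string `json:"id"`
	Email string `json:"email"`
	Name  string `json:"name"`
	Plan  string `json:"plan,omitempty"`
}

func main() {
	data, err := json.Marshal(User{ID: "u1", Email: "a@b.c", Name: "Ada"})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data)) // {"id":"u1","email":"a@b.c","name":"Ada"}, no "plan"
}
```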


@@ -12,18 +12,17 @@ import (
"log/slog"
"net/http"
"net/http/httputil"
"net/url"
"os"
"runtime"
"runtime/debug"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/google/uuid"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/app/auth"
"github.com/ollama/ollama/app/server"
"github.com/ollama/ollama/app/store"
"github.com/ollama/ollama/app/tools"
@@ -118,40 +117,66 @@ func (s *Server) log() *slog.Logger {
// ollamaProxy creates a reverse proxy handler to the Ollama server
func (s *Server) ollamaProxy() http.Handler {
ollamaHost := os.Getenv("OLLAMA_HOST")
if ollamaHost == "" {
ollamaHost = "http://127.0.0.1:11434"
}
var (
proxy http.Handler
proxyMu sync.Mutex
)
if !strings.HasPrefix(ollamaHost, "http://") && !strings.HasPrefix(ollamaHost, "https://") {
ollamaHost = "http://" + ollamaHost
}
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
proxyMu.Lock()
p := proxy
proxyMu.Unlock()
target, err := url.Parse(ollamaHost)
if err != nil {
s.log().Error("failed to parse OLLAMA_HOST", "error", err, "host", ollamaHost)
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "failed to configure proxy", http.StatusInternalServerError)
})
}
if p == nil {
proxyMu.Lock()
if proxy == nil {
var err error
for i := range 2 {
if i > 0 {
s.log().Warn("ollama server not ready, retrying", "attempt", i+1)
time.Sleep(1 * time.Second)
}
s.log().Info("configuring ollama proxy", "target", target.String())
err = WaitForServer(context.Background(), 10*time.Second)
if err == nil {
break
}
}
proxy := httputil.NewSingleHostReverseProxy(target)
if err != nil {
proxyMu.Unlock()
s.log().Error("ollama server not ready after retries", "error", err)
http.Error(w, "Ollama server is not ready", http.StatusServiceUnavailable)
return
}
originalDirector := proxy.Director
proxy.Director = func(req *http.Request) {
originalDirector(req)
req.Host = target.Host
s.log().Debug("proxying request", "method", req.Method, "path", req.URL.Path, "target", target.Host)
}
target := envconfig.Host()
s.log().Info("configuring ollama proxy", "target", target.String())
proxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) {
s.log().Error("proxy error", "error", err, "path", r.URL.Path, "target", target.String())
http.Error(w, "proxy error: "+err.Error(), http.StatusBadGateway)
}
newProxy := httputil.NewSingleHostReverseProxy(target)
return proxy
originalDirector := newProxy.Director
newProxy.Director = func(req *http.Request) {
originalDirector(req)
req.Host = target.Host
s.log().Debug("proxying request", "method", req.Method, "path", req.URL.Path, "target", target.Host)
}
newProxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) {
s.log().Error("proxy error", "error", err, "path", r.URL.Path, "target", target.String())
http.Error(w, "proxy error: "+err.Error(), http.StatusBadGateway)
}
proxy = newProxy
p = newProxy
} else {
p = proxy
}
proxyMu.Unlock()
}
p.ServeHTTP(w, r)
})
}
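
The rewritten `ollamaProxy` builds the reverse proxy lazily on the first request, guarded by a mutex, after waiting for the server to come up. For comparison, the same lazy-singleton shape can be sketched with `sync.Once`; note that the real handler above also retries readiness and can fail a request without caching that failure, which plain `sync.Once` does not capture.

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
	"sync"
)

// lazyProxy constructs the reverse proxy once, on the first request,
// and reuses it afterwards. sync.Once provides the check-lock-check
// behavior that ollamaProxy implements by hand with a mutex.
func lazyProxy(target *url.URL) http.Handler {
	var (
		once  sync.Once
		proxy *httputil.ReverseProxy
	)
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		once.Do(func() {
			proxy = httputil.NewSingleHostReverseProxy(target)
		})
		proxy.ServeHTTP(w, r)
	})
}

func main() {
	target, err := url.Parse("http://127.0.0.1:11434")
	if err != nil {
		log.Fatal(err)
	}
	log.Fatal(http.ListenAndServe("127.0.0.1:8080", lazyProxy(target)))
}
```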
type errHandlerFunc func(http.ResponseWriter, *http.Request) error
@@ -194,7 +219,7 @@ func (s *Server) Handler() http.Handler {
log := s.log()
level := slog.LevelInfo
start := time.Now()
requestID := strconv.FormatInt(time.Now().UnixNano(), 10)
requestID := fmt.Sprintf("%d", time.Now().UnixNano())
defer func() {
p := recover()
@@ -204,7 +229,7 @@ func (s *Server) Handler() http.Handler {
// Handle panic with user-friendly error
if !sw.Written() {
s.handleError(sw, errors.New("internal server error"))
s.handleError(sw, fmt.Errorf("internal server error"))
}
}
@@ -264,11 +289,10 @@ func (s *Server) Handler() http.Handler {
ollamaProxy := s.ollamaProxy()
mux.Handle("GET /api/tags", ollamaProxy)
mux.Handle("POST /api/show", ollamaProxy)
mux.Handle("GET /api/v1/me", handle(s.me))
mux.Handle("POST /api/v1/disconnect", handle(s.disconnect))
mux.Handle("GET /api/v1/connect", handle(s.connectURL))
mux.Handle("GET /api/v1/health", handle(s.health))
mux.Handle("GET /api/version", ollamaProxy)
mux.Handle("HEAD /api/version", ollamaProxy)
mux.Handle("POST /api/me", ollamaProxy)
mux.Handle("POST /api/signout", ollamaProxy)
// React app - catch all non-API routes and serve the React app
mux.Handle("GET /", s.appHandler())
@@ -338,7 +362,7 @@ func (s *Server) doSelfSigned(ctx context.Context, method, path string) (*http.R
}
// UserData fetches user data from ollama.com API for the current ollama key
func (s *Server) UserData(ctx context.Context) (*responses.User, error) {
func (s *Server) UserData(ctx context.Context) (*api.UserResponse, error) {
resp, err := s.doSelfSigned(ctx, http.MethodPost, "/api/me")
if err != nil {
return nil, fmt.Errorf("failed to call ollama.com/api/me: %w", err)
@@ -349,7 +373,7 @@ func (s *Server) UserData(ctx context.Context) (*responses.User, error) {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
var user responses.User
var user api.UserResponse
if err := json.NewDecoder(resp.Body).Decode(&user); err != nil {
return nil, fmt.Errorf("failed to parse user response: %w", err)
}
@@ -368,29 +392,27 @@ func (s *Server) UserData(ctx context.Context) (*responses.User, error) {
return &user, nil
}
func waitForServer(ctx context.Context) error {
timeout := time.Now().Add(10 * time.Second)
// TODO: this avoids an error on first load of the app
// however we should either show a loading state or
// wait for the Ollama server to be ready before redirecting
for {
// WaitForServer waits for the Ollama server to be ready
func WaitForServer(ctx context.Context, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
c, err := api.ClientFromEnvironment()
if err != nil {
return err
}
if _, err := c.Version(ctx); err == nil {
break
}
if time.Now().After(timeout) {
return errors.New("timeout waiting for Ollama server to be ready")
slog.Debug("ollama server is ready")
return nil
}
time.Sleep(10 * time.Millisecond)
}
return nil
return errors.New("timeout waiting for Ollama server to be ready")
}
func (s *Server) createChat(w http.ResponseWriter, r *http.Request) error {
waitForServer(r.Context())
if err := WaitForServer(r.Context(), 10*time.Second); err != nil {
return err
}
id, err := uuid.NewV7()
if err != nil {
@@ -455,7 +477,7 @@ func (s *Server) checkModelUpstream(ctx context.Context, modelName string, timeo
digest := resp.Header.Get("ollama-content-digest")
if digest == "" {
return "", 0, errors.New("no digest header found")
return "", 0, fmt.Errorf("no digest header found")
}
var pushTime int64
@@ -598,12 +620,12 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
}
if req.Model == "" {
return errors.New("empty model")
return fmt.Errorf("empty model")
}
// Don't allow empty messages unless forceUpdate is true
if req.Prompt == "" && !req.ForceUpdate {
return errors.New("empty message")
return fmt.Errorf("empty message")
}
if createdChat {
@@ -942,7 +964,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
} else {
onlyStandalone := true
for _, tc := range res.Message.ToolCalls {
if tc.Function.Name != "web_search" && tc.Function.Name != "web_fetch" {
if !(tc.Function.Name == "web_search" || tc.Function.Name == "web_fetch") {
onlyStandalone = false
break
}
@@ -975,7 +997,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
for _, toolCall := range res.Message.ToolCalls {
// continues loop as tools were executed
toolsExecuted = true
result, content, err := registry.Execute(ctx, toolCall.Function.Name, toolCall.Function.Arguments)
result, content, err := registry.Execute(ctx, toolCall.Function.Name, toolCall.Function.Arguments.ToMap())
if err != nil {
errContent := fmt.Sprintf("Error: %v", err)
toolErrMsg := store.NewMessage("tool", errContent, nil)
@@ -1194,7 +1216,7 @@ func (s *Server) getChat(w http.ResponseWriter, r *http.Request) error {
cid := r.PathValue("id")
if cid == "" {
return errors.New("chat ID is required")
return fmt.Errorf("chat ID is required")
}
chat, err := s.Store.Chat(cid)
@@ -1252,7 +1274,7 @@ func (s *Server) getChat(w http.ResponseWriter, r *http.Request) error {
func (s *Server) renameChat(w http.ResponseWriter, r *http.Request) error {
cid := r.PathValue("id")
if cid == "" {
return errors.New("chat ID is required")
return fmt.Errorf("chat ID is required")
}
var req struct {
@@ -1283,7 +1305,7 @@ func (s *Server) renameChat(w http.ResponseWriter, r *http.Request) error {
func (s *Server) deleteChat(w http.ResponseWriter, r *http.Request) error {
cid := r.PathValue("id")
if cid == "" {
return errors.New("chat ID is required")
return fmt.Errorf("chat ID is required")
}
// Check if the chat exists (no need to load attachments)
@@ -1291,7 +1313,7 @@ func (s *Server) deleteChat(w http.ResponseWriter, r *http.Request) error {
if err != nil {
if errors.Is(err, not.Found) {
w.WriteHeader(http.StatusNotFound)
return errors.New("chat not found")
return fmt.Errorf("chat not found")
}
return fmt.Errorf("failed to get chat: %w", err)
}
@@ -1438,129 +1460,6 @@ func (s *Server) settings(w http.ResponseWriter, r *http.Request) error {
})
}
func (s *Server) me(w http.ResponseWriter, r *http.Request) error {
if r.Method != http.MethodGet {
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return nil
}
user, err := s.UserData(r.Context())
if err != nil {
// If fetching from API fails, try to return cached user data if available
if cachedUser, cacheErr := s.Store.User(); cacheErr == nil && cachedUser != nil {
s.log().Info("API request failed, returning cached user data", "error", err)
responseUser := &responses.User{
Name: cachedUser.Name,
Email: cachedUser.Email,
Plan: cachedUser.Plan,
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
return json.NewEncoder(w).Encode(responseUser)
}
s.log().Error("failed to get user data", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return json.NewEncoder(w).Encode(responses.Error{
Error: "failed to get user data",
})
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
return json.NewEncoder(w).Encode(user)
}
func (s *Server) disconnect(w http.ResponseWriter, r *http.Request) error {
if r.Method != http.MethodPost {
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return nil
}
if err := s.Store.ClearUser(); err != nil {
s.log().Warn("failed to clear cached user data", "error", err)
}
// Get the SSH public key to encode for the delete request
pubKey, err := ollamaAuth.GetPublicKey()
if err != nil {
s.log().Error("failed to get public key", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return json.NewEncoder(w).Encode(responses.Error{
Error: "failed to get public key",
})
}
// Encode the key using base64 URL encoding
encodedKey := base64.RawURLEncoding.EncodeToString([]byte(pubKey))
// Call the /api/user/keys/{encodedKey} endpoint with DELETE
resp, err := s.doSelfSigned(r.Context(), http.MethodDelete, fmt.Sprintf("/api/user/keys/%s", encodedKey))
if err != nil {
s.log().Error("failed to call ollama.com/api/user/keys", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return json.NewEncoder(w).Encode(responses.Error{
Error: "failed to disconnect from ollama.com",
})
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
s.log().Error("disconnect request failed", "status", resp.StatusCode)
w.WriteHeader(http.StatusInternalServerError)
return json.NewEncoder(w).Encode(responses.Error{
Error: "failed to disconnect from ollama.com",
})
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
return json.NewEncoder(w).Encode(map[string]string{"status": "disconnected"})
}
func (s *Server) connectURL(w http.ResponseWriter, r *http.Request) error {
if r.Method != http.MethodGet {
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return nil
}
connectURL, err := auth.BuildConnectURL(OllamaDotCom)
if err != nil {
s.log().Error("failed to build connect URL", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return json.NewEncoder(w).Encode(responses.Error{
Error: "failed to build connect URL",
})
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
return json.NewEncoder(w).Encode(map[string]string{
"connect_url": connectURL,
})
}
func (s *Server) health(w http.ResponseWriter, r *http.Request) error {
if r.Method != http.MethodGet {
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return nil
}
healthy := false
c, err := api.ClientFromEnvironment()
if err == nil {
if _, err := c.Version(r.Context()); err == nil {
healthy = true
}
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
return json.NewEncoder(w).Encode(responses.HealthResponse{
Healthy: healthy,
})
}
func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) error {
ctx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
defer cancel()
@@ -1592,7 +1491,7 @@ func (s *Server) getInferenceCompute(w http.ResponseWriter, r *http.Request) err
func (s *Server) modelUpstream(w http.ResponseWriter, r *http.Request) error {
if r.Method != "POST" {
return errors.New("method not allowed")
return fmt.Errorf("method not allowed")
}
var req struct {
@@ -1603,7 +1502,7 @@ func (s *Server) modelUpstream(w http.ResponseWriter, r *http.Request) error {
}
if req.Model == "" {
return errors.New("model is required")
return fmt.Errorf("model is required")
}
digest, pushTime, err := s.checkModelUpstream(r.Context(), req.Model, 5*time.Second)
@@ -1659,13 +1558,13 @@ func convertToOllamaTool(toolSchema map[string]any) api.Tool {
tool.Function.Parameters.Type = "object"
tool.Function.Parameters.Required = []string{}
tool.Function.Parameters.Properties = make(map[string]api.ToolProperty)
tool.Function.Parameters.Properties = api.NewToolPropertiesMap()
if schemaProps, ok := toolSchema["schema"].(map[string]any); ok {
tool.Function.Parameters.Type = getStringFromMap(schemaProps, "type", "object")
if props, ok := schemaProps["properties"].(map[string]any); ok {
tool.Function.Parameters.Properties = make(map[string]api.ToolProperty)
tool.Function.Parameters.Properties = api.NewToolPropertiesMap()
for propName, propDef := range props {
if propMap, ok := propDef.(map[string]any); ok {
@@ -1673,7 +1572,7 @@ func convertToOllamaTool(toolSchema map[string]any) api.Tool {
Type: api.PropertyType{getStringFromMap(propMap, "type", "string")},
Description: getStringFromMap(propMap, "description", ""),
}
tool.Function.Parameters.Properties[propName] = prop
tool.Function.Parameters.Properties.Set(propName, prop)
}
}
}
@@ -1730,8 +1629,8 @@ func supportsWebSearchTools(model string) bool {
// buildChatRequest converts store.Chat to api.ChatRequest
func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) {
msgs := make([]api.Message, len(chat.Messages))
for i, m := range chat.Messages {
var msgs []api.Message
for _, m := range chat.Messages {
// Skip empty messages if present
if m.Content == "" && m.Thinking == "" && len(m.ToolCalls) == 0 && len(m.Attachments) == 0 {
continue
@@ -1789,7 +1688,7 @@ func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, ava
s.log().Debug("unknown message role", "role", m.Role)
}
msgs[i] = apiMsg
msgs = append(msgs, apiMsg)
}
var thinkValue *api.ThinkValue
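
The `buildChatRequest` hunk above is worth a second look: the removed version preallocated `msgs` with `make([]api.Message, len(chat.Messages))` and then `continue`d past empty messages, which leaves zero-valued holes at the skipped indices. A stripped-down illustration of the two shapes:

```go
package main

import "fmt"

func main() {
	in := []string{"hello", "", "world"} // "" stands in for a skipped message

	// Buggy shape: preallocate by length and skip with continue. The
	// skipped slot keeps its zero value and the length never shrinks.
	out1 := make([]string, len(in))
	for i, s := range in {
		if s == "" {
			continue
		}
		out1[i] = s
	}
	fmt.Printf("%q\n", out1) // ["hello" "" "world"], the hole remains

	// Fixed shape: start empty and append only what survives the filter.
	var out2 []string
	for _, s := range in {
		if s == "" {
			continue
		}
		out2 = append(out2, s)
	}
	fmt.Printf("%q\n", out2) // ["hello" "world"]
}
```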


@@ -198,7 +198,7 @@ func (u *Updater) DownloadNewRelease(ctx context.Context, updateResp UpdateRespo
_, err = os.Stat(filepath.Dir(stageFilename))
if errors.Is(err, os.ErrNotExist) {
if err := os.MkdirAll(filepath.Dir(stageFilename), 0o755); err != nil {
return fmt.Errorf("create ollama dir %s: %w", filepath.Dir(stageFilename), err)
return fmt.Errorf("create ollama dir %s: %v", filepath.Dir(stageFilename), err)
}
}
@@ -218,7 +218,7 @@ func (u *Updater) DownloadNewRelease(ctx context.Context, updateResp UpdateRespo
if err := VerifyDownload(); err != nil {
_ = os.Remove(stageFilename)
return fmt.Errorf("%s - %w", resp.Request.URL.String(), err)
return fmt.Errorf("%s - %s", resp.Request.URL.String(), err)
}
UpdateDownloaded = true
return nil


@@ -92,7 +92,7 @@ func DoUpgrade(interactive bool) error {
bundle := getStagedUpdate()
if bundle == "" {
return errors.New("failed to lookup downloads")
return fmt.Errorf("failed to lookup downloads")
}
slog.Info("starting upgrade", "app", BundlePath, "update", bundle, "pid", os.Getpid(), "log", UpgradeLogFile)
@@ -107,7 +107,7 @@ func DoUpgrade(interactive bool) error {
// Verify old doesn't exist yet
if _, err := os.Stat(contentsOldName); err == nil {
slog.Error("prior upgrade failed", "backup", contentsOldName)
return errors.New("prior upgrade failed - please upgrade manually by installing the bundle")
return fmt.Errorf("prior upgrade failed - please upgrade manually by installing the bundle")
}
if err := os.MkdirAll(appBackupDir, 0o755); err != nil {
return fmt.Errorf("unable to create backup dir %s: %w", appBackupDir, err)
@@ -133,7 +133,7 @@ func DoUpgrade(interactive bool) error {
return err
}
if !chownWithAuthorization(u.Username) {
return errors.New("unable to change permissions to complete upgrade")
return fmt.Errorf("unable to change permissions to complete upgrade")
}
if err := os.Rename(BundlePath, appBackup); err != nil {
return fmt.Errorf("unable to perform upgrade - failed to stage old version: %w", err)
@@ -264,7 +264,7 @@ func DoPostUpgradeCleanup() error {
func verifyDownload() error {
bundle := getStagedUpdate()
if bundle == "" {
return errors.New("failed to lookup downloads")
return fmt.Errorf("failed to lookup downloads")
}
slog.Debug("verifying update", "bundle", bundle)
@@ -338,7 +338,7 @@ func verifyDownload() error {
}
if err := verifyExtractedBundle(filepath.Join(dir, "Ollama.app")); err != nil {
return fmt.Errorf("signature verification failed: %w", err)
return fmt.Errorf("signature verification failed: %s", err)
}
return nil
}
@@ -347,11 +347,11 @@ func verifyDownload() error {
func DoUpgradeAtStartup() error {
bundle := getStagedUpdate()
if bundle == "" {
return errors.New("failed to lookup downloads")
return fmt.Errorf("failed to lookup downloads")
}
if BundlePath == "" {
return errors.New("unable to upgrade at startup, app in development mode")
return fmt.Errorf("unable to upgrade at startup, app in development mode")
}
// [Re]verify before proceeding


@@ -22,7 +22,9 @@ func TestIsNewReleaseAvailable(t *testing.T) {
var server *httptest.Server
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/update.json" {
fmt.Fprintf(w, `{"version": "9.9.9", "url": "%s"}`, server.URL+"/9.9.9/"+Installer)
w.Write([]byte(
fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
server.URL+"/9.9.9/"+Installer)))
// TODO - wire up the redirects to mimic real behavior
} else {
slog.Debug("unexpected request", "url", r.URL)
@@ -65,16 +67,17 @@ func TestBackgoundChecker(t *testing.T) {
var server *httptest.Server
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/update.json":
fmt.Fprintf(w, `{"version": "9.9.9", "url": "%s"}`, server.URL+"/9.9.9/"+Installer)
if r.URL.Path == "/update.json" {
w.Write([]byte(
fmt.Sprintf(`{"version": "9.9.9", "url": "%s"}`,
server.URL+"/9.9.9/"+Installer)))
// TODO - wire up the redirects to mimic real behavior
case "/9.9.9/" + Installer:
} else if r.URL.Path == "/9.9.9/"+Installer {
buf := &bytes.Buffer{}
zw := zip.NewWriter(buf)
zw.Close()
io.Copy(w, buf)
default:
} else {
slog.Debug("unexpected request", "url", r.URL)
}
}))


@@ -158,16 +158,16 @@ func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam ui
case uint32(UI_REQUEST_MSG_ID):
// Requests for the UI must always come from the main event thread
l := int(wParam)
path := unsafe.String((*byte)(unsafe.Pointer(lParam)), l)
path := unsafe.String((*byte)(unsafe.Pointer(lParam)), l) //nolint:govet,gosec
t.app.UIRun(path)
case WM_COPYDATA:
// Handle URL scheme requests from other instances
if lParam != 0 {
cds := (*COPYDATASTRUCT)(unsafe.Pointer(lParam))
if cds.DwData == 1 { // Our identifier for URL scheme messages
cds := (*COPYDATASTRUCT)(unsafe.Pointer(lParam)) //nolint:govet,gosec
if cds.DwData == 1 { // Our identifier for URL scheme messages
// Convert the data back to string
data := make([]byte, cds.CbData)
copy(data, (*[1 << 30]byte)(unsafe.Pointer(cds.LpData))[:cds.CbData:cds.CbData])
copy(data, (*[1 << 30]byte)(unsafe.Pointer(cds.LpData))[:cds.CbData:cds.CbData]) //nolint:govet,gosec
urlScheme := string(data)
handleURLSchemeRequest(urlScheme)
lResult = 1 // Return non-zero to indicate success


@@ -15,7 +15,7 @@ A Go-based command-line tool for benchmarking Ollama models with configurable pa
```
go build -o ollama-bench bench.go
./bench -model gpt-oss:20b -epochs 6 -format csv
./ollama-bench -model gpt-oss:20b -epochs 6 -format csv
```
### Using Go Run (without building)
@@ -29,31 +29,32 @@ go run bench.go -model gpt-oss:20b -epochs 3
### Basic Example
```
./bench -model gemma3 -epochs 6
./ollama-bench -model gemma3 -epochs 6
```
### Benchmark Multiple Models
```
./bench -model gemma3,gemma3n -epochs 6 -max-tokens 100 -p "Write me a short story" | tee gemma.bench
./ollama-bench -model gemma3,gemma3n -epochs 6 -max-tokens 100 -p "Write me a short story" | tee gemma.bench
benchstat -col /name gemma.bench
```
### With Image Prompt
```
./bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
./ollama-bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
```
### Advanced Example
```
./bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
./ollama-bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
```
## Command Line Options
| Option | Description | Default |
|----------|-------------|---------|
| -model | Comma-separated list of models to benchmark | (required) |
| -epochs | Number of iterations per model | 1 |
| -max-tokens | Maximum tokens for model response | 0 (unlimited) |


@@ -48,8 +48,8 @@ func OutputMetrics(w io.Writer, format string, metrics []Metrics, verbose bool)
case "benchstat":
if verbose {
printHeader := func() {
fmt.Printf("sysname: %s\n", runtime.GOOS)
fmt.Printf("machine: %s\n", runtime.GOARCH)
fmt.Fprintf(w, "sysname: %s\n", runtime.GOOS)
fmt.Fprintf(w, "machine: %s\n", runtime.GOARCH)
}
once.Do(printHeader)
}
@@ -147,9 +147,20 @@ func BenchmarkChat(fOpt flagOptions) error {
return err
}
var out io.Writer = os.Stdout
if fOpt.outputFile != nil && *fOpt.outputFile != "" {
f, err := os.OpenFile(*fOpt.outputFile, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: cannot open output file %s: %v\n", *fOpt.outputFile, err)
return err
}
defer f.Close()
out = f
}
for _, model := range models {
for range *fOpt.epochs {
options := make(map[string]any)
options := make(map[string]interface{})
if *fOpt.maxTokens > 0 {
options["num_predict"] = *fOpt.maxTokens
}
@@ -241,13 +252,14 @@ func BenchmarkChat(fOpt flagOptions) error {
},
}
OutputMetrics(os.Stdout, *fOpt.format, metrics, *fOpt.verbose)
OutputMetrics(out, *fOpt.format, metrics, *fOpt.verbose)
if *fOpt.keepAlive > 0 {
time.Sleep(time.Duration(*fOpt.keepAlive*float64(time.Second)) + 200*time.Millisecond)
}
}
}
return nil
}
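The new `-output` handling above follows a common writer-selection pattern: default to stdout, open a file only when a path is given. A self-contained sketch (the `openOutput` helper is illustrative, not code from this change):

```go
package main

import (
	"fmt"
	"io"
	"os"
)

// openOutput defaults to stdout and switches to a file only when a path
// is given; the second return value closes the file if one was opened.
func openOutput(path string) (io.Writer, func() error, error) {
	if path == "" {
		return os.Stdout, func() error { return nil }, nil
	}
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return nil, nil, err
	}
	return f, f.Close, nil
}

func main() {
	out, closeOut, err := openOutput("") // empty path keeps stdout
	if err != nil {
		fmt.Fprintf(os.Stderr, "ERROR: %v\n", err)
		os.Exit(1)
	}
	defer closeOut()
	fmt.Fprintln(out, "metrics would be written here")
}
```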

View File

@@ -442,7 +442,7 @@ func TestReadImage_FileNotFound(t *testing.T) {
func TestOptionsMapCreation(t *testing.T) {
fOpt := createTestFlagOptions()
options := make(map[string]any)
options := make(map[string]interface{})
if *fOpt.maxTokens > 0 {
options["num_predict"] = *fOpt.maxTokens
}

View File

@@ -11,7 +11,6 @@ import (
"fmt"
"io"
"log"
"maps"
"math"
"net"
"net/http"
@@ -46,6 +45,7 @@ import (
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/types/syncmap"
"github.com/ollama/ollama/version"
xcmd "github.com/ollama/ollama/x/cmd"
)
const ConnectInstructions = "To sign in, navigate to:\n %s\n\n"
@@ -204,7 +204,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
if err := client.Create(cmd.Context(), req, fn); err != nil {
if strings.Contains(err.Error(), "path or Modelfile are required") {
return errors.New("the ollama server must be updated to use `ollama create` with this client")
return fmt.Errorf("the ollama server must be updated to use `ollama create` with this client")
}
return err
}
@@ -518,6 +518,10 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return generateEmbedding(cmd, name, opts.Prompt, opts.KeepAlive, truncate, dimensions)
}
// Check for experimental flag
isExperimental, _ := cmd.Flags().GetBool("experimental")
yoloMode, _ := cmd.Flags().GetBool("yolo")
if interactive {
if err := loadOrUnloadModel(cmd, &opts); err != nil {
var sErr api.AuthorizationError
@@ -544,6 +548,11 @@ func RunHandler(cmd *cobra.Command, args []string) error {
}
}
// Use experimental agent loop with tools
if isExperimental {
return xcmd.GenerateInteractive(cmd, opts.Model, opts.WordWrap, opts.Options, opts.Think, opts.HideThinking, opts.KeepAlive, yoloMode)
}
return generateInteractive(cmd, opts)
}
return generate(cmd, opts)
@@ -944,6 +953,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
rows = append(rows, []string{"", "parameters", resp.Details.ParameterSize})
}
rows = append(rows, []string{"", "quantization", resp.Details.QuantizationLevel})
if resp.Requires != "" {
rows = append(rows, []string{"", "requires", resp.Requires})
}
return
})
@@ -991,7 +1003,7 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
var v string
switch vData := resp.ModelInfo[k].(type) {
case bool:
v = strconv.FormatBool(vData)
v = fmt.Sprintf("%t", vData)
case string:
v = vData
case float64:
@@ -1205,7 +1217,9 @@ func (r runOptions) Copy() runOptions {
var opts map[string]any
if r.Options != nil {
opts = make(map[string]any, len(r.Options))
maps.Copy(opts, r.Options)
for k, v := range r.Options {
opts[k] = v
}
}
var think *api.ThinkValue
@@ -1329,12 +1343,12 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
cancel()
}()
state := &displayResponseState{}
var state *displayResponseState = &displayResponseState{}
var thinkingContent strings.Builder
var latest api.ChatResponse
var fullResponse strings.Builder
thinkTagOpened := false
thinkTagClosed := false
var thinkTagOpened bool = false
var thinkTagClosed bool = false
role := "assistant"
@@ -1429,7 +1443,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
latest.Summary()
}
return &api.Message{Role: role, Content: fullResponse.String()}, nil
return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, nil
}
func generate(cmd *cobra.Command, opts runOptions) error {
@@ -1462,10 +1476,10 @@ func generate(cmd *cobra.Command, opts runOptions) error {
cancel()
}()
state := &displayResponseState{}
var state *displayResponseState = &displayResponseState{}
var thinkingContent strings.Builder
thinkTagOpened := false
thinkTagClosed := false
var thinkTagOpened bool = false
var thinkTagClosed bool = false
plainText := !term.IsTerminal(int(os.Stdout.Fd()))
@@ -1633,7 +1647,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
return err
}
if err := client.Heartbeat(cmd.Context()); err != nil {
if !strings.Contains(err.Error(), " refused") && !strings.Contains(err.Error(), "could not connect") {
if !(strings.Contains(err.Error(), " refused") || strings.Contains(err.Error(), "could not connect")) {
return err
}
if err := startApp(cmd.Context(), client); err != nil {
@@ -1750,6 +1764,8 @@ func NewCLI() *cobra.Command {
runCmd.Flags().Bool("hidethinking", false, "Hide thinking output (if provided)")
runCmd.Flags().Bool("truncate", false, "For embedding models: truncate inputs exceeding context length (default: true). Set --truncate=false to error instead")
runCmd.Flags().Int("dimensions", 0, "Truncate output embeddings to specified dimension (embedding models only)")
runCmd.Flags().Bool("experimental", false, "Enable experimental agent loop with tools")
runCmd.Flags().BoolP("yolo", "y", false, "Skip all tool approval prompts (use with caution)")
stopCmd := &cobra.Command{
Use: "stop MODEL",
@@ -1817,6 +1833,7 @@ func NewCLI() *cobra.Command {
PreRunE: checkServerHeartbeat,
RunE: ListRunningHandler,
}
copyCmd := &cobra.Command{
Use: "cp SOURCE DESTINATION",
Short: "Copy a model",
@@ -1951,13 +1968,13 @@ func inferThinkingOption(caps *[]model.Capability, runOpts *runOptions, explicit
}
func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string {
var sb strings.Builder
out := ""
formatExplanation := ""
formatValues := ""
if !plainText {
formatExplanation = readline.ColorGrey + readline.ColorBold
formatValues = readline.ColorDefault
sb.WriteString(formatExplanation)
out += formatExplanation
}
for i, toolCall := range toolCalls {
argsAsJSON, err := json.Marshal(toolCall.Function.Arguments)
@@ -1965,13 +1982,13 @@ func renderToolCalls(toolCalls []api.ToolCall, plainText bool) string {
return ""
}
if i > 0 {
sb.WriteString("\n")
out += "\n"
}
// all tool calls are unexpected since we don't currently support registering any in the CLI
fmt.Fprintf(&sb, " Model called a non-existent function '%s()' with arguments: %s", formatValues+toolCall.Function.Name+formatExplanation, formatValues+string(argsAsJSON)+formatExplanation)
out += fmt.Sprintf(" Model called a non-existent function '%s()' with arguments: %s", formatValues+toolCall.Function.Name+formatExplanation, formatValues+string(argsAsJSON)+formatExplanation)
}
if !plainText {
sb.WriteString(readline.ColorDefault)
out += readline.ColorDefault
}
return sb.String()
return out
}
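The new `--experimental` and `--yolo` flags above are read with cobra's `Flags().GetBool`. A minimal, self-contained sketch of that wiring (flag names match the diff; the command itself is illustrative):

```go
package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

func main() {
	cmd := &cobra.Command{
		Use: "run",
		RunE: func(cmd *cobra.Command, args []string) error {
			// Errors are ignored here, matching the style in the diff;
			// GetBool only fails if the flag was never registered.
			experimental, _ := cmd.Flags().GetBool("experimental")
			yolo, _ := cmd.Flags().GetBool("yolo")
			fmt.Println("experimental:", experimental, "yolo:", yolo)
			return nil
		},
	}
	cmd.Flags().Bool("experimental", false, "enable experimental agent loop")
	cmd.Flags().BoolP("yolo", "y", false, "skip tool approval prompts")
	cmd.SetArgs([]string{"--experimental", "-y"})
	_ = cmd.Execute()
}
```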

View File

@@ -3,7 +3,6 @@ package cmd
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
@@ -292,6 +291,31 @@ Weigh anchor!
t.Errorf("unexpected output (-want +got):\n%s", diff)
}
})
t.Run("min version", func(t *testing.T) {
var b bytes.Buffer
if err := showInfo(&api.ShowResponse{
Details: api.ModelDetails{
Family: "test",
ParameterSize: "7B",
QuantizationLevel: "FP16",
},
Requires: "0.14.0",
}, false, &b); err != nil {
t.Fatal(err)
}
expect := ` Model
architecture test
parameters 7B
quantization FP16
requires 0.14.0
`
if diff := cmp.Diff(expect, b.String()); diff != "" {
t.Errorf("unexpected output (-want +got):\n%s", diff)
}
})
}
func TestDeleteHandler(t *testing.T) {
@@ -308,7 +332,7 @@ func TestDeleteHandler(t *testing.T) {
} else {
w.WriteHeader(http.StatusNotFound)
errPayload := `{"error":"model '%s' not found"}`
fmt.Fprintf(w, errPayload, req.Name)
w.Write([]byte(fmt.Sprintf(errPayload, req.Name)))
}
return
}
@@ -762,8 +786,8 @@ func TestGetModelfileName(t *testing.T) {
t.Errorf("expected filename: '%s' actual filename: '%s'", expectedFilename, actualFilename)
}
if !errors.Is(tt.expectedErr, os.ErrNotExist) {
if !errors.Is(actualErr, tt.expectedErr) {
if tt.expectedErr != os.ErrNotExist {
if actualErr != tt.expectedErr {
t.Errorf("expected err: %v actual err: %v", tt.expectedErr, actualErr)
}
} else {
@@ -925,8 +949,10 @@ func TestPushHandler(t *testing.T) {
t.Errorf("expected output %q, got %q", tt.expectedOutput, got)
}
}
} else if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
} else {
if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
}
}
})
}
@@ -1013,8 +1039,10 @@ func TestListHandler(t *testing.T) {
if got := string(output); got != tt.expectedOutput {
t.Errorf("expected output:\n%s\ngot:\n%s", tt.expectedOutput, got)
}
} else if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
} else {
if err == nil || !strings.Contains(err.Error(), tt.expectedError) {
t.Errorf("expected error containing %q, got %v", tt.expectedError, err)
}
}
})
}
@@ -1319,8 +1347,8 @@ func TestRunOptions_Copy(t *testing.T) {
// Test 2: Verify all fields are copied correctly
tests := []struct {
name string
got any
want any
got interface{}
want interface{}
}{
{"Model", copied.Model, original.Model},
{"ParentModel", copied.ParentModel, original.ParentModel},

View File

@@ -40,6 +40,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
@@ -130,7 +131,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
var sb strings.Builder
var multiline MultilineState
thinkExplicitlySet := opts.Think != nil
var thinkExplicitlySet bool = opts.Think != nil
for {
line, err := scanner.Readline()
@@ -410,7 +411,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
if resp.Parameters == "" {
fmt.Println(" No additional parameters were specified for this model.")
} else {
for l := range strings.SplitSeq(resp.Parameters, "\n") {
for _, l := range strings.Split(resp.Parameters, "\n") {
fmt.Printf(" %s\n", l)
}
}
@@ -576,8 +577,9 @@ func extractFileNames(input string) []string {
func extractFileData(input string) (string, []api.ImageData, error) {
filePaths := extractFileNames(input)
imgs := make([]api.ImageData, len(filePaths))
for i, fp := range filePaths {
var imgs []api.ImageData
for _, fp := range filePaths {
nfp := normalizeFilePath(fp)
data, err := getImageData(nfp)
if errors.Is(err, os.ErrNotExist) {
@@ -590,7 +592,7 @@ func extractFileData(input string) (string, []api.ImageData, error) {
input = strings.ReplaceAll(input, "'"+nfp+"'", "")
input = strings.ReplaceAll(input, "'"+fp+"'", "")
input = strings.ReplaceAll(input, fp, "")
imgs[i] = data
imgs = append(imgs, data)
}
return strings.TrimSpace(input), imgs, nil
}
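The `extractFileData` change above switches from index assignment into a preallocated slice to `append`, so skipped files no longer leave zero-valued gaps. A minimal sketch of the pattern (file names are illustrative):

```go
package main

import (
	"errors"
	"fmt"
	"os"
)

func main() {
	paths := []string{"a.png", "missing.png"}
	var imgs [][]byte
	for _, p := range paths {
		data, err := os.ReadFile(p)
		if errors.Is(err, os.ErrNotExist) {
			continue // skipped files leave no gap in imgs
		} else if err != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
		imgs = append(imgs, data)
	}
	fmt.Println("loaded", len(imgs), "images")
}
```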

View File

@@ -38,10 +38,10 @@ func (ModelParameters) KV(t *Tokenizer) ggml.KV {
"general.file_type": uint32(1),
"general.quantization_version": uint32(2),
"tokenizer.ggml.pre": t.Pre,
"tokenizer.ggml.model": t.Model,
"tokenizer.ggml.tokens": t.Tokens,
"tokenizer.ggml.scores": t.Scores,
"tokenizer.ggml.token_type": t.Types,
"tokenizer.ggml.model": t.Vocabulary.Model,
"tokenizer.ggml.tokens": t.Vocabulary.Tokens,
"tokenizer.ggml.scores": t.Vocabulary.Scores,
"tokenizer.ggml.token_type": t.Vocabulary.Types,
}
if len(t.Merges) > 0 {
@@ -182,6 +182,8 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
conv = &llama4Model{}
case "Mistral3ForConditionalGeneration":
conv = &mistral3Model{}
case "Ministral3ForCausalLM":
conv = &mistral3CausalModel{}
case "MixtralForCausalLM":
conv = &mixtralModel{}
case "GemmaForCausalLM":
@@ -200,14 +202,20 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
conv = &qwen25VLModel{}
case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
conv = &qwen3VLModel{}
case "Olmo3ForCausalLM":
conv = &olmoModel{}
case "BertModel":
conv = &bertModel{}
case "NomicBertModel", "NomicBertMoEModel":
conv = &nomicbertModel{}
case "CohereForCausalLM":
conv = &commandrModel{}
case "GptOssForCausalLM":
conv = &gptossModel{}
case "DeepseekOCRForCausalLM":
conv = &deepseekocr{}
case "DeepseekV3ForCausalLM":
conv = &deepseek2Model{}
default:
return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
}
@@ -231,20 +239,20 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
switch {
case vocabSize == 0:
slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Tokens))
case vocabSize > len(t.Tokens):
slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Tokens))
for i := range vocabSize - len(t.Tokens) {
t.Tokens = append(t.Tokens, fmt.Sprintf("[PAD%d]", i))
t.Scores = append(t.Scores, -1)
t.Types = append(t.Types, tokenTypeUserDefined)
slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
case vocabSize > len(t.Vocabulary.Tokens):
slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
for i := range vocabSize - len(t.Vocabulary.Tokens) {
t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
}
case vocabSize < len(t.Tokens):
slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Tokens))
p.VocabSize = uint32(len(t.Tokens))
p.TextModel.VocabSize = uint32(len(t.Tokens))
case vocabSize < len(t.Vocabulary.Tokens):
slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
p.VocabSize = uint32(len(t.Vocabulary.Tokens))
p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
default:
slog.Debug("vocabulary", "size", len(t.Tokens))
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
}
ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))

View File

@@ -137,7 +137,7 @@ func (p *bertModel) KV(t *Tokenizer) ggml.KV {
}
func (p *bertModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, 0, len(ts))
var out []*ggml.Tensor
for _, t := range ts {
if slices.Contains([]string{
"embeddings.position_ids",

View File

@@ -44,14 +44,14 @@ func (p *commandrModel) KV(t *Tokenizer) ggml.KV {
}
func (p *commandrModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, len(ts))
for i, t := range ts {
out[i] = &ggml.Tensor{
var out []*ggml.Tensor
for _, t := range ts {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
}
})
}
return out
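This diff (and several below) replaces index assignment over `make([]T, len(ts))` with `append` onto a nil slice; mixing the two styles is the classic pitfall, since `append` grows past the zero-valued prefix:

```go
package main

import "fmt"

func main() {
	pre := make([]int, 3) // len 3, zero-valued slots
	pre = append(pre, 1)  // [0 0 0 1]: grows past the prefix
	var out []int
	out = append(out, 1) // [1]
	fmt.Println(pre, out)
}
```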

View File

@@ -0,0 +1,173 @@
package convert
import (
"cmp"
"fmt"
"log/slog"
"regexp"
"strconv"
"github.com/ollama/ollama/fs/ggml"
)
type deepseek2Model struct {
ModelParameters // architectures, vocab_size
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RMSNormEPS float32 `json:"rms_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
QKNopeHeadDim uint32 `json:"qk_nope_head_dim"`
QKRopeHeadDim uint32 `json:"qk_rope_head_dim"`
KVLoraRank uint32 `json:"kv_lora_rank"`
QLoraRank uint32 `json:"q_lora_rank"`
VHeadDim uint32 `json:"v_head_dim"`
ExpertCount uint32 `json:"n_routed_experts"`
ExpertSharedCount uint32 `json:"n_shared_experts"`
ExpertIntermediateSize uint32 `json:"moe_intermediate_size"`
ExpertUsedCount uint32 `json:"num_experts_per_tok"`
ExpertWeightsNorm bool `json:"norm_topk_prob"`
ExpertWeightsScale float32 `json:"routed_scaling_factor"`
ScoringFunc string `json:"scoring_func"`
LeadingDenseBlockCount uint32 `json:"first_k_dense_replace"`
RopeScaling struct {
Factor float32 `json:"factor"`
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
Type string `json:"type"`
MScaleAllDim float32 `json:"mscale_all_dim"`
} `json:"rope_scaling"`
Architecture string
}
func (p *deepseek2Model) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "deepseek2"
kv["general.type"] = "model"
kv["deepseek2.block_count"] = p.HiddenLayers
numHeads := p.NumAttentionHeads
numKVHeads := p.NumKeyValueHeads
kv["deepseek2.attention.head_count"] = numHeads
kv["deepseek2.attention.head_count_kv"] = numKVHeads
kv["deepseek2.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
kv["deepseek2.attention.kv_lora_rank"] = p.KVLoraRank
kv["deepseek2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["deepseek2.attention.q_lora_rank"] = p.QLoraRank
kv["deepseek2.attention.value_length"] = p.VHeadDim
kv["deepseek2.context_length"] = p.MaxPositionEmbeddings
kv["deepseek2.embedding_length"] = p.HiddenSize
kv["deepseek2.expert_count"] = p.ExpertCount
kv["deepseek2.expert_feed_forward_length"] = p.ExpertIntermediateSize
kv["deepseek2.expert_shared_count"] = p.ExpertSharedCount
var scoringFunc uint32
switch p.ScoringFunc {
case "softmax":
// not currently supported in the model, but needed for Deepseek-OCR
scoringFunc = 1
case "sigmoid":
scoringFunc = 2
}
kv["deepseek2.expert_gating_func"] = scoringFunc
kv["deepseek2.expert_used_count"] = p.ExpertUsedCount
kv["deepseek2.expert_weights_norm"] = p.ExpertWeightsNorm
kv["deepseek2.expert_weights_scale"] = p.ExpertWeightsScale
kv["deepseek2.feed_forward_length"] = p.IntermediateSize
kv["deepseek2.leading_dense_block_count"] = p.LeadingDenseBlockCount
kv["deepseek2.rope.dimension_count"] = p.QKRopeHeadDim
kv["deepseek2.rope.freq_base"] = cmp.Or(p.RopeTheta, 10000.0)
kv["deepseek2.rope.scaling.factor"] = p.RopeScaling.Factor
kv["deepseek2.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeddings
kv["deepseek2.rope.scaling.type"] = p.RopeScaling.Type
kv["deepseek2.rope.scaling.yarn_log_multiplier"] = 0.1 * p.RopeScaling.MScaleAllDim
kv["tokenizer.ggml.pre"] = "deepseek-v3"
return kv
}
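`cmp.Or`, used here and throughout the convert package, returns its first non-zero argument, which is how missing config keys fall back to defaults. A tiny demo (values are illustrative):

```go
package main

import (
	"cmp"
	"fmt"
)

func main() {
	var ropeTheta float32                      // zero value: key absent from config.json
	fmt.Println(cmp.Or(ropeTheta, 10000.0))    // 10000: falls back to the default
	fmt.Println(cmp.Or(float32(5e5), 10000.0)) // 500000: explicit value wins
}
```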
func (p *deepseek2Model) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"language_model.", "",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.kv_a_proj_with_mqa", "attn_kv_a_mqa",
"self_attn.kv_a_layernorm", "attn_kv_a_norm",
"self_attn.kv_b_proj", "attn_kv_b",
"self_attn.q_a_proj", "attn_q_a",
"self_attn.q_a_layernorm", "attn_q_a_norm",
"self_attn.q_b_proj", "attn_q_b",
"self_attn.o_proj", "attn_output",
"post_attention_layernorm", "ffn_norm",
"mlp.shared_experts.down_proj", "ffn_down_shexp",
"mlp.shared_experts.gate_proj", "ffn_gate_shexp",
"mlp.shared_experts.up_proj", "ffn_up_shexp",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
"mlp.gate.e_score_correction_bias", "exp_probs_b.bias",
"mlp.gate", "ffn_gate_inp",
}
}
func (p *deepseek2Model) Tensors(s []Tensor) (out []*ggml.Tensor) {
merges := make([]merge, p.HiddenLayers*3)
for i := range p.HiddenLayers {
merges[i*3+0] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.gate_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
}
merges[i*3+1] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.up_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
}
merges[i*3+2] = merge{
fmt.Sprintf("blk.%d.mlp.experts.*.down_proj.weight", i),
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
}
}
skipLayer := func(n string, minValue uint32) bool {
re := regexp.MustCompile(`^blk\.(\d+)`)
matches := re.FindStringSubmatch(n)
if matches == nil {
return false
}
blkNum, err := strconv.Atoi(matches[1])
if err != nil {
return false
}
return uint32(blkNum) >= minValue
}
out, s = mergeTensors(s, merges...)
for _, t := range s {
// skip any additional layers (such as the Multi-Token Prediction layer)
if skipLayer(t.Name(), p.HiddenLayers) {
slog.Debug("skipping layer", "name", t.Name())
continue
}
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
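To see `skipLayer` from the `Tensors` method above in isolation, here is a self-contained copy with example inputs (the tensor names and layer count are illustrative):

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// skipLayer reports whether a tensor belongs to a block at or beyond
// minValue, e.g. a Multi-Token Prediction layer appended after the stack.
func skipLayer(name string, minValue uint32) bool {
	re := regexp.MustCompile(`^blk\.(\d+)`)
	matches := re.FindStringSubmatch(name)
	if matches == nil {
		return false
	}
	blkNum, err := strconv.Atoi(matches[1])
	if err != nil {
		return false
	}
	return uint32(blkNum) >= minValue
}

func main() {
	fmt.Println(skipLayer("blk.60.attn_q.weight", 61)) // false: in-range layer
	fmt.Println(skipLayer("blk.61.attn_q.weight", 61)) // true: extra (e.g. MTP) layer
	fmt.Println(skipLayer("output_norm.weight", 61))   // false: not a block tensor
}
```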

View File

@@ -43,18 +43,18 @@ func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
}
func (p *gemmaModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, len(ts))
for i, t := range ts {
var out []*ggml.Tensor
for _, t := range ts {
if !strings.HasPrefix(t.Name(), "v.") && strings.HasSuffix(t.Name(), "_norm.weight") {
t.SetRepacker(p.addOne)
}
out[i] = &ggml.Tensor{
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
}
})
}
return out

View File

@@ -22,8 +22,8 @@ func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
}
func (p *gemma2Adapter) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, len(ts))
for i, t := range ts {
var out []*ggml.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
@@ -31,12 +31,12 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []*ggml.Tensor {
t.SetRepacker(p.repack)
}
out[i] = &ggml.Tensor{
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
}
})
}
return out

View File

@@ -2,6 +2,7 @@ package convert
import (
"cmp"
"slices"
"github.com/ollama/ollama/fs/ggml"
)
@@ -26,16 +27,26 @@ type gemma3Model struct {
NumChannels uint32 `json:"num_channels"` // num_channels 3
PatchSize uint32 `json:"patch_size"` // patch_size 14
} `json:"vision_config"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RMSNormEPS float32 `json:"rms_norm_eps"`
HeadDim uint32 `json:"head_dim"`
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
RopeLocalTheta float32 `json:"rope_local_base_freq"`
RopeGlobalTheta float32 `json:"rope_global_base_freq"`
SlidingWindow uint32 `json:"sliding_window"`
MultiModalTokensPerImage uint32 `json:"mm_tokens_per_image"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RMSNormEPS float32 `json:"rms_norm_eps"`
HeadDim uint32 `json:"head_dim"`
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
RopeLocalTheta float32 `json:"rope_local_base_freq"`
RopeTheta float32 `json:"rope_theta"`
SlidingWindow uint32 `json:"sliding_window"`
SlidingWindowPattern *uint32 `json:"sliding_window_pattern"`
LayerTypes []string `json:"layer_types"`
MultiModalTokensPerImage uint32 `json:"mm_tokens_per_image"`
RopeScaling *struct {
Type string `json:"rope_type"`
Factor float32 `json:"factor"`
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
ExtrapolationFactor float32 `json:"extrapolation_factor"`
BetaFast float32 `json:"beta_fast"`
BetaSlow float32 `json:"beta_slow"`
} `json:"rope_scaling"`
}
const (
@@ -81,9 +92,38 @@ func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
kv["gemma3.attention.key_length"] = p.HeadDim
kv["gemma3.attention.value_length"] = p.HeadDim
kv["gemma3.attention.sliding_window"] = p.SlidingWindow
kv["gemma3.final_logit_softcapping"] = cmp.Or(p.FinalLogitSoftcap, 30)
// The sliding window pattern is either provided as the sliding_window_pattern
// key (an int) or as the layer_types key (a list of strings).
if p.SlidingWindowPattern != nil || len(p.LayerTypes) > 0 {
kv["gemma3.attention.sliding_window_pattern"] = slices.Collect(func(yield func(bool) bool) {
for i := range numBlocks {
var isLocal bool
if len(p.LayerTypes) > 0 && int(i) < len(p.LayerTypes) {
isLocal = p.LayerTypes[i] == "sliding_attention"
} else if p.SlidingWindowPattern != nil && *p.SlidingWindowPattern > 0 {
isLocal = (i+1)%*p.SlidingWindowPattern != 0
}
if !yield(isLocal) {
break
}
}
})
}
if p.FinalLogitSoftcap > 0 {
kv["gemma3.final_logit_softcapping"] = p.FinalLogitSoftcap
}
kv["gemma3.rope.local.freq_base"] = cmp.Or(p.RopeLocalTheta, 10000.0)
kv["gemma3.rope.global.freq_base"] = cmp.Or(p.RopeGlobalTheta, 1000000.0)
kv["gemma3.rope.freq_base"] = cmp.Or(p.RopeTheta, 1000000.0)
if p.RopeScaling != nil && p.RopeScaling.Type == "yarn" && p.RopeScaling.Factor > 0 {
kv["gemma3.rope.scaling.type"] = "yarn"
kv["gemma3.rope.scaling.factor"] = p.RopeScaling.Factor
kv["gemma3.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeddings
kv["gemma3.rope.scaling.extrapolation_factor"] = cmp.Or(p.RopeScaling.ExtrapolationFactor, float32(1.0))
kv["gemma3.rope.scaling.beta_fast"] = cmp.Or(p.RopeScaling.BetaFast, float32(64.0))
kv["gemma3.rope.scaling.beta_slow"] = cmp.Or(p.RopeScaling.BetaSlow, float32(1.0))
}
kv["gemma3.embedding_length"] = p.HiddenSize
kv["gemma3.feed_forward_length"] = p.IntermediateSize
default:
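The sliding-window iterator in the hunk above is dense; the same computation in a standalone sketch, using an illustrative 6-layer model with a pattern of 3:

```go
package main

import (
	"fmt"
	"slices"
)

func main() {
	numBlocks := uint32(6)
	pattern := uint32(3) // every 3rd layer uses global attention
	isLocal := slices.Collect(func(yield func(bool) bool) {
		for i := range numBlocks {
			if !yield((i+1)%pattern != 0) {
				break
			}
		}
	})
	fmt.Println(isLocal) // [true true false true true false]
}
```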

View File

@@ -111,7 +111,7 @@ func (m *gptossModel) Tensors(ts []Tensor) []*ggml.Tensor {
for name, mxfp4 := range mxfp4s {
dims := mxfp4.blocks.Shape()
if !strings.HasSuffix(name, ".weight") {
name += ".weight"
name = name + ".weight"
}
if strings.Contains(name, "ffn_down_exps") {
out = append(out, &ggml.Tensor{

View File

@@ -127,7 +127,7 @@ func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
}
func (p *llamaModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, 0, len(ts)+1)
var out []*ggml.Tensor
if p.RopeScaling.factors != nil {
out = append(out, &ggml.Tensor{
@@ -176,9 +176,9 @@ func (p *llamaModel) Replacements() []string {
}
func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
dims := make([]int, len(shape))
for i, dim := range shape {
dims[i] = int(dim)
var dims []int
for _, dim := range shape {
dims = append(dims, int(dim))
}
var heads uint32

View File

@@ -30,8 +30,8 @@ func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
}
func (p *llamaAdapter) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, len(ts))
for i, t := range ts {
var out []*ggml.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
@@ -41,12 +41,12 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []*ggml.Tensor {
t.SetRepacker(p.repack)
}
out[i] = &ggml.Tensor{
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: shape,
WriterTo: t,
}
})
}
return out

View File

@@ -29,6 +29,17 @@ type mistral3Model struct {
SlidingWindow *uint32 `json:"sliding_window"`
HiddenAct string `json:"hidden_act"`
VocabSize uint32 `json:"vocab_size"`
RopeParameters struct {
BetaFast float32 `json:"beta_fast"`
BetaSlow float32 `json:"beta_slow"`
Factor float32 `json:"factor"`
Llama4ScalingBeta *float32 `json:"llama_4_scaling_beta"`
OrigMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
RopeType string `json:"rope_type"`
RopeTheta float32 `json:"rope_theta"`
Mscale *float32 `json:"mscale"`
MscaleAllDim *float32 `json:"mscale_all_dim"`
} `json:"rope_parameters"`
} `json:"text_config"`
VisionModel struct {
NumAttentionHeads uint32 `json:"num_attention_heads"`
@@ -41,6 +52,9 @@ type mistral3Model struct {
HeadDim uint32 `json:"head_dim"`
HiddenAct string `json:"hidden_act"`
RopeTheta float32 `json:"rope_theta"`
RopeParameters struct {
RopeTheta float32 `json:"rope_theta"`
} `json:"rope_parameters"`
} `json:"vision_config"`
MultiModalProjectorBias bool `json:"multimodal_projector_bias"`
ProjectorHiddenAct string `json:"projector_hidden_act"`
@@ -61,8 +75,25 @@ func (p *mistral3Model) KV(t *Tokenizer) ggml.KV {
kv["mistral3.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
kv["mistral3.attention.key_length"] = p.TextModel.HeadDim
kv["mistral3.attention.value_length"] = p.TextModel.HeadDim
kv["mistral3.rope.dimension_count"] = p.TextModel.HiddenSize / p.TextModel.NumHiddenLayers
kv["mistral3.rope.freq_base"] = p.TextModel.RopeTheta
kv["mistral3.rope.dimension_count"] = cmp.Or(p.TextModel.HeadDim, p.TextModel.HiddenSize/p.TextModel.NumAttentionHeads)
kv["mistral3.rope.freq_base"] = cmp.Or(p.TextModel.RopeTheta, p.TextModel.RopeParameters.RopeTheta)
kv["mistral3.rope.scaling.factor"] = p.TextModel.RopeParameters.Factor
kv["mistral3.rope.scaling.type"] = p.TextModel.RopeParameters.RopeType
kv["mistral3.rope.scaling.beta_fast"] = p.TextModel.RopeParameters.BetaFast
kv["mistral3.rope.scaling.beta_slow"] = p.TextModel.RopeParameters.BetaSlow
if p.TextModel.RopeParameters.Mscale != nil {
kv["mistral3.rope.scaling.mscale"] = *p.TextModel.RopeParameters.Mscale
}
if p.TextModel.RopeParameters.MscaleAllDim != nil {
kv["mistral3.rope.scaling.mscale_all_dim"] = *p.TextModel.RopeParameters.MscaleAllDim
}
if p.TextModel.RopeParameters.OrigMaxPositionEmbeddings > 0 {
kv["mistral3.rope.scaling.original_context_length"] = p.TextModel.RopeParameters.OrigMaxPositionEmbeddings
}
if p.TextModel.RopeParameters.Llama4ScalingBeta != nil {
kv["mistral3.rope.scaling_beta"] = *p.TextModel.RopeParameters.Llama4ScalingBeta
}
// Vision configuration
kv["mistral3.vision.block_count"] = p.VisionModel.NumHiddenLayers
@@ -74,7 +105,7 @@ func (p *mistral3Model) KV(t *Tokenizer) ggml.KV {
kv["mistral3.vision.patch_size"] = p.VisionModel.PatchSize
kv["mistral3.vision.num_channels"] = p.VisionModel.NumChannels
// kv["mistral3.vision.attention.layer_norm_epsilon"] = 1e-05 // Default value
kv["mistral3.vision.rope.freq_base"] = p.VisionModel.RopeTheta
kv["mistral3.vision.rope.freq_base"] = cmp.Or(p.VisionModel.RopeTheta, p.VisionModel.RopeParameters.RopeTheta)
// Multimodal configuration
kv["mistral3.image_token_index"] = p.ImageTokenIndex
@@ -90,8 +121,9 @@ func (p *mistral3Model) KV(t *Tokenizer) ggml.KV {
}
func (p *mistral3Model) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, len(ts))
for i, t := range ts {
var out []*ggml.Tensor
for _, t := range ts {
if !strings.HasPrefix(t.Name(), "v.") {
if strings.HasSuffix(t.Name(), ".attn_q.weight") ||
strings.HasSuffix(t.Name(), ".attn_k.weight") {
@@ -99,12 +131,12 @@ func (p *mistral3Model) Tensors(ts []Tensor) []*ggml.Tensor {
}
}
out[i] = &ggml.Tensor{
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
}
})
}
return out
@@ -144,9 +176,9 @@ func (p *mistral3Model) Replacements() []string {
}
func (p *mistral3Model) repack(name string, data []float32, shape []uint64) ([]float32, error) {
dims := make([]int, len(shape))
for i, dim := range shape {
dims[i] = int(dim)
var dims []int
for _, dim := range shape {
dims = append(dims, int(dim))
}
var heads uint32

View File

@@ -0,0 +1,181 @@
package convert
import (
"cmp"
"fmt"
"strings"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/fs/ggml"
)
type mistral3CausalModel struct {
ModelParameters
NumHiddenLayers uint32 `json:"num_hidden_layers"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RopeTheta float32 `json:"rope_theta"`
RMSNormEPS float32 `json:"rms_norm_eps"`
HeadDim uint32 `json:"head_dim"`
SlidingWindow *uint32 `json:"sliding_window"`
HiddenAct string `json:"hidden_act"`
VocabSize uint32 `json:"vocab_size"`
RopeParameters struct {
BetaFast float32 `json:"beta_fast"`
BetaSlow float32 `json:"beta_slow"`
Factor float32 `json:"factor"`
Llama4ScalingBeta *float32 `json:"llama_4_scaling_beta"`
OrigMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
RopeType string `json:"rope_type"`
RopeTheta float32 `json:"rope_theta"`
Mscale *float32 `json:"mscale"`
MscaleAllDim *float32 `json:"mscale_all_dim"`
} `json:"rope_parameters"`
}
func (p *mistral3CausalModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "mistral3"
kv["mistral3.vocab_size"] = p.VocabSize
// Text configuration
kv["mistral3.block_count"] = p.NumHiddenLayers
kv["mistral3.context_length"] = p.MaxPositionEmbeddings
kv["mistral3.embedding_length"] = p.HiddenSize
kv["mistral3.feed_forward_length"] = p.IntermediateSize
kv["mistral3.attention.head_count"] = p.NumAttentionHeads
kv["mistral3.attention.head_count_kv"] = p.NumKeyValueHeads
kv["mistral3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["mistral3.attention.key_length"] = p.HeadDim
kv["mistral3.attention.value_length"] = p.HeadDim
kv["mistral3.rope.dimension_count"] = cmp.Or(p.HeadDim, p.HiddenSize/p.NumAttentionHeads)
kv["mistral3.rope.freq_base"] = cmp.Or(p.RopeTheta, p.RopeParameters.RopeTheta)
kv["mistral3.rope.scaling.factor"] = p.RopeParameters.Factor
kv["mistral3.rope.scaling.type"] = p.RopeParameters.RopeType
kv["mistral3.rope.scaling.beta_fast"] = p.RopeParameters.BetaFast
kv["mistral3.rope.scaling.beta_slow"] = p.RopeParameters.BetaSlow
if p.RopeParameters.Mscale != nil {
kv["mistral3.rope.scaling.mscale"] = *p.RopeParameters.Mscale
}
if p.RopeParameters.MscaleAllDim != nil {
kv["mistral3.rope.scaling.mscale_all_dim"] = *p.RopeParameters.MscaleAllDim
}
if p.RopeParameters.OrigMaxPositionEmbeddings > 0 {
kv["mistral3.rope.scaling.original_context_length"] = p.RopeParameters.OrigMaxPositionEmbeddings
kv["mistral3.rope.scaling_beta"] = *p.RopeParameters.Llama4ScalingBeta
}
if p.RopeParameters.Llama4ScalingBeta != nil {
kv["mistral3.rope.scaling_beta"] = *p.RopeParameters.Llama4ScalingBeta
}
return kv
}
func (p *mistral3CausalModel) Tensors(ts []Tensor) []*ggml.Tensor {
var out []*ggml.Tensor
for _, t := range ts {
if !strings.HasPrefix(t.Name(), "v.") {
if strings.HasSuffix(t.Name(), ".attn_q.weight") ||
strings.HasSuffix(t.Name(), ".attn_k.weight") {
t.SetRepacker(p.repack)
}
}
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *mistral3CausalModel) Replacements() []string {
return []string{
"model.norm", "output_norm",
"model.", "",
"layers", "blk",
"transformer.layers", "blk",
"vision_tower", "v",
"ln_pre", "encoder_norm",
"input_layernorm", "attn_norm",
"post_attention_layernorm", "ffn_norm",
"embed_tokens", "token_embd",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.down_proj", "ffn_down",
"mlp.gate_proj", "ffn_gate",
"mlp.up_proj", "ffn_up",
"attention.q_proj", "attn_q",
"attention.k_proj", "attn_k",
"attention.v_proj", "attn_v",
"attention.o_proj", "attn_output",
"attention_norm", "attn_norm",
"feed_forward.gate_proj", "ffn_gate",
"feed_forward.down_proj", "ffn_down",
"feed_forward.up_proj", "ffn_up",
"multi_modal_projector", "mm",
"ffn_norm", "ffn_norm",
"lm_head", "output",
}
}
func (p *mistral3CausalModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
var dims []int
for _, dim := range shape {
dims = append(dims, int(dim))
}
var heads uint32
if strings.HasSuffix(name, ".attn_q.weight") {
heads = p.NumAttentionHeads
} else if strings.HasSuffix(name, ".attn_k.weight") {
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
} else {
return nil, fmt.Errorf("unknown tensor for repack: %s", name)
}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}

View File

@@ -0,0 +1,213 @@
package convert
import (
"cmp"
"encoding/json"
"io/fs"
"path/filepath"
"slices"
"strings"
"github.com/ollama/ollama/fs/ggml"
)
type nomicbertModel struct {
ModelParameters
NLayers uint32 `json:"n_layers"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
LayerNormEPS float32 `json:"layer_norm_eps"`
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
RopeFreqBase float32 `json:"rope_theta"`
normalizeEmbeddings bool
PoolingType uint32
// MoE parameters (only present in v2 models)
NumExperts uint32 `json:"num_local_experts"`
NumExpertsUsed uint32 `json:"num_experts_per_tok"`
MoEEveryNLayers uint32 `json:"moe_every_n_layers"`
}
var (
_ ModelConverter = (*nomicbertModel)(nil)
_ moreParser = (*nomicbertModel)(nil)
)
func (p *nomicbertModel) parseMore(fsys fs.FS) error {
bts, err := fs.ReadFile(fsys, "modules.json")
if err != nil {
return err
}
var modules []struct {
Type string `json:"type"`
Path string `json:"path"`
}
if err := json.Unmarshal(bts, &modules); err != nil {
return err
}
var pooling string
for _, m := range modules {
switch m.Type {
case "sentence_transformers.models.Pooling":
pooling = m.Path
case "sentence_transformers.models.Normalize":
p.normalizeEmbeddings = true
}
}
if pooling != "" {
bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
if err != nil {
return err
}
var pc struct {
PoolingModeCLSToken bool `json:"pooling_mode_cls_token"`
PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
}
if err := json.Unmarshal(bts, &pc); err != nil {
return err
}
if pc.PoolingModeMeanTokens {
p.PoolingType = 1
} else if pc.PoolingModeCLSToken {
p.PoolingType = 2
}
}
return nil
}
func (p *nomicbertModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
// Determine architecture based on MoE parameters (following qwen3 pattern)
arch := "nomic-bert"
if p.MoEEveryNLayers > 0 {
arch += "-moe"
}
kv["general.architecture"] = arch
kv["attention.causal"] = false
kv["pooling_type"] = p.PoolingType
kv["normalize_embeddings"] = p.normalizeEmbeddings
kv["block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers)
if contextLength := p.MaxPositionEmbeddings; contextLength > 0 {
kv["context_length"] = contextLength
}
if embeddingLength := p.HiddenSize; embeddingLength > 0 {
kv["embedding_length"] = p.HiddenSize
}
if feedForwardLength := p.IntermediateSize; feedForwardLength > 0 {
kv["feed_forward_length"] = p.IntermediateSize
}
if headCount := p.NumAttentionHeads; headCount > 0 {
kv["attention.head_count"] = p.NumAttentionHeads
}
if kvHeadCount := p.NumKeyValueHeads; kvHeadCount > 0 {
kv["attention.head_count_kv"] = p.NumKeyValueHeads
}
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon); layerNormEpsilon > 0 {
kv["attention.layer_norm_epsilon"] = layerNormEpsilon
}
if p.RopeFreqBase > 0 {
kv["rope.freq_base"] = p.RopeFreqBase
}
// MoE specific parameters (only if MoE is enabled)
if p.NumExperts > 0 {
kv["expert_count"] = p.NumExperts
}
if p.NumExpertsUsed > 0 {
kv["expert_used_count"] = p.NumExpertsUsed
}
if p.MoEEveryNLayers > 0 {
kv["moe_every_n_layers"] = p.MoEEveryNLayers
}
kv["tokenizer.ggml.model"] = "bert"
kv["tokenizer.ggml.token_type_count"] = uint32(2)
// convert to phantom space tokens
for i, e := range t.Tokens {
switch {
case strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]"):
// noop - keep special tokens as-is
case strings.HasPrefix(e, "##"):
t.Tokens[i] = e[2:]
default:
t.Tokens[i] = "\u2581" + e
}
}
kv["tokenizer.ggml.tokens"] = t.Tokens
return kv
}
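The phantom-space rewrite in `KV` above converts WordPiece tokens to the SentencePiece convention: `##` continuation markers are stripped and word-initial pieces get a U+2581 prefix. A standalone sketch with illustrative tokens:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	tokens := []string{"[CLS]", "hello", "##ing", "world"}
	for i, e := range tokens {
		switch {
		case strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]"):
			// keep special tokens like [CLS] as-is
		case strings.HasPrefix(e, "##"):
			tokens[i] = e[2:] // continuation piece: drop the ## marker
		default:
			tokens[i] = "\u2581" + e // word-initial piece: add phantom space
		}
	}
	fmt.Println(tokens) // [[CLS] ▁hello ing ▁world]
}
```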
func (p *nomicbertModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, 0, len(ts))
for _, t := range ts {
if slices.Contains([]string{
"embeddings.position_ids",
"pooler.dense.weight",
"pooler.dense.bias",
}, t.Name()) {
continue
}
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (nomicbertModel) Replacements() []string {
return []string{
"encoder.layer", "blk",
"encoder.layers", "blk",
"embeddings.word_embeddings", "token_embd",
"embeddings.token_type_embeddings", "token_types",
"embeddings.LayerNorm", "token_embd_norm",
"attention.self.qkv", "attn_qkv",
"attention.output.dense", "attn_output",
"attention.output.LayerNorm", "attn_output_norm",
"mlp.up", "ffn_up",
"mlp.down", "ffn_down",
"mlp.router", "ffn_gate_inp",
"mlp.experts.up", "ffn_up_exps",
"mlp.experts.down", "ffn_down_exps",
"intermediate.dense", "ffn_up",
"output.dense", "ffn_down",
"output.LayerNorm", "layer_output_norm",
}
}

convert/convert_olmo.go (new file, 117 lines)
View File

@@ -0,0 +1,117 @@
package convert
import (
"cmp"
"github.com/ollama/ollama/fs/ggml"
)
type ropeScaling struct {
Factor float32 `json:"factor"`
OriginalMaxPositionEmbeds uint32 `json:"original_max_position_embeddings"`
AttentionFactor float32 `json:"attention_factor"`
BetaFast float32 `json:"beta_fast"`
BetaSlow float32 `json:"beta_slow"`
RopeType string `json:"rope_type"`
ExtrapolationFactor float32 `json:"extrapolation_factor"`
}
type olmoModel struct {
ModelParameters
HiddenSize uint32 `json:"hidden_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
RMSNormEPS float32 `json:"rms_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
RopeScaling *ropeScaling `json:"rope_scaling"`
SlidingWindow uint32 `json:"sliding_window"`
LayerTypes []string `json:"layer_types"`
}
var _ ModelConverter = (*olmoModel)(nil)
func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "olmo3"
kv["olmo3.block_count"] = p.NumHiddenLayers
kv["olmo3.context_length"] = p.MaxPositionEmbeddings
kv["olmo3.embedding_length"] = p.HiddenSize
kv["olmo3.feed_forward_length"] = p.IntermediateSize
kv["olmo3.attention.head_count"] = p.NumAttentionHeads
kv["olmo3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
if p.RopeTheta > 0 {
kv["olmo3.rope.freq_base"] = p.RopeTheta
}
if p.RopeScaling != nil {
if p.RopeScaling.Factor > 0 {
kv["olmo3.rope.scaling.factor"] = p.RopeScaling.Factor
}
if p.RopeScaling.OriginalMaxPositionEmbeds > 0 {
kv["olmo3.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeds
}
if p.RopeScaling.AttentionFactor > 0 {
kv["olmo3.rope.scaling.attn_factor"] = p.RopeScaling.AttentionFactor
}
if p.RopeScaling.RopeType != "" {
kv["olmo3.rope.scaling.type"] = p.RopeScaling.RopeType
}
}
if p.RMSNormEPS > 0 {
kv["olmo3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
}
if p.SlidingWindow > 0 {
kv["olmo3.attention.sliding_window"] = p.SlidingWindow
}
if len(p.LayerTypes) > 0 {
slidingPattern := make([]bool, len(p.LayerTypes))
for i, layerType := range p.LayerTypes {
slidingPattern[i] = (layerType == "sliding_attention")
}
kv["olmo3.attention.sliding_window_pattern"] = slidingPattern
}
return kv
}
func (p *olmoModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, 0, len(ts))
for _, t := range ts {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *olmoModel) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.layers", "blk",
"model.norm", "output_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_norm", "attn_k_norm",
"post_attention_layernorm", "post_attention_norm",
"post_feedforward_layernorm", "post_ffw_norm",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
}
}

View File

@@ -49,20 +49,20 @@ func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
}
func (q *qwen2Model) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, len(ts))
for i, t := range ts {
out[i] = &ggml.Tensor{
var out []*ggml.Tensor
for _, t := range ts {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
}
})
}
return out
}
func (q *qwen2Model) Replacements() []string {
func (p *qwen2Model) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",

View File

@@ -90,9 +90,9 @@ func (q *qwen25VLModel) Tensors(ts []Tensor) []*ggml.Tensor {
return out
}
func (q *qwen25VLModel) Replacements() []string {
func (p *qwen25VLModel) Replacements() []string {
return append(
q.qwen2Model.Replacements(),
p.qwen2Model.Replacements(),
"visual", "v",
"blocks", "blk",
"attn.proj", "attn_out",

View File

@@ -54,6 +54,6 @@ func (t torch) Clone() Tensor {
}
}
func (t torch) WriteTo(w io.Writer) (int64, error) {
func (pt torch) WriteTo(w io.Writer) (int64, error) {
return 0, nil
}

View File

@@ -49,7 +49,8 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
// temporary fix to handle gemma3 broken configs
if slices.Contains([]string{"<end_of_turn>", "<start_of_turn>"}, piece.GetPiece()) {
// TODO(parthsareen): allow reading of tokenizer.json to allow managing special tokens when using spm
if slices.Contains([]string{"<end_of_turn>", "<start_of_turn>", "<start_function_declaration>", "<end_function_declaration>", "<start_function_call>", "<end_function_call>", "<start_function_response>", "<end_function_response>", "<escape>"}, piece.GetPiece()) {
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
}
@@ -82,7 +83,7 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
content string
}
ts := make([]t, 0, len(atm))
var ts []t
for content, id := range atm {
ts = append(ts, t{id, content})
}

View File

@@ -65,6 +65,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
}
slog.Info("discovering available GPUs...")
detectIncompatibleLibraries()
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
overrideWarnings()
@@ -98,6 +99,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
continue
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
continue
} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
continue
} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
continue
@@ -125,15 +129,25 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
supportedMu := sync.Mutex{}
supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index
for i := range devices {
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
if !devices[i].NeedsInitValidation() {
// No need to validate, add to the supported map
supportedMu.Lock()
if _, ok := supported[devices[i].Library]; !ok {
supported[devices[i].Library] = make(map[string]map[string]int)
}
if _, ok := supported[devices[i].Library][libDir]; !ok {
supported[devices[i].Library][libDir] = make(map[string]int)
}
supported[devices[i].Library][libDir][devices[i].ID] = i
supportedMu.Unlock()
continue
}
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
slog.Debug("verifying if device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
wg.Add(1)
go func(i int) {
defer wg.Done()
extraEnvs := ml.GetVisibleDevicesEnv(devices[i : i+1])
extraEnvs := ml.GetVisibleDevicesEnv(devices[i:i+1], true)
devices[i].AddInitValidation(extraEnvs)
if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
slog.Debug("filtering device which didn't fully initialize",
@@ -319,7 +333,8 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
defer cancel()
// Apply any dev filters to avoid re-discovering unsupported devices, and get IDs correct
devFilter := ml.GetVisibleDevicesEnv(devices)
// We avoid CUDA filters here to keep ROCm from failing to discover GPUs in a mixed environment
devFilter := ml.GetVisibleDevicesEnv(devices, false)
for dir := range libDirs {
updatedDevices := bootstrapDevices(ctx, []string{ml.LibOllamaPath, dir}, devFilter)
@@ -474,3 +489,16 @@ func overrideWarnings() {
slog.Warn("if GPUs are not correctly discovered, unset and try again")
}
}
func detectIncompatibleLibraries() {
if runtime.GOOS != "windows" {
return
}
basePath, err := exec.LookPath("ggml-base.dll")
if err != nil || basePath == "" {
return
}
if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
}
}
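The supported-device bookkeeping above uses a three-level map keyed by `[Library][libDir][ID]`; each level must be initialized before insertion. A minimal sketch of that pattern (keys are illustrative):

```go
package main

import "fmt"

func main() {
	supported := make(map[string]map[string]map[string]int)
	add := func(library, libDir, id string, idx int) {
		if _, ok := supported[library]; !ok {
			supported[library] = make(map[string]map[string]int)
		}
		if _, ok := supported[library][libDir]; !ok {
			supported[library][libDir] = make(map[string]int)
		}
		supported[library][libDir][id] = idx
	}
	add("cuda", "cuda_v12", "GPU-0", 0)
	add("cuda", "cuda_v12", "GPU-1", 1)
	fmt.Println(supported)
}
```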

View File

@@ -50,7 +50,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
Advanced parameters (optional):
- `format`: the format to return a response in. Format can be `json` or a JSON schema
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.mdx#valid-parameters-and-values) such as `temperature`
- `system`: system message (overrides what is defined in the `Modelfile`)
- `template`: the prompt template to use (overrides what is defined in the `Modelfile`)
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
@@ -507,7 +507,7 @@ The `message` object has the following fields:
Advanced parameters (optional):
- `format`: the format to return a response in. Format can be `json` or a JSON schema.
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.mdx#valid-parameters-and-values) such as `temperature`
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
@@ -895,11 +895,11 @@ curl http://localhost:11434/api/chat -d '{
"tool_calls": [
{
"function": {
"name": "get_temperature",
"name": "get_weather",
"arguments": {
"city": "Toronto"
}
},
}
}
]
},
@@ -907,7 +907,7 @@ curl http://localhost:11434/api/chat -d '{
{
"role": "tool",
"content": "11 degrees celsius",
"tool_name": "get_temperature",
"tool_name": "get_weather"
}
],
"stream": false,
@@ -1189,7 +1189,7 @@ If you are creating a model from a safetensors directory or from a GGUF file, yo
- `template`: (optional) the prompt template for the model
- `license`: (optional) a string or list of strings containing the license or licenses for the model
- `system`: (optional) a string containing the system prompt for the model
- `parameters`: (optional) a dictionary of parameters for the model (see [Modelfile](./modelfile.md#valid-parameters-and-values) for a list of parameters)
- `parameters`: (optional) a dictionary of parameters for the model (see [Modelfile](./modelfile.mdx#valid-parameters-and-values) for a list of parameters)
- `messages`: (optional) a list of message objects used to create a conversation
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
- `quantize` (optional): quantize a non-quantized (e.g. float16) model
@@ -1698,7 +1698,7 @@ Generate embeddings from a model
Advanced parameters:
- `truncate`: truncates the end of each input to fit within context length. Returns error if `false` and context length is exceeded. Defaults to `true`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.mdx#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
- `dimensions`: number of dimensions for the embedding
@@ -1817,7 +1817,7 @@ Generate embeddings from a model
Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.mdx#valid-parameters-and-values) such as `temperature`
- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
### Examples

View File

File diff suppressed because one or more lines are too long

View File

@@ -15,7 +15,7 @@ Also known as "single-shot" tool calling.
```shell
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
"model": "qwen3",
"messages": [{"role": "user", "content": "What's the temperature in New York?"}],
"messages": [{"role": "user", "content": "What is the temperature in New York?"}],
"stream": false,
"tools": [
{
@@ -41,7 +41,7 @@ Also known as "single-shot" tool calling.
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
"model": "qwen3",
"messages": [
{"role": "user", "content": "What's the temperature in New York?"},
{"role": "user", "content": "What is the temperature in New York?"},
{
"role": "assistant",
"tool_calls": [
@@ -90,7 +90,7 @@ Also known as "single-shot" tool calling.
}
return temperatures.get(city, "Unknown")
messages = [{"role": "user", "content": "What's the temperature in New York?"}]
messages = [{"role": "user", "content": "What is the temperature in New York?"}]
# pass functions directly as tools in the tools list or as a JSON schema
response = chat(model="qwen3", messages=messages, tools=[get_temperature], think=True)
@@ -146,7 +146,7 @@ Also known as "single-shot" tool calling.
},
]
const messages = [{ role: 'user', content: "What's the temperature in New York?" }]
const messages = [{ role: 'user', content: "What is the temperature in New York?" }]
const response = await ollama.chat({
model: 'qwen3',
@@ -609,7 +609,7 @@ def get_temperature(city: str) -> str:
return temperatures.get(city, 'Unknown')
messages = [{'role': 'user', 'content': "What's the temperature in New York?"}]
messages = [{'role': 'user', 'content': "What is the temperature in New York?"}]
while True:
stream = chat(
@@ -684,7 +684,7 @@ const getTemperatureTool = {
}
async function agentLoop() {
const messages = [{ role: 'user', content: "What's the temperature in New York?" }]
const messages = [{ role: 'user', content: "What is the temperature in New York?" }]
while (true) {
const stream = await ollama.chat({

View File

@@ -36,7 +36,6 @@ Provide an `images` array. SDKs accept file paths, URLs or raw bytes while the R
}],
"stream": false
}'
"
```
</Tab>
<Tab title="Python">

View File

@@ -49,6 +49,8 @@ Install prerequisites:
- [Ninja](https://github.com/ninja-build/ninja/releases)
- (Optional) NVIDIA GPU support
- [CUDA SDK](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=11&target_type=exe_network)
- (Optional) Vulkan GPU support
- [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) - useful for AMD/Intel GPUs
Then, configure and build the project:
@@ -57,6 +59,17 @@ cmake -B build
cmake --build build --config Release
```
> Building for Vulkan requires the VULKAN_SDK environment variable:
>
> PowerShell
> ```powershell
> $env:VULKAN_SDK="C:\VulkanSDK\<version>"
> ```
> CMD
> ```cmd
> set VULKAN_SDK=C:\VulkanSDK\<version>
> ```
> [!IMPORTANT]
> Building for ROCm requires additional flags:
> ```
@@ -65,6 +78,7 @@ cmake --build build --config Release
> ```
Lastly, run Ollama:
```shell
@@ -84,7 +98,9 @@ Install prerequisites:
- [ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
- (Optional) NVIDIA GPU support
- [CUDA SDK](https://developer.nvidia.com/cuda-downloads)
- (Optional) Vulkan GPU support
- [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) - useful for AMD/Intel GPUs
- Or install via package manager: `sudo apt install vulkan-sdk` (Ubuntu/Debian) or `sudo dnf install vulkan-sdk` (Fedora/CentOS)
> [!IMPORTANT]
> Ensure prerequisites are in `PATH` before running CMake.

View File

@@ -14,11 +14,11 @@ curl -fsSL https://ollama.com/install.sh | sh
## How can I view the logs?
Review the [Troubleshooting](./troubleshooting.md) docs for more about using logs.
Review the [Troubleshooting](./troubleshooting) docs for more about using logs.
## Is my GPU compatible with Ollama?
Please refer to the [GPU docs](./gpu.md).
Please refer to the [GPU docs](./gpu).
## How can I specify the context window size?
@@ -57,8 +57,13 @@ ollama ps
```
<Info>
**Output**: ``` NAME ID SIZE PROCESSOR UNTIL llama3:70b bcfb190ca3a7 42 GB
100% GPU 4 minutes from now ```
**Output**:
```
NAME ID SIZE PROCESSOR UNTIL
llama3:70b bcfb190ca3a7 42 GB 100% GPU 4 minutes from now
```
</Info>
The `Processor` column will show which memory the model was loaded into:
@@ -385,4 +390,4 @@ Ollama for Windows and macOS register as a login item during installation. You
- In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`
**MacOS**
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background", then click the slider to disable.

View File

@@ -33,7 +33,7 @@ Check your compute compatibility to see if your card is supported:
| 5.0 | GeForce GTX | `GTX 750 Ti` `GTX 750` `NVS 810` |
| | Quadro | `K2200` `K1200` `K620` `M1200` `M520` `M5000M` `M4000M` `M3000M` `M2000M` `M1000M` `K620M` `M600M` `M500M` |
For building locally to support older GPUs, see [developer.md](./development.md#linux-cuda-nvidia)
For building locally to support older GPUs, see [developer](./development#linux-cuda-nvidia)
### GPU Selection
@@ -54,7 +54,7 @@ sudo modprobe nvidia_uvm
Ollama supports the following AMD GPUs via the ROCm library:
> [!NOTE]
> **NOTE:**
> Additional AMD GPU support is provided by the Vulkan Library - see below.
@@ -132,9 +132,9 @@ Ollama supports GPU acceleration on Apple devices via the Metal API.
## Vulkan GPU Support
> [!NOTE]
> **NOTE:**
> Vulkan is currently an Experimental feature. To enable, you must set OLLAMA_VULKAN=1 for the Ollama server as
described in the [FAQ](faq.md#how-do-i-configure-ollama-server)
described in the [FAQ](faq#how-do-i-configure-ollama-server)
Additional GPU support on Windows and Linux is provided via
[Vulkan](https://www.vulkan.org/). On Windows most GPU vendors' drivers come
@@ -161,6 +161,6 @@ sudo setcap cap_perfmon+ep /usr/local/bin/ollama
To select specific Vulkan GPU(s), you can set the environment variable
`GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
described in the [FAQ](faq.md#how-do-i-configure-ollama-server). If you
described in the [FAQ](faq#how-do-i-configure-ollama-server). If you
encounter any problems with Vulkan-based GPUs, you can disable all Vulkan GPUs
by setting `GGML_VK_VISIBLE_DEVICES=-1`.

View File

@@ -41,6 +41,7 @@ INSTRUCTION arguments
| [`ADAPTER`](#adapter) | Defines the (Q)LoRA adapters to apply to the model. |
| [`LICENSE`](#license) | Specifies the legal license. |
| [`MESSAGE`](#message) | Specify message history. |
| [`REQUIRES`](#requires) | Specify the minimum version of Ollama required by the model. |
## Examples
@@ -149,9 +150,6 @@ PARAMETER <parameter> <parametervalue>
| Parameter | Description | Value Type | Example Usage |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------------- |
| mirostat | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) | int | mirostat 0 |
| mirostat_eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) | float | mirostat_eta 0.1 |
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
@@ -251,6 +249,16 @@ MESSAGE user Is Ontario in Canada?
MESSAGE assistant yes
```
### REQUIRES
The `REQUIRES` instruction allows you to specify the minimum version of Ollama required by the model.
```
REQUIRES <version>
```
The version should be a valid Ollama version (e.g. 0.14.0).
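For example, a Modelfile that pins the minimum server version (base model name illustrative):
```
FROM llama3.2
REQUIRES 0.14.0
```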
## Notes
- the **`Modelfile` is not case sensitive**. In the examples, uppercase instructions are used to make them easier to distinguish from arguments.

View File

@@ -0,0 +1,46 @@
# extract-examples
Extracts code examples from MDX files to a temp directory so you can run them.
## Usage
```shell
go run docs/tools/extract-examples/main.go <mdx-file>
```
## Example
```shell
go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx
```
Output:
```
Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
- 01_basic.py
- 01_basic.js
- 01_basic.sh
- 02_responses.py
- 02_responses.js
- 02_responses.sh
- 03_vision.py
- 03_vision.js
- 03_vision.sh
Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
To run examples:
cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
npm install # for JS examples
then run individual files with `node file.js`, `python file.py`, `bash file.sh`
```
## How it works
- Parses MDX files looking for fenced code blocks with filenames (e.g., ` ```python basic.py `)
- Groups examples by their `<CodeGroup>` and prefixes filenames with `01_`, `02_`, etc.
- Writes all extracted files to a temp directory
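For instance, given an MDX input like the following (contents illustrative), the tool would write `01_basic.py` to the temp directory:

````mdx
<CodeGroup>

```python basic.py
print("hello from an extracted example")
```

</CodeGroup>
````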

View File

@@ -0,0 +1,137 @@
package main
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)
func main() {
if len(os.Args) < 2 {
fmt.Fprintln(os.Stderr, "Usage: go run docs/tools/extract-examples/main.go <mdx-file>")
os.Exit(1)
}
mdxFile := os.Args[1]
f, err := os.Open(mdxFile)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
os.Exit(1)
}
defer f.Close()
// Create temp directory
tempDir, err := os.MkdirTemp("", "mdx-examples-*")
if err != nil {
fmt.Fprintf(os.Stderr, "Error creating temp dir: %v\n", err)
os.Exit(1)
}
fmt.Printf("Extracting code examples to: %s\n\n", tempDir)
// Patterns
codeBlockStart := regexp.MustCompile("^```([a-zA-Z0-9_-]+)\\s+([^\\s]+)$")
codeGroupStart := regexp.MustCompile("^<CodeGroup")
codeGroupEnd := regexp.MustCompile("^</CodeGroup>")
scanner := bufio.NewScanner(f)
inCodeBlock := false
inCodeGroup := false
var currentFile string
var content strings.Builder
count := 0
codeGroupNum := 0
for scanner.Scan() {
line := scanner.Text()
// Track CodeGroup boundaries
if codeGroupStart.MatchString(line) {
inCodeGroup = true
codeGroupNum++
continue
}
if codeGroupEnd.MatchString(line) {
inCodeGroup = false
continue
}
if inCodeBlock {
if line == "```" {
// End of code block - write file
if currentFile != "" {
outPath := filepath.Join(tempDir, currentFile)
if err := os.WriteFile(outPath, []byte(content.String()), 0o644); err != nil {
fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", currentFile, err)
} else {
fmt.Printf(" - %s\n", currentFile)
count++
}
}
inCodeBlock = false
currentFile = ""
content.Reset()
} else {
content.WriteString(line)
content.WriteString("\n")
}
} else {
if matches := codeBlockStart.FindStringSubmatch(line); matches != nil {
inCodeBlock = true
filename := matches[2]
// Prefix with CodeGroup number if inside a CodeGroup
if inCodeGroup {
currentFile = fmt.Sprintf("%02d_%s", codeGroupNum, filename)
} else {
currentFile = filename
}
content.Reset()
}
}
}
if err := scanner.Err(); err != nil {
fmt.Fprintf(os.Stderr, "Error reading file: %v\n", err)
os.Exit(1)
}
// Write package.json for JavaScript dependencies
packageJSON := `{
"name": "mdx-examples",
"type": "module",
"dependencies": {
"openai": "^4",
"ollama": "^0.5"
}
}
`
if err := os.WriteFile(filepath.Join(tempDir, "package.json"), []byte(packageJSON), 0o644); err != nil {
fmt.Fprintf(os.Stderr, "Error writing package.json: %v\n", err)
}
// Write pyproject.toml for Python dependencies
pyprojectTOML := `[project]
name = "mdx-examples"
version = "0.0.0"
dependencies = [
"openai",
"ollama",
]
`
if err := os.WriteFile(filepath.Join(tempDir, "pyproject.toml"), []byte(pyprojectTOML), 0o644); err != nil {
fmt.Fprintf(os.Stderr, "Error writing pyproject.toml: %v\n", err)
}
fmt.Printf("\n")
fmt.Printf("Extracted %d file(s) to %s\n", count, tempDir)
fmt.Printf("\n")
fmt.Printf("To run examples:\n")
fmt.Printf("\n")
fmt.Printf(" cd %s\n npm install # for JS examples\n", tempDir)
fmt.Printf("\n")
fmt.Printf("then run individual files with `node file.js`, `python file.py`, `bash file.sh`\n")
}

View File

@@ -87,7 +87,7 @@ When Ollama starts up, it takes inventory of the GPUs present in the system to d
### Linux NVIDIA Troubleshooting
If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker.md](./docker.md)
If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker](./docker)
Sometimes Ollama can have difficulty initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem.

View File

@@ -206,6 +206,8 @@ var (
UseAuth = Bool("OLLAMA_AUTH")
// Enable Vulkan backend
EnableVulkan = Bool("OLLAMA_VULKAN")
// Usage enables usage statistics reporting
Usage = Bool("OLLAMA_USAGE")
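// Illustrative call site (not part of the diff): the accessor pattern
// means readers invoke the returned func, e.g. `if Usage() { /* report */ }`.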
)
func String(s string) func() string {

View File

@@ -13,6 +13,7 @@ import (
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/fs/util/bufioutil"
"github.com/ollama/ollama/ml"
)
type GGML struct {
@@ -240,18 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
func (kv KV) OllamaEngineRequired() bool {
return slices.Contains([]string{
"bert",
"deepseek2",
"deepseekocr",
"gemma3",
"gemma3n",
"gptoss", "gpt-oss",
"llama4",
"mistral3",
"mllama",
"nomic-bert",
"olmo3",
"qwen25vl",
"qwen3", "qwen3moe",
"qwen3vl", "qwen3vlmoe",
"deepseekocr",
"deepseek2",
"nomic-bert",
}, kv.Architecture())
}
@@ -300,9 +303,9 @@ func (s Tensors) Items(prefix ...string) []*Tensor {
return items
}
func (s Tensors) GroupLayers() map[string]Layer {
func (ts Tensors) GroupLayers() map[string]Layer {
layers := make(map[string]Layer)
for _, t := range s.items {
for _, t := range ts.items {
parts := strings.Split(t.Name, ".")
if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
if len(parts) > index+2 {
@@ -550,7 +553,7 @@ func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, error) {
}, nil
}
func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType string, useFlashAttention bool) (kv []uint64, partialOffload, fullOffload uint64) {
func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType string, useFlashAttention ml.FlashAttentionType) (kv []uint64, partialOffload, fullOffload uint64) {
context *= uint64(numParallel)
embedding := f.KV().EmbeddingLength()
@@ -791,7 +794,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
}
partialOffload = 2 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
if useFlashAttention {
if useFlashAttention == ml.FlashAttentionEnabled {
// rough estimate of graph size with flash attention on
partialOffload = (4*uint64(numParallel) + context>>10 + 110) * format.MebiByte
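// Worked example (illustrative): context=8192, numParallel=1 gives
// (4*1 + 8192>>10 + 110) MiB = (4 + 8 + 110) MiB = 122 MiB.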
}
@@ -809,6 +812,14 @@ func (f GGML) SupportsKVCacheType(cacheType string) bool {
return slices.Contains([]string{"q8_0", "q4_0"}, cacheType)
}
// KVCacheTypeIsQuantized checks if the requested cache type is a quantized type
func (f GGML) KVCacheTypeIsQuantized(cacheType string) bool {
if cacheType == "" || cacheType == "f16" || cacheType == "f32" || cacheType == "bf16" {
return false
}
return true
}
// SupportsFlashAttention checks if the model supports flash attention
func (f GGML) SupportsFlashAttention() bool {
_, isEmbedding := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]
@@ -829,8 +840,11 @@ func (f GGML) SupportsFlashAttention() bool {
// FlashAttention checks if the model should enable flash attention
func (f GGML) FlashAttention() bool {
return slices.Contains([]string{
"bert",
"gemma3",
"gptoss", "gpt-oss",
"mistral3",
"olmo3",
"qwen3", "qwen3moe",
"qwen3vl", "qwen3vlmoe",
}, f.KV().String("general.architecture"))

View File

@@ -5,7 +5,6 @@ import (
"cmp"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
@@ -226,7 +225,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
Name: name,
Kind: kind,
Offset: offset,
Shape: shape,
Shape: shape[:],
}
llm.tensors = append(llm.tensors, &tensor)
@@ -512,7 +511,7 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error {
arch := kv.String("general.architecture")
if arch == "" {
return errors.New("architecture not set")
return fmt.Errorf("architecture not set")
}
if err := binary.Write(f, binary.LittleEndian, []byte("GGUF")); err != nil {
@@ -598,6 +597,10 @@ func ggufWriteKV(ws io.WriteSeeker, arch, k string, v any) error {
var err error
switch v := v.(type) {
case int32:
err = writeGGUF(ws, ggufTypeInt32, v)
case int64:
err = writeGGUF(ws, ggufTypeInt64, v)
case uint32, FileType:
err = writeGGUF(ws, ggufTypeUint32, v)
case uint64:
@@ -612,6 +615,10 @@ func ggufWriteKV(ws io.WriteSeeker, arch, k string, v any) error {
err = writeGGUFArray(ws, ggufTypeInt32, v)
case *array[int32]:
err = writeGGUFArray(ws, ggufTypeInt32, v.values)
case []int64:
err = writeGGUFArray(ws, ggufTypeInt64, v)
case *array[int64]:
err = writeGGUFArray(ws, ggufTypeInt64, v.values)
case []uint32:
err = writeGGUFArray(ws, ggufTypeUint32, v)
case *array[uint32]:

View File

@@ -42,6 +42,10 @@ func TestWriteGGUF(t *testing.T) {
"general.architecture": "test",
"general.alignment": uint32(16),
"test.key": "value",
"test.int32_key": int32(-42),
"test.int64_key": int64(-9223372036854775808),
"test.int32_array": []int32{-1, 0, 1, 2147483647, -2147483648},
"test.int64_array": []int64{-1, 0, 1, 9223372036854775807, -9223372036854775808},
"attention.key": "value2",
"tokenizer.key": "value3",
"adapter.key": "value4",
@@ -55,7 +59,7 @@ func TestWriteGGUF(t *testing.T) {
}
defer r.Close()
ff, err := Decode(r, 0)
ff, err := Decode(r, -1)
if err != nil {
t.Fatal(err)
}
@@ -65,15 +69,19 @@ func TestWriteGGUF(t *testing.T) {
"general.alignment": uint32(16),
"general.parameter_count": uint64(54),
"test.key": "value",
"test.int32_key": int32(-42),
"test.int64_key": int64(-9223372036854775808),
"test.int32_array": &array[int32]{size: 5, values: []int32{-1, 0, 1, 2147483647, -2147483648}},
"test.int64_array": &array[int64]{size: 5, values: []int64{-1, 0, 1, 9223372036854775807, -9223372036854775808}},
"test.attention.key": "value2",
"tokenizer.key": "value3",
"adapter.key": "value4",
}, ff.KV()); diff != "" {
}, ff.KV(), cmp.AllowUnexported(array[int32]{}, array[int64]{})); diff != "" {
t.Errorf("Mismatch (-want +got):\n%s", diff)
}
if diff := cmp.Diff(Tensors{
Offset: 800,
Offset: 992,
items: []*Tensor{
{Name: "blk.0.attn_k.weight", Offset: 0, Shape: []uint64{2, 3}},
{Name: "blk.0.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},

View File

@@ -136,8 +136,8 @@ func (t FileType) Value() uint32 {
return uint32(t)
}
func (t FileType) ToTensorType() TensorType {
switch t {
func (ftype FileType) ToTensorType() TensorType {
switch ftype {
case FileTypeF32:
return TensorTypeF32
case FileTypeF16:
@@ -177,7 +177,7 @@ func (t FileType) ToTensorType() TensorType {
case fileTypeMXFP4:
return TensorTypeMXFP4
default:
slog.Warn("unsupported file type", "type", t)
slog.Warn("unsupported file type", "type", ftype)
return 0 // F32
}
}

View File

@@ -11,7 +11,7 @@ type KeyValue struct {
}
func (kv KeyValue) Valid() bool {
return kv.Key != "" && kv.value != nil
return kv.Key != "" && kv.Value.value != nil
}
type Value struct {

go.mod
View File

@@ -15,8 +15,8 @@ require (
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.9.0
github.com/x448/float16 v0.8.4
golang.org/x/sync v0.12.0
golang.org/x/sys v0.36.0
golang.org/x/sync v0.17.0
golang.org/x/sys v0.37.0
)
require (
@@ -28,13 +28,17 @@ require (
github.com/nlpodyssey/gopickle v0.3.0
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
github.com/tkrajina/typescriptify-golang-structs v0.2.0
github.com/wk8/go-ordered-map/v2 v2.1.8
golang.org/x/image v0.22.0
golang.org/x/tools v0.30.0
golang.org/x/mod v0.30.0
golang.org/x/tools v0.38.0
gonum.org/v1/gonum v0.15.0
)
require (
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
github.com/bahlo/generic-list-go v0.2.0 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/chewxy/hm v1.0.0 // indirect
github.com/chewxy/math32 v1.11.0 // indirect
@@ -44,6 +48,7 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/flatbuffers v24.3.25+incompatible // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
@@ -76,11 +81,11 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.36.0
golang.org/x/crypto v0.43.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/term v0.30.0
golang.org/x/text v0.23.0
golang.org/x/net v0.46.0 // indirect
golang.org/x/term v0.36.0
golang.org/x/text v0.30.0
google.golang.org/protobuf v1.34.1
gopkg.in/yaml.v3 v3.0.1 // indirect
)

go.sum
View File

@@ -14,7 +14,11 @@ github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6IC
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
@@ -123,6 +127,7 @@ github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
@@ -143,6 +148,8 @@ github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728 h1:QwWKgMY28TAXaDl+
github.com/ledongthuc/pdf v0.0.0-20250511090121-5959a4027728/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
@@ -207,6 +214,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xtgo/set v1.0.0 h1:6BCNBRv3ORNDQ7fyoJXRv+tstJz3m1JVFQErfeZz2pY=
@@ -224,8 +233,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -255,6 +264,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -267,8 +278,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -278,8 +289,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -295,17 +306,17 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -319,8 +330,8 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@@ -200,7 +200,9 @@ func (s *HarmonyParser) parseHeader(raw string) HarmonyHeader {
before := raw[:channelIndex]
after := raw[channelIndex+len("<|channel|>"):]
// the channel name is `after` all the way up to the first (if any) whitespace character
idx := strings.IndexFunc(after, unicode.IsSpace)
idx := strings.IndexFunc(after, func(r rune) bool {
return unicode.IsSpace(r)
})
if idx == -1 {
idx = len(after)
}
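// Illustrative: for a header like "commentary to=functions.get_weather",
// idx points at the first space, so the channel is "commentary".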
@@ -317,12 +319,11 @@ func (h *HarmonyMessageHandler) AddContent(content string, toolParser *HarmonyTo
}
case HarmonyEventContentEmitted:
logutil.Trace("harmony event content", "content", event.Content, "state", h.state)
switch h.state {
case harmonyMessageState_Normal:
if h.state == harmonyMessageState_Normal {
contentSb.WriteString(event.Content)
case harmonyMessageState_Thinking:
} else if h.state == harmonyMessageState_Thinking {
thinkingSb.WriteString(event.Content)
case harmonyMessageState_ToolCalling:
} else if h.state == harmonyMessageState_ToolCalling {
toolContentSb.WriteString(event.Content)
}
case HarmonyEventMessageEnd:

View File

@@ -4,7 +4,9 @@ package integration
import (
"context"
"errors"
"math"
"strings"
"testing"
"time"
@@ -204,8 +206,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
t.Fatalf("expected %v, got %v (similarity: %f)", expected[0:5], res.Embeddings[0][0:5], sim)
}
if res.PromptEvalCount != 6 {
t.Fatalf("expected 6 prompt tokens, got %d", res.PromptEvalCount)
if res.PromptEvalCount != 8 {
t.Fatalf("expected 8 prompt tokens, got %d", res.PromptEvalCount)
}
}
@@ -251,8 +253,8 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
t.Fatalf("expected %v, got %v (similarity: %f)", expected[1][0:5], res.Embeddings[1][0:5], sim)
}
if res.PromptEvalCount != 12 {
t.Fatalf("expected 12 prompt tokens, got %d", res.PromptEvalCount)
if res.PromptEvalCount != 16 {
t.Fatalf("expected 16 prompt tokens, got %d", res.PromptEvalCount)
}
}
@@ -275,7 +277,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
cases := []struct {
name string
request api.EmbedRequest
check func(*api.EmbedResponse, error)
check func(*testing.T, *api.EmbedResponse, error)
}{
{
name: "target truncation",
@@ -283,7 +285,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
Model: "all-minilm",
Input: "why",
},
check: func(got *api.EmbedResponse, err error) {
check: func(t *testing.T, got *api.EmbedResponse, err error) {
if err != nil {
t.Fatal(err)
}
@@ -300,10 +302,11 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
Input: "why is the sky blue?",
Options: map[string]any{"num_ctx": 3},
},
check: func(got *api.EmbedResponse, err error) {
check: func(t *testing.T, got *api.EmbedResponse, err error) {
if err != nil {
t.Fatal(err)
}
t.Logf("PromptEvalCount: want=%d got=%d", want.PromptEvalCount, got.PromptEvalCount)
if diff := cmp.Diff(want.Embeddings[0], got.Embeddings[0]); diff != "" {
t.Errorf("embedding mismatch (-want +got):\n%s", diff)
}
@@ -317,10 +320,11 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
Truncate: &truncTrue,
Options: map[string]any{"num_ctx": 3},
},
check: func(got *api.EmbedResponse, err error) {
check: func(t *testing.T, got *api.EmbedResponse, err error) {
if err != nil {
t.Fatal(err)
}
t.Logf("PromptEvalCount: want=%d got=%d", want.PromptEvalCount, got.PromptEvalCount)
if diff := cmp.Diff(want.Embeddings[0], got.Embeddings[0]); diff != "" {
t.Errorf("embedding mismatch (-want +got):\n%s", diff)
}
@@ -334,21 +338,21 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
Truncate: &truncFalse,
Options: map[string]any{"num_ctx": 3},
},
check: func(res *api.EmbedResponse, err error) {
if err.Error() != "input exceeds maximum context length" {
check: func(t *testing.T, res *api.EmbedResponse, err error) {
if err.Error() != "the input length exceeds the context length" {
t.Fatalf("expected truncation error, got: %v", err)
}
},
},
{
name: "input after truncate error",
name: "input after truncate error with context length of 1",
request: api.EmbedRequest{
Model: "all-minilm",
Input: "why is the sky blue?",
Truncate: &truncTrue,
Options: map[string]any{"num_ctx": 1},
},
check: func(res *api.EmbedResponse, err error) {
check: func(t *testing.T, res *api.EmbedResponse, err error) {
if err.Error() != "input after truncation exceeds maximum context length" {
t.Fatalf("expected truncation error, got: %v", err)
}
@@ -362,7 +366,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
Truncate: &truncTrue,
Options: map[string]any{"num_ctx": 0},
},
check: func(res *api.EmbedResponse, err error) {
check: func(t *testing.T, res *api.EmbedResponse, err error) {
if err.Error() != "input after truncation exceeds maximum context length" {
t.Fatalf("expected truncation error, got: %v", err)
}
@@ -375,7 +379,7 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
Input: "why is the sky blue? Why is the sky blue? hi there my",
Options: map[string]any{"num_ctx": 16},
},
check: func(res *api.EmbedResponse, err error) {
check: func(t *testing.T, res *api.EmbedResponse, err error) {
if err != nil {
t.Fatal(err)
}
@@ -385,7 +389,8 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
for _, req := range cases {
t.Run(req.name, func(t *testing.T) {
req.check(embedTestHelper(ctx, client, t, req.request))
resp, err := embedTestHelper(ctx, client, t, req.request)
req.check(t, resp, err)
})
}
}
@@ -409,3 +414,230 @@ func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req
return client.Embed(ctx, &req)
}
func TestEmbedTruncation(t *testing.T) {
// Use test deadline if set, otherwise default to 2 minutes
timeout := 2 * time.Minute
if deadline, ok := t.Deadline(); ok {
timeout = time.Until(deadline) - 10*time.Second // Reserve 10s buffer
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
model := model
t.Run(model, func(t *testing.T) {
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
}
// Give each model its own budget to account for first-time pulls/loads
mctx, mcancel := context.WithTimeout(ctx, 3*time.Minute)
defer mcancel()
t.Run("truncation batch", func(t *testing.T) {
truncTrue := true
req := api.EmbedRequest{
Model: model,
Input: []string{"short", strings.Repeat("long ", 100), "medium text"},
Truncate: &truncTrue,
Options: map[string]any{"num_ctx": 30},
}
res, err := embedTestHelper(mctx, client, t, req)
if err != nil {
t.Fatal(err)
}
if len(res.Embeddings) != 3 {
t.Fatalf("expected 3 embeddings, got %d", len(res.Embeddings))
}
if res.PromptEvalCount > 90 {
t.Fatalf("expected tokens <= 90 (3 × 30 max), got %d", res.PromptEvalCount)
}
})
t.Run("runner token count accuracy", func(t *testing.T) {
baseline := api.EmbedRequest{Model: model, Input: "test"}
baseRes, err := embedTestHelper(mctx, client, t, baseline)
if err != nil {
t.Fatal(err)
}
batch := api.EmbedRequest{
Model: model,
Input: []string{"test", "test", "test"},
}
batchRes, err := embedTestHelper(mctx, client, t, batch)
if err != nil {
t.Fatal(err)
}
expectedCount := baseRes.PromptEvalCount * 3
if batchRes.PromptEvalCount < expectedCount-2 || batchRes.PromptEvalCount > expectedCount+2 {
t.Fatalf("expected ~%d tokens (3 × %d), got %d",
expectedCount, baseRes.PromptEvalCount, batchRes.PromptEvalCount)
}
})
})
}
}
// TestEmbedLargeInput tests that embedding models can handle large inputs that would exceed typical batch sizes.
func TestEmbedLargeInput(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
model := model
t.Run(model, func(t *testing.T) {
mctx, mcancel := context.WithTimeout(ctx, 2*time.Minute)
defer mcancel()
// Test with progressively larger inputs
testCases := []struct {
name string
inputWords int
}{
{"medium_input_256_words", 256},
{"large_input_512_words", 512},
{"very_large_input_800_words", 800},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
words := make([]string, tc.inputWords)
for i := range words {
words[i] = "word"
}
input := strings.Join(words, " ")
req := api.EmbedRequest{
Model: model,
Input: input,
KeepAlive: &api.Duration{Duration: 30 * time.Second},
}
res, err := embedTestHelper(mctx, client, t, req)
if err != nil {
t.Fatalf("embedding failed for %d words: %v", tc.inputWords, err)
}
if len(res.Embeddings) != 1 {
t.Fatalf("expected 1 embedding, got %d", len(res.Embeddings))
}
if len(res.Embeddings[0]) == 0 {
t.Fatal("expected non-empty embedding")
}
t.Logf("Successfully embedded %d words (%d tokens)", tc.inputWords, res.PromptEvalCount)
})
}
})
}
}
// TestEmbedStatusCode tests that errors from the embedding endpoint
// properly preserve their HTTP status codes when returned to the client.
// This test specifically checks the error handling path in EmbedHandler
// where api.StatusError errors should maintain their original status code.
func TestEmbedStatusCode(t *testing.T) {
// Use test deadline if set, otherwise default to 2 minutes
timeout := 2 * time.Minute
if deadline, ok := t.Deadline(); ok {
timeout = time.Until(deadline) - 10*time.Second // Reserve 10s buffer
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
model := model
t.Run(model, func(t *testing.T) {
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
}
mctx, mcancel := context.WithTimeout(ctx, 3*time.Minute)
defer mcancel()
// Pull the model if needed
if err := PullIfMissing(mctx, client, model); err != nil {
t.Fatal(err)
}
t.Run("truncation error status code", func(t *testing.T) {
truncFalse := false
longInput := strings.Repeat("word ", 100)
req := api.EmbedRequest{
Model: model,
Input: longInput,
Truncate: &truncFalse,
Options: map[string]any{"num_ctx": 10},
}
_, err := embedTestHelper(mctx, client, t, req)
if err == nil {
t.Fatal("expected error when truncate=false with long input")
}
// Check that it's a StatusError with the correct status code
var statusErr api.StatusError
if !errors.As(err, &statusErr) {
t.Fatalf("expected api.StatusError, got %T: %v", err, err)
}
// The error should be a 4xx client error (likely 400 Bad Request)
// not a 500 Internal Server Error
if statusErr.StatusCode < 400 || statusErr.StatusCode >= 500 {
t.Errorf("expected 4xx status code, got %d", statusErr.StatusCode)
}
// Verify the error message is meaningful
if !strings.Contains(err.Error(), "context length") {
t.Errorf("expected error message to mention context length, got: %v", err)
}
})
t.Run("batch truncation error status code", func(t *testing.T) {
truncFalse := false
req := api.EmbedRequest{
Model: model,
Input: []string{
"short input",
strings.Repeat("very long input ", 100),
"another short input",
},
Truncate: &truncFalse,
Options: map[string]any{"num_ctx": 10},
}
_, err := embedTestHelper(mctx, client, t, req)
if err == nil {
t.Fatal("expected error when one input exceeds context with truncate=false")
}
// Check that it's a StatusError with the correct status code
var statusErr api.StatusError
if !errors.As(err, &statusErr) {
t.Fatalf("expected api.StatusError, got %T: %v", err, err)
}
// The error should be a 4xx client error, not a 500 Internal Server Error
if statusErr.StatusCode < 400 || statusErr.StatusCode >= 500 {
t.Errorf("expected 4xx status code, got %d", statusErr.StatusCode)
}
})
})
}
}

View File

@@ -33,6 +33,9 @@ func TestVisionModels(t *testing.T) {
// Qwen 3 VL mixture of experts
model: "qwen3-vl:30b",
},
{
model: "ministral-3",
},
}
for _, v := range testCases {

View File

@@ -11,6 +11,15 @@ import (
"github.com/ollama/ollama/api"
)
// testPropsMap creates a ToolPropertiesMap from a map (convenience function for tests)
func testPropsMap(m map[string]api.ToolProperty) *api.ToolPropertiesMap {
props := api.NewToolPropertiesMap()
for k, v := range m {
props.Set(k, v)
}
return props
}
func TestAPIToolCalling(t *testing.T) {
initialTimeout := 60 * time.Second
streamTimeout := 60 * time.Second
@@ -30,6 +39,7 @@ func TestAPIToolCalling(t *testing.T) {
"mistral": 6,
"qwen2.5": 6,
"qwen2": 6,
"ministral-3": 20,
"mistral-nemo": 9,
"mistral-small": 16,
"mixtral:8x22b": 80,
@@ -56,12 +66,12 @@ func TestAPIToolCalling(t *testing.T) {
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
Properties: testPropsMap(map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "The city and state, e.g. San Francisco, CA",
},
},
}),
},
},
},

View File

@@ -38,6 +38,7 @@ var (
// Note: add newer models at the top of the list to test them first
ollamaEngineChatModels = []string{
"ministral-3",
"qwen3-coder:30b",
"gpt-oss:20b",
"gemma3n:e2b",
@@ -167,6 +168,7 @@ var (
"medllama2",
"megadolphin",
"minicpm-v",
"ministral-3",
"mistral-large",
"mistral-nemo",
"mistral-openorca",
@@ -270,6 +272,7 @@ var (
"mistral",
"qwen2.5",
"qwen2",
"ministral-3",
"mistral-nemo",
"mistral-small",
"mixtral:8x22b",

View File

@@ -0,0 +1,94 @@
// Package orderedmap provides a generic ordered map that maintains insertion order.
// It wraps github.com/wk8/go-ordered-map/v2 to encapsulate the dependency.
package orderedmap
import (
"encoding/json"
"iter"
orderedmap "github.com/wk8/go-ordered-map/v2"
)
// Map is a generic ordered map that maintains insertion order.
type Map[K comparable, V any] struct {
om *orderedmap.OrderedMap[K, V]
}
// New creates a new empty ordered map.
func New[K comparable, V any]() *Map[K, V] {
return &Map[K, V]{
om: orderedmap.New[K, V](),
}
}
// Get retrieves a value by key.
func (m *Map[K, V]) Get(key K) (V, bool) {
if m == nil || m.om == nil {
var zero V
return zero, false
}
return m.om.Get(key)
}
// Set sets a key-value pair. If the key already exists, its value is updated
// but its position in the iteration order is preserved. If the key is new,
// it is appended to the end.
func (m *Map[K, V]) Set(key K, value V) {
if m == nil {
return
}
if m.om == nil {
m.om = orderedmap.New[K, V]()
}
m.om.Set(key, value)
}
// Len returns the number of entries.
func (m *Map[K, V]) Len() int {
if m == nil || m.om == nil {
return 0
}
return m.om.Len()
}
// All returns an iterator over all key-value pairs in insertion order.
func (m *Map[K, V]) All() iter.Seq2[K, V] {
return func(yield func(K, V) bool) {
if m == nil || m.om == nil {
return
}
for pair := m.om.Oldest(); pair != nil; pair = pair.Next() {
if !yield(pair.Key, pair.Value) {
return
}
}
}
}
// ToMap converts to a regular Go map.
// Note: The resulting map does not preserve order.
func (m *Map[K, V]) ToMap() map[K]V {
if m == nil || m.om == nil {
return nil
}
result := make(map[K]V, m.om.Len())
for pair := m.om.Oldest(); pair != nil; pair = pair.Next() {
result[pair.Key] = pair.Value
}
return result
}
// MarshalJSON implements json.Marshaler. The JSON output preserves key order.
func (m *Map[K, V]) MarshalJSON() ([]byte, error) {
if m == nil || m.om == nil {
return []byte("null"), nil
}
return json.Marshal(m.om)
}
// UnmarshalJSON implements json.Unmarshaler. The insertion order matches the
// order of keys in the JSON input.
func (m *Map[K, V]) UnmarshalJSON(data []byte) error {
m.om = orderedmap.New[K, V]()
return json.Unmarshal(data, &m.om)
}
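A minimal usage sketch of the map above (import path assumed from the repo layout):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/x/orderedmap" // import path assumed
)

func main() {
	m := orderedmap.New[string, int]()
	m.Set("z", 1)
	m.Set("a", 2)
	m.Set("z", 10) // update in place; "z" keeps its position

	for k, v := range m.All() {
		fmt.Println(k, v) // prints z 10, then a 2 — insertion order
	}

	data, _ := json.Marshal(m)
	fmt.Println(string(data)) // {"z":10,"a":2} — key order preserved
}
```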

View File

@@ -0,0 +1,348 @@
package orderedmap
import (
"encoding/json"
"slices"
"testing"
)
func TestMap_BasicOperations(t *testing.T) {
m := New[string, int]()
// Test empty map
if m.Len() != 0 {
t.Errorf("expected Len() = 0, got %d", m.Len())
}
v, ok := m.Get("a")
if ok {
t.Error("expected Get on empty map to return false")
}
if v != 0 {
t.Errorf("expected zero value, got %d", v)
}
// Test Set and Get
m.Set("a", 1)
m.Set("b", 2)
m.Set("c", 3)
if m.Len() != 3 {
t.Errorf("expected Len() = 3, got %d", m.Len())
}
v, ok = m.Get("a")
if !ok || v != 1 {
t.Errorf("expected Get(a) = (1, true), got (%d, %v)", v, ok)
}
v, ok = m.Get("b")
if !ok || v != 2 {
t.Errorf("expected Get(b) = (2, true), got (%d, %v)", v, ok)
}
v, ok = m.Get("c")
if !ok || v != 3 {
t.Errorf("expected Get(c) = (3, true), got (%d, %v)", v, ok)
}
// Test updating existing key preserves position
m.Set("a", 10)
v, ok = m.Get("a")
if !ok || v != 10 {
t.Errorf("expected Get(a) = (10, true), got (%d, %v)", v, ok)
}
if m.Len() != 3 {
t.Errorf("expected Len() = 3 after update, got %d", m.Len())
}
}
func TestMap_InsertionOrderPreserved(t *testing.T) {
m := New[string, int]()
// Insert in non-alphabetical order
m.Set("z", 1)
m.Set("a", 2)
m.Set("m", 3)
m.Set("b", 4)
// Verify iteration order matches insertion order
var keys []string
var values []int
for k, v := range m.All() {
keys = append(keys, k)
values = append(values, v)
}
expectedKeys := []string{"z", "a", "m", "b"}
expectedValues := []int{1, 2, 3, 4}
if !slices.Equal(keys, expectedKeys) {
t.Errorf("expected keys %v, got %v", expectedKeys, keys)
}
if !slices.Equal(values, expectedValues) {
t.Errorf("expected values %v, got %v", expectedValues, values)
}
}
func TestMap_UpdatePreservesPosition(t *testing.T) {
m := New[string, int]()
m.Set("first", 1)
m.Set("second", 2)
m.Set("third", 3)
// Update middle element
m.Set("second", 20)
var keys []string
for k := range m.All() {
keys = append(keys, k)
}
// Order should still be first, second, third
expected := []string{"first", "second", "third"}
if !slices.Equal(keys, expected) {
t.Errorf("expected keys %v, got %v", expected, keys)
}
}
func TestMap_MarshalJSON_PreservesOrder(t *testing.T) {
m := New[string, int]()
// Insert in non-alphabetical order
m.Set("z", 1)
m.Set("a", 2)
m.Set("m", 3)
data, err := json.Marshal(m)
if err != nil {
t.Fatalf("Marshal failed: %v", err)
}
// JSON should preserve insertion order, not alphabetical
expected := `{"z":1,"a":2,"m":3}`
if string(data) != expected {
t.Errorf("expected %s, got %s", expected, string(data))
}
}
func TestMap_UnmarshalJSON_PreservesOrder(t *testing.T) {
// JSON with non-alphabetical key order
jsonData := `{"z":1,"a":2,"m":3}`
m := New[string, int]()
if err := json.Unmarshal([]byte(jsonData), m); err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
// Verify iteration order matches JSON order
var keys []string
for k := range m.All() {
keys = append(keys, k)
}
expected := []string{"z", "a", "m"}
if !slices.Equal(keys, expected) {
t.Errorf("expected keys %v, got %v", expected, keys)
}
}
func TestMap_JSONRoundTrip(t *testing.T) {
// Test that unmarshal -> marshal produces identical JSON
original := `{"zebra":"z","apple":"a","mango":"m","banana":"b"}`
m := New[string, string]()
if err := json.Unmarshal([]byte(original), m); err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
data, err := json.Marshal(m)
if err != nil {
t.Fatalf("Marshal failed: %v", err)
}
if string(data) != original {
t.Errorf("round trip failed: expected %s, got %s", original, string(data))
}
}
func TestMap_ToMap(t *testing.T) {
m := New[string, int]()
m.Set("a", 1)
m.Set("b", 2)
regular := m.ToMap()
if len(regular) != 2 {
t.Errorf("expected len 2, got %d", len(regular))
}
if regular["a"] != 1 {
t.Errorf("expected regular[a] = 1, got %d", regular["a"])
}
if regular["b"] != 2 {
t.Errorf("expected regular[b] = 2, got %d", regular["b"])
}
}
func TestMap_NilSafety(t *testing.T) {
var m *Map[string, int]
// All operations should be safe on nil
if m.Len() != 0 {
t.Errorf("expected Len() = 0 on nil map, got %d", m.Len())
}
v, ok := m.Get("a")
if ok {
t.Error("expected Get on nil map to return false")
}
if v != 0 {
t.Errorf("expected zero value from nil map, got %d", v)
}
// Set on nil is a no-op
m.Set("a", 1)
if m.Len() != 0 {
t.Errorf("expected Len() = 0 after Set on nil, got %d", m.Len())
}
// All returns empty iterator
var keys []string
for k := range m.All() {
keys = append(keys, k)
}
if len(keys) != 0 {
t.Errorf("expected empty iteration on nil map, got %v", keys)
}
// ToMap returns nil
if m.ToMap() != nil {
t.Error("expected ToMap to return nil on nil map")
}
// MarshalJSON returns null
data, err := json.Marshal(m)
if err != nil {
t.Fatalf("Marshal failed: %v", err)
}
if string(data) != "null" {
t.Errorf("expected null, got %s", string(data))
}
}
func TestMap_EmptyMapMarshal(t *testing.T) {
m := New[string, int]()
data, err := json.Marshal(m)
if err != nil {
t.Fatalf("Marshal failed: %v", err)
}
if string(data) != "{}" {
t.Errorf("expected {}, got %s", string(data))
}
}
func TestMap_NestedValues(t *testing.T) {
m := New[string, any]()
m.Set("string", "hello")
m.Set("number", 42)
m.Set("bool", true)
m.Set("nested", map[string]int{"x": 1})
data, err := json.Marshal(m)
if err != nil {
t.Fatalf("Marshal failed: %v", err)
}
expected := `{"string":"hello","number":42,"bool":true,"nested":{"x":1}}`
if string(data) != expected {
t.Errorf("expected %s, got %s", expected, string(data))
}
}
func TestMap_AllIteratorEarlyExit(t *testing.T) {
m := New[string, int]()
m.Set("a", 1)
m.Set("b", 2)
m.Set("c", 3)
m.Set("d", 4)
// Collect only first 2
var keys []string
for k := range m.All() {
keys = append(keys, k)
if len(keys) == 2 {
break
}
}
expected := []string{"a", "b"}
if !slices.Equal(keys, expected) {
t.Errorf("expected %v, got %v", expected, keys)
}
}
func TestMap_IntegerKeys(t *testing.T) {
m := New[int, string]()
m.Set(3, "three")
m.Set(1, "one")
m.Set(2, "two")
var keys []int
for k := range m.All() {
keys = append(keys, k)
}
// Should preserve insertion order, not numerical order
expected := []int{3, 1, 2}
if !slices.Equal(keys, expected) {
t.Errorf("expected %v, got %v", expected, keys)
}
}
func TestMap_UnmarshalIntoExisting(t *testing.T) {
m := New[string, int]()
m.Set("existing", 999)
// Unmarshal should replace contents
if err := json.Unmarshal([]byte(`{"new":1}`), m); err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
_, ok := m.Get("existing")
if ok {
t.Error("existing key should be gone after unmarshal")
}
v, ok := m.Get("new")
if !ok || v != 1 {
t.Errorf("expected Get(new) = (1, true), got (%d, %v)", v, ok)
}
}
func TestMap_LargeOrderPreservation(t *testing.T) {
m := New[string, int]()
// Create many keys in specific order
keys := make([]string, 100)
for i := range 100 {
keys[i] = string(rune('a' + (99 - i))) // descending code points for i < 26, so insertion order differs from sorted order
if i >= 26 {
keys[i] = string(rune('A'+i-26)) + string(rune('a'+i%26))
}
}
for i, k := range keys {
m.Set(k, i)
}
// Verify order preserved
var resultKeys []string
for k := range m.All() {
resultKeys = append(resultKeys, k)
}
if !slices.Equal(keys, resultKeys) {
t.Error("large map should preserve insertion order")
}
}

View File

@@ -140,10 +140,6 @@ func (c *Causal) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity
c.config.CachePadding = 1
}
if c.config.MaskBatchPadding == 0 {
c.config.MaskBatchPadding = 1
}
if c.config.MaskDType == ml.DTypeOther {
c.config.MaskDType = ml.DTypeF32
}
@@ -364,15 +360,12 @@ func roundUp(length, pad int) int {
// token in the history should apply. This is based on both the sequence and causality (the
// position of the history is not ahead of the token in the batch).
func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
// Align and pad the two dimensions as required by the backend
batchSize := roundUp(c.curBatchSize, c.config.MaskBatchPadding)
c.curCellRange.min = roundDown(c.curCellRange.min, c.config.CachePadding)
c.curCellRange.max = roundUp(c.curCellRange.max+1, c.config.CachePadding) - 1
length := c.curCellRange.max - c.curCellRange.min + 1
mask := make([]float32, batchSize*length)
mask := make([]float32, c.curBatchSize*length)
for i := range c.curBatchSize {
enabled := !slices.Contains(c.opts.Except, i)
@@ -386,13 +379,7 @@ func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
}
}
// Mask out any padding tokens we added. For padding that we added to the cache history, this
// has already been masked out because the sequence doesn't match.
for i := c.curBatchSize * length; i < len(mask); i++ {
mask[i] = float32(math.Inf(-1))
}
maskTensor := ctx.Input().FromFloats(mask, length, batchSize)
maskTensor := ctx.Input().FromFloats(mask, length, c.curBatchSize)
if c.config.MaskDType != ml.DTypeF32 {
maskTensor = maskTensor.Cast(ctx, c.config.MaskDType)

Some files were not shown because too many files have changed in this diff.