move tokenizer to separate package

2026-02-02 19:54:33 -05:00 · 2026-01-28 14:25:31 -08:00
72 changed files with 807 additions and 2813 deletions
--- a/README.md
+++ b/README.md
@@ -358,7 +358,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Odin Runes](https://github.com/leonid20000/OdinRunes)
 - [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
 - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
- [Screenpipe](https://github.com/mediar-ai/screenpipe) (24/7 screen & mic recording with AI-powered search, uses Ollama for local LLM features)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
 - [IntelliBar](https://intellibar.app/) (AI-powered assistant for macOS)
@@ -466,7 +465,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Clueless](https://github.com/KashyapTan/clueless) (Open Source & Local Cluely: A desktop application LLM assistant to help you talk to anything on your screen using locally served Ollama models. Also undetectable to screenshare)
 - [ollama-co2](https://github.com/carbonatedWaterOrg/ollama-co2) (FastAPI web interface for monitoring and managing local and remote Ollama servers with real-time model monitoring and concurrent downloads)
 - [Hillnote](https://hillnote.com) (A Markdown-first workspace designed to supercharge your AI workflow. Create documents ready to integrate with Claude, ChatGPT, Gemini, Cursor, and more - all while keeping your work on your device.)
- [Stakpak](https://github.com/stakpak/agent) (An open source, vendor neutral DevOps agent that works with any model, and any stack, for teams who just want to ship)

 ### Cloud

--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1888,7 +1888,7 @@ func NewCLI() *cobra.Command {
 	serveCmd := &cobra.Command{
 		Use:     "serve",
 		Aliases: []string{"start"},
-		Short:   "Start Ollama",
+		Short:   "Start ollama",
 		Args:    cobra.ExactArgs(0),
 		RunE:    RunServer,
 	}
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -6,8 +6,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // Claude implements Runner for Claude Code integration
@@ -52,7 +50,7 @@ func (c *Claude) Run(model string) error {
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	cmd.Env = append(os.Environ(),
-		"ANTHROPIC_BASE_URL="+envconfig.Host().String(),
+		"ANTHROPIC_BASE_URL=http://localhost:11434",
 		"ANTHROPIC_API_KEY=",
 		"ANTHROPIC_AUTH_TOKEN=ollama",
 	)
--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -7,8 +7,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"slices"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // Droid implements Runner and Editor for Droid integration
@@ -119,7 +117,7 @@ func (d *Droid) Edit(models []string) error {
 		newModels = append(newModels, modelEntry{
 			Model:           model,
 			DisplayName:     model,
-			BaseURL:         envconfig.Host().String() + "/v1",
+			BaseURL:         "http://localhost:11434/v1",
 			APIKey:          "ollama",
 			Provider:        "generic-chat-completion-api",
 			MaxOutputTokens: 64000,
--- a/cmd/config/droid_test.go
+++ b/cmd/config/droid_test.go
@@ -218,7 +218,7 @@ func TestDroidEdit(t *testing.T) {
 			}
 		}

-		if model["baseUrl"] != "http://127.0.0.1:11434/v1" {
+		if model["baseUrl"] != "http://localhost:11434/v1" {
 			t.Errorf("unexpected baseUrl: %s", model["baseUrl"])
 		}
 		if model["apiKey"] != "ollama" {
@@ -447,7 +447,7 @@ const testDroidSettingsFixture = `{
    {
      "model": "existing-ollama-model",
      "displayName": "existing-ollama-model",
-      "baseUrl": "http://127.0.0.1:11434/v1",
+      "baseUrl": "http://localhost:11434/v1",
      "apiKey": "ollama",
      "provider": "generic-chat-completion-api",
      "maxOutputTokens": 64000,
--- a/cmd/config/integrations.go
+++ b/cmd/config/integrations.go
@@ -13,7 +13,6 @@ import (
 	"time"

 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/progress"
 	"github.com/spf13/cobra"
 )

@@ -42,26 +41,9 @@ type Editor interface {
 // integrations is the registry of available integrations.
 var integrations = map[string]Runner{
 	"claude":   &Claude{},
-	"clawdbot": &Openclaw{},
 	"codex":    &Codex{},
-	"moltbot":  &Openclaw{},
 	"droid":    &Droid{},
 	"opencode": &OpenCode{},
-	"openclaw": &Openclaw{},
-}
-
-// recommendedModels are shown when the user has no models or as suggestions.
-// Order matters: local models first, then cloud models.
-var recommendedModels = []selectItem{
-	{Name: "glm-4.7-flash", Description: "Recommended (requires ~25GB VRAM)"},
-	{Name: "glm-4.7:cloud", Description: "recommended"},
-	{Name: "kimi-k2.5:cloud", Description: "recommended"},
-}
-
-// integrationAliases are hidden from the interactive selector but work as CLI arguments.
-var integrationAliases = map[string]bool{
-	"clawdbot": true,
-	"moltbot":  true,
 }

 func selectIntegration() (string, error) {
@@ -72,9 +54,6 @@ func selectIntegration() (string, error) {
 	names := slices.Sorted(maps.Keys(integrations))
 	var items []selectItem
 	for _, name := range names {
-		if integrationAliases[name] {
-			continue
-		}
 		r := integrations[name]
 		description := r.String()
 		if conn, err := loadIntegration(name); err == nil && len(conn.Models) > 0 {
@@ -103,25 +82,62 @@ func selectModels(ctx context.Context, name, current string) ([]string, error) {
 		return nil, err
 	}

-	var existing []modelInfo
-	for _, m := range models.Models {
-		existing = append(existing, modelInfo{Name: m.Name, Remote: m.RemoteModel != ""})
+	if len(models.Models) == 0 {
+		return nil, fmt.Errorf("no models available, run 'ollama pull <model>' first")
 	}

+	var items []selectItem
+	cloudModels := make(map[string]bool)
+	for _, m := range models.Models {
+		if m.RemoteModel != "" {
+			cloudModels[m.Name] = true
+		}
+		items = append(items, selectItem{Name: m.Name})
+	}
+
+	if len(items) == 0 {
+		return nil, fmt.Errorf("no local models available, run 'ollama pull <model>' first")
+	}
+
+	// Get previously configured models (saved config takes precedence)
 	var preChecked []string
 	if saved, err := loadIntegration(name); err == nil {
 		preChecked = saved.Models
 	} else if editor, ok := r.(Editor); ok {
 		preChecked = editor.Models()
 	}
-
-	items, preChecked, existingModels, cloudModels := buildModelList(existing, preChecked, current)
-
-	if len(items) == 0 {
-		return nil, fmt.Errorf("no models available")
+	checked := make(map[string]bool, len(preChecked))
+	for _, n := range preChecked {
+		checked[n] = true
 	}

+	// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
+	for _, item := range items {
+		if item.Name == current || strings.HasPrefix(item.Name, current+":") {
+			current = item.Name
+			break
+		}
+	}
+
+	// If current model is configured, move to front of preChecked
+	if checked[current] {
+		preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
+	}
+
+	// Sort: checked first, then alphabetical
+	slices.SortFunc(items, func(a, b selectItem) int {
+		ac, bc := checked[a.Name], checked[b.Name]
+		if ac != bc {
+			if ac {
+				return -1
+			}
+			return 1
+		}
+		return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
+	})
+
 	var selected []string
+	// only editors support multi-model selection
 	if _, ok := r.(Editor); ok {
 		selected, err = multiSelectPrompt(fmt.Sprintf("Select models for %s:", r), items, preChecked)
 		if err != nil {
@@ -135,27 +151,7 @@ func selectModels(ctx context.Context, name, current string) ([]string, error) {
 		selected = []string{model}
 	}

-	var toPull []string
-	for _, m := range selected {
-		if !existingModels[m] {
-			toPull = append(toPull, m)
-		}
-	}
-	if len(toPull) > 0 {
-		msg := fmt.Sprintf("Download %s?", strings.Join(toPull, ", "))
-		if ok, err := confirmPrompt(msg); err != nil {
-			return nil, err
-		} else if !ok {
-			return nil, errCancelled
-		}
-		for _, m := range toPull {
-			fmt.Fprintf(os.Stderr, "\n")
-			if err := pullModel(ctx, client, m); err != nil {
-				return nil, fmt.Errorf("failed to pull %s: %w", m, err)
-			}
-		}
-	}
-
+	// if any model in selected is a cloud model, ensure signed in
 	var selectedCloudModels []string
 	for _, m := range selected {
 		if cloudModels[m] {
@@ -249,7 +245,6 @@ Supported integrations:
  codex     Codex
  droid     Droid
  opencode  OpenCode
-  openclaw  OpenClaw (aliases: clawdbot, moltbot)

 Examples:
  ollama launch
@@ -278,6 +273,7 @@ Examples:
 				return fmt.Errorf("unknown integration: %s", name)
 			}

+			// If launching without --model, use saved config if available
 			if !configFlag && modelFlag == "" {
 				if config, err := loadIntegration(name); err == nil && len(config.Models) > 0 {
 					return runIntegration(name, config.Models[0])
@@ -286,6 +282,7 @@ Examples:

 			var models []string
 			if modelFlag != "" {
+				// When --model is specified, merge with existing models (new model becomes default)
 				models = []string{modelFlag}
 				if existing, err := loadIntegration(name); err == nil && len(existing.Models) > 0 {
 					for _, m := range existing.Models {
@@ -354,150 +351,3 @@ Examples:
 	cmd.Flags().BoolVar(&configFlag, "config", false, "Configure without launching")
 	return cmd
 }
-
-type modelInfo struct {
-	Name   string
-	Remote bool
-}
-
-// buildModelList merges existing models with recommendations, sorts them, and returns
-// the ordered items along with maps of existing and cloud model names.
-func buildModelList(existing []modelInfo, preChecked []string, current string) (items []selectItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
-	existingModels = make(map[string]bool)
-	cloudModels = make(map[string]bool)
-	recommended := make(map[string]bool)
-	var hasLocalModel, hasCloudModel bool
-
-	for _, rec := range recommendedModels {
-		recommended[rec.Name] = true
-	}
-
-	for _, m := range existing {
-		existingModels[m.Name] = true
-		if m.Remote {
-			cloudModels[m.Name] = true
-			hasCloudModel = true
-		} else {
-			hasLocalModel = true
-		}
-		displayName := strings.TrimSuffix(m.Name, ":latest")
-		existingModels[displayName] = true
-		item := selectItem{Name: displayName}
-		if recommended[displayName] {
-			item.Description = "recommended"
-		}
-		items = append(items, item)
-	}
-
-	for _, rec := range recommendedModels {
-		if existingModels[rec.Name] || existingModels[rec.Name+":latest"] {
-			continue
-		}
-		items = append(items, rec)
-		if isCloudModel(rec.Name) {
-			cloudModels[rec.Name] = true
-		}
-	}
-
-	checked := make(map[string]bool, len(preChecked))
-	for _, n := range preChecked {
-		checked[n] = true
-	}
-
-	// Resolve current to full name (e.g., "llama3.2" -> "llama3.2:latest")
-	for _, item := range items {
-		if item.Name == current || strings.HasPrefix(item.Name, current+":") {
-			current = item.Name
-			break
-		}
-	}
-
-	if checked[current] {
-		preChecked = append([]string{current}, slices.DeleteFunc(preChecked, func(m string) bool { return m == current })...)
-	}
-
-	// Non-existing models get "install?" suffix and are pushed to the bottom.
-	// When user has no models, preserve recommended order.
-	notInstalled := make(map[string]bool)
-	for i := range items {
-		if !existingModels[items[i].Name] {
-			notInstalled[items[i].Name] = true
-			items[i].Description = "recommended, install?"
-		}
-	}
-
-	if hasLocalModel || hasCloudModel {
-		slices.SortStableFunc(items, func(a, b selectItem) int {
-			ac, bc := checked[a.Name], checked[b.Name]
-			aNew, bNew := notInstalled[a.Name], notInstalled[b.Name]
-
-			if ac != bc {
-				if ac {
-					return -1
-				}
-				return 1
-			}
-			if !ac && !bc && aNew != bNew {
-				if aNew {
-					return 1
-				}
-				return -1
-			}
-			return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
-		})
-	}
-
-	return items, preChecked, existingModels, cloudModels
-}
-
-func isCloudModel(name string) bool {
-	return strings.HasSuffix(name, ":cloud")
-}
-
-func pullModel(ctx context.Context, client *api.Client, model string) error {
-	p := progress.NewProgress(os.Stderr)
-	defer p.Stop()
-
-	bars := make(map[string]*progress.Bar)
-	var status string
-	var spinner *progress.Spinner
-
-	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != "" {
-			if resp.Completed == 0 {
-				return nil
-			}
-
-			if spinner != nil {
-				spinner.Stop()
-			}
-
-			bar, ok := bars[resp.Digest]
-			if !ok {
-				name, isDigest := strings.CutPrefix(resp.Digest, "sha256:")
-				name = strings.TrimSpace(name)
-				if isDigest {
-					name = name[:min(12, len(name))]
-				}
-				bar = progress.NewBar(fmt.Sprintf("pulling %s:", name), resp.Total, resp.Completed)
-				bars[resp.Digest] = bar
-				p.Add(resp.Digest, bar)
-			}
-
-			bar.Set(resp.Completed)
-		} else if status != resp.Status {
-			if spinner != nil {
-				spinner.Stop()
-			}
-
-			status = resp.Status
-			spinner = progress.NewSpinner(status)
-			p.Add(status, spinner)
-		}
-
-		return nil
-	}
-
-	request := api.PullRequest{Name: model}
-	return client.Pull(ctx, &request, fn)
-}
--- a/cmd/config/integrations_test.go
+++ b/cmd/config/integrations_test.go
@@ -5,7 +5,6 @@ import (
 	"strings"
 	"testing"

-	"github.com/google/go-cmp/cmp"
 	"github.com/spf13/cobra"
 )

@@ -175,226 +174,15 @@ func TestLaunchCmd_NilHeartbeat(t *testing.T) {
 func TestAllIntegrations_HaveRequiredMethods(t *testing.T) {
 	for name, r := range integrations {
 		t.Run(name, func(t *testing.T) {
+			// Test String() doesn't panic and returns non-empty
 			displayName := r.String()
 			if displayName == "" {
 				t.Error("String() should not return empty")
 			}
+
+			// Test Run() exists (we can't call it without actually running the command)
+			// Just verify the method is available
 			var _ func(string) error = r.Run
 		})
 	}
 }
-
-func TestIsCloudModel(t *testing.T) {
-	tests := []struct {
-		name string
-		want bool
-	}{
-		{"glm-4.7:cloud", true},
-		{"kimi-k2.5:cloud", true},
-		{"glm-4.7-flash", false},
-		{"glm-4.7-flash:latest", false},
-		{"cloud-model", false},
-		{"model:cloudish", false},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := isCloudModel(tt.name); got != tt.want {
-				t.Errorf("isCloudModel(%q) = %v, want %v", tt.name, got, tt.want)
-			}
-		})
-	}
-}
-
-func names(items []selectItem) []string {
-	var out []string
-	for _, item := range items {
-		out = append(out, item.Name)
-	}
-	return out
-}
-
-func TestBuildModelList_NoExistingModels(t *testing.T) {
-	items, _, _, _ := buildModelList(nil, nil, "")
-
-	want := []string{"glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud"}
-	if diff := cmp.Diff(want, names(items)); diff != "" {
-		t.Errorf("with no existing models, items should be recommended in order (-want +got):\n%s", diff)
-	}
-
-	for _, item := range items {
-		if item.Description != "recommended, install?" {
-			t.Errorf("item %q should have description 'install?', got %q", item.Name, item.Description)
-		}
-	}
-}
-
-func TestBuildModelList_OnlyLocalModels_CloudRecsAtBottom(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "qwen2.5:latest", Remote: false},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	want := []string{"llama3.2", "qwen2.5", "glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("cloud recs should be at bottom (-want +got):\n%s", diff)
-	}
-}
-
-func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	want := []string{"glm-4.7:cloud", "llama3.2", "glm-4.7-flash", "kimi-k2.5:cloud"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("mixed models should be alphabetical (-want +got):\n%s", diff)
-	}
-}
-
-func TestBuildModelList_PreCheckedFirst(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
-	got := names(items)
-
-	if got[0] != "llama3.2" {
-		t.Errorf("pre-checked model should be first, got %v", got)
-	}
-}
-
-func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "glm-4.7-flash", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-
-	for _, item := range items {
-		switch item.Name {
-		case "glm-4.7-flash", "glm-4.7:cloud":
-			if item.Description != "recommended" {
-				t.Errorf("installed recommended %q should have description 'recommended', got %q", item.Name, item.Description)
-			}
-		case "kimi-k2.5:cloud":
-			if item.Description != "recommended, install?" {
-				t.Errorf("non-installed recommended %q should have description 'install?', got %q", item.Name, item.Description)
-			}
-		}
-	}
-}
-
-func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "glm-4.7-flash", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	// glm-4.7-flash and glm-4.7:cloud are installed so they sort normally;
-	// kimi-k2.5:cloud and qwen3:0.6b are not installed so they go to the bottom
-	want := []string{"glm-4.7-flash", "glm-4.7:cloud", "kimi-k2.5:cloud"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("existing cloud models should sort normally (-want +got):\n%s", diff)
-	}
-}
-
-func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "kimi-k2.5:cloud", Remote: true},
-	}
-
-	items, _, _, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	// kimi-k2.5:cloud is installed so it sorts normally;
-	// the rest of the recommendations are not installed so they go to the bottom
-	want := []string{"kimi-k2.5:cloud", "llama3.2", "glm-4.7-flash", "glm-4.7:cloud"}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("only non-installed models should be at bottom (-want +got):\n%s", diff)
-	}
-
-	// Non-installed models should have "recommended, install?" description
-	for _, item := range items {
-		if !slices.Contains([]string{"kimi-k2.5:cloud", "llama3.2"}, item.Name) {
-			if item.Description != "recommended, install?" {
-				t.Errorf("non-installed %q should have description 'install?', got %q", item.Name, item.Description)
-			}
-		}
-	}
-}
-
-func TestBuildModelList_LatestTagStripped(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "glm-4.7-flash:latest", Remote: false},
-		{Name: "llama3.2:latest", Remote: false},
-	}
-
-	items, _, existingModels, _ := buildModelList(existing, nil, "")
-	got := names(items)
-
-	// :latest should be stripped from display names
-	for _, name := range got {
-		if strings.HasSuffix(name, ":latest") {
-			t.Errorf("name %q should not have :latest suffix", name)
-		}
-	}
-
-	// glm-4.7-flash should not be duplicated (existing :latest matches the recommendation)
-	count := 0
-	for _, name := range got {
-		if name == "glm-4.7-flash" {
-			count++
-		}
-	}
-	if count != 1 {
-		t.Errorf("glm-4.7-flash should appear exactly once, got %d in %v", count, got)
-	}
-
-	// Stripped name should be in existingModels so it won't be pulled
-	if !existingModels["glm-4.7-flash"] {
-		t.Error("glm-4.7-flash should be in existingModels")
-	}
-}
-
-func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
-	existing := []modelInfo{
-		{Name: "llama3.2:latest", Remote: false},
-		{Name: "glm-4.7:cloud", Remote: true},
-	}
-
-	_, _, existingModels, cloudModels := buildModelList(existing, nil, "")
-
-	if !existingModels["llama3.2"] {
-		t.Error("llama3.2 should be in existingModels")
-	}
-	if !existingModels["glm-4.7:cloud"] {
-		t.Error("glm-4.7:cloud should be in existingModels")
-	}
-	if existingModels["glm-4.7-flash"] {
-		t.Error("glm-4.7-flash should not be in existingModels (it's a recommendation)")
-	}
-
-	if !cloudModels["glm-4.7:cloud"] {
-		t.Error("glm-4.7:cloud should be in cloudModels")
-	}
-	if !cloudModels["kimi-k2.5:cloud"] {
-		t.Error("kimi-k2.5:cloud should be in cloudModels (recommended cloud)")
-	}
-	if cloudModels["llama3.2"] {
-		t.Error("llama3.2 should not be in cloudModels")
-	}
-}
--- a/cmd/config/openclaw.go
+++ b/cmd/config/openclaw.go
@@ -1,254 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-type Openclaw struct{}
-
-func (c *Openclaw) String() string { return "OpenClaw" }
-
-const ansiGreen = "\033[32m"
-
-func (c *Openclaw) Run(model string) error {
-	bin := "openclaw"
-	if _, err := exec.LookPath(bin); err != nil {
-		bin = "clawdbot"
-		if _, err := exec.LookPath(bin); err != nil {
-			return fmt.Errorf("openclaw is not installed, install from https://docs.openclaw.ai")
-		}
-	}
-
-	models := []string{model}
-	if config, err := loadIntegration("openclaw"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	} else if config, err := loadIntegration("clawdbot"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := c.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	if !c.onboarded() {
-		// Onboarding not completed: run it (model already set via Edit)
-		// Use "ollama" as gateway token for simple local access
-		cmd := exec.Command(bin, "onboard",
-			"--auth-choice", "skip",
-			"--gateway-token", "ollama",
-		)
-		cmd.Stdin = os.Stdin
-		cmd.Stdout = os.Stdout
-		cmd.Stderr = os.Stderr
-		return cmd.Run()
-	}
-
-	// Onboarding completed: run gateway
-	cmd := exec.Command(bin, "gateway")
-	cmd.Stdin = os.Stdin
-
-	// Capture output to detect "already running" message
-	var outputBuf bytes.Buffer
-	cmd.Stdout = io.MultiWriter(os.Stdout, &outputBuf)
-	cmd.Stderr = io.MultiWriter(os.Stderr, &outputBuf)
-
-	err := cmd.Run()
-	if err != nil && strings.Contains(outputBuf.String(), "Gateway already running") {
-		fmt.Fprintf(os.Stderr, "%sOpenClaw has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
-		return nil
-	}
-	return err
-}
-
-// onboarded checks if OpenClaw onboarding wizard was completed
-// by looking for the wizard.lastRunAt marker in the config
-func (c *Openclaw) onboarded() bool {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return false
-	}
-
-	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
-	legacyPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
-
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	} else if data, err := os.ReadFile(legacyPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	} else {
-		return false
-	}
-
-	// Check for wizard.lastRunAt marker (set when onboarding completes)
-	wizard, _ := config["wizard"].(map[string]any)
-	if wizard == nil {
-		return false
-	}
-	lastRunAt, _ := wizard["lastRunAt"].(string)
-	return lastRunAt != ""
-}
-
-func (c *Openclaw) Paths() []string {
-	home, _ := os.UserHomeDir()
-	p := filepath.Join(home, ".openclaw", "openclaw.json")
-	if _, err := os.Stat(p); err == nil {
-		return []string{p}
-	}
-	legacy := filepath.Join(home, ".clawdbot", "clawdbot.json")
-	if _, err := os.Stat(legacy); err == nil {
-		return []string{legacy}
-	}
-	return nil
-}
-
-func (c *Openclaw) Edit(models []string) error {
-	if len(models) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	configPath := filepath.Join(home, ".openclaw", "openclaw.json")
-	legacyPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
-	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
-		return err
-	}
-
-	// Read into map[string]any to preserve unknown fields
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	} else if data, err := os.ReadFile(legacyPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	}
-
-	// Navigate/create: models.providers.ollama (preserving other providers)
-	modelsSection, _ := config["models"].(map[string]any)
-	if modelsSection == nil {
-		modelsSection = make(map[string]any)
-	}
-	providers, _ := modelsSection["providers"].(map[string]any)
-	if providers == nil {
-		providers = make(map[string]any)
-	}
-	ollama, _ := providers["ollama"].(map[string]any)
-	if ollama == nil {
-		ollama = make(map[string]any)
-	}
-
-	ollama["baseUrl"] = envconfig.Host().String() + "/v1"
-	// needed to register provider
-	ollama["apiKey"] = "ollama-local"
-	// TODO(parthsareen): potentially move to responses
-	ollama["api"] = "openai-completions"
-
-	// Build map of existing models to preserve user customizations
-	existingModels, _ := ollama["models"].([]any)
-	existingByID := make(map[string]map[string]any)
-	for _, m := range existingModels {
-		if entry, ok := m.(map[string]any); ok {
-			if id, ok := entry["id"].(string); ok {
-				existingByID[id] = entry
-			}
-		}
-	}
-
-	var newModels []any
-	for _, model := range models {
-		entry := map[string]any{
-			"id":        model,
-			"name":      model,
-			"reasoning": false,
-			"input":     []any{"text"},
-			"cost": map[string]any{
-				"input":      0,
-				"output":     0,
-				"cacheRead":  0,
-				"cacheWrite": 0,
-			},
-			// TODO(parthsareen): get these values from API
-			"contextWindow": 131072,
-			"maxTokens":     16384,
-		}
-		// Merge existing fields (user customizations)
-		if existing, ok := existingByID[model]; ok {
-			for k, v := range existing {
-				if _, isNew := entry[k]; !isNew {
-					entry[k] = v
-				}
-			}
-		}
-		newModels = append(newModels, entry)
-	}
-	ollama["models"] = newModels
-
-	providers["ollama"] = ollama
-	modelsSection["providers"] = providers
-	config["models"] = modelsSection
-
-	// Update agents.defaults.model.primary (preserving other agent settings)
-	agents, _ := config["agents"].(map[string]any)
-	if agents == nil {
-		agents = make(map[string]any)
-	}
-	defaults, _ := agents["defaults"].(map[string]any)
-	if defaults == nil {
-		defaults = make(map[string]any)
-	}
-	modelConfig, _ := defaults["model"].(map[string]any)
-	if modelConfig == nil {
-		modelConfig = make(map[string]any)
-	}
-	modelConfig["primary"] = "ollama/" + models[0]
-	defaults["model"] = modelConfig
-	agents["defaults"] = defaults
-	config["agents"] = agents
-
-	data, err := json.MarshalIndent(config, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(configPath, data)
-}
-
-func (c *Openclaw) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-
-	config, err := readJSONFile(filepath.Join(home, ".openclaw", "openclaw.json"))
-	if err != nil {
-		config, err = readJSONFile(filepath.Join(home, ".clawdbot", "clawdbot.json"))
-		if err != nil {
-			return nil
-		}
-	}
-
-	modelsSection, _ := config["models"].(map[string]any)
-	providers, _ := modelsSection["providers"].(map[string]any)
-	ollama, _ := providers["ollama"].(map[string]any)
-	modelList, _ := ollama["models"].([]any)
-
-	var result []string
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if id, ok := entry["id"].(string); ok {
-				result = append(result, id)
-			}
-		}
-	}
-	return result
-}
--- a/cmd/config/openclaw_test.go
+++ b/cmd/config/openclaw_test.go
@@ -1,878 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-func TestOpenclawIntegration(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := c.String(); got != "OpenClaw" {
-			t.Errorf("String() = %q, want %q", got, "OpenClaw")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = c
-	})
-
-	t.Run("implements Editor", func(t *testing.T) {
-		var _ Editor = c
-	})
-}
-
-func TestOpenclawEdit(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("fresh install", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2")
-	})
-
-	t.Run("multiple models - first is primary", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-		assertOpenclawModelExists(t, configPath, "mistral")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2")
-	})
-
-	t.Run("preserve other providers", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"anthropic":{"apiKey":"xxx"}}}}`), 0o644)
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		models := cfg["models"].(map[string]any)
-		providers := models["providers"].(map[string]any)
-		if providers["anthropic"] == nil {
-			t.Error("anthropic provider was removed")
-		}
-	})
-
-	t.Run("preserve top-level keys", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"theme":"dark","mcp":{"servers":{}}}`), 0o644)
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("theme was removed")
-		}
-		if cfg["mcp"] == nil {
-			t.Error("mcp was removed")
-		}
-	})
-
-	t.Run("preserve user customizations on models", func(t *testing.T) {
-		cleanup()
-		c.Edit([]string{"llama3.2"})
-
-		// User adds custom field
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		models := cfg["models"].(map[string]any)
-		providers := models["providers"].(map[string]any)
-		ollama := providers["ollama"].(map[string]any)
-		modelList := ollama["models"].([]any)
-		entry := modelList[0].(map[string]any)
-		entry["customField"] = "user-value"
-		configData, _ := json.MarshalIndent(cfg, "", "  ")
-		os.WriteFile(configPath, configData, 0o644)
-
-		// Re-run Edit
-		c.Edit([]string{"llama3.2"})
-
-		data, _ = os.ReadFile(configPath)
-		json.Unmarshal(data, &cfg)
-		models = cfg["models"].(map[string]any)
-		providers = models["providers"].(map[string]any)
-		ollama = providers["ollama"].(map[string]any)
-		modelList = ollama["models"].([]any)
-		entry = modelList[0].(map[string]any)
-		if entry["customField"] != "user-value" {
-			t.Error("custom field was lost")
-		}
-	})
-
-	t.Run("edit replaces models list", func(t *testing.T) {
-		cleanup()
-		c.Edit([]string{"llama3.2", "mistral"})
-		c.Edit([]string{"llama3.2"})
-
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-		assertOpenclawModelNotExists(t, configPath, "mistral")
-	})
-
-	t.Run("empty models is no-op", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		original := `{"existing":"data"}`
-		os.WriteFile(configPath, []byte(original), 0o644)
-
-		c.Edit([]string{})
-
-		data, _ := os.ReadFile(configPath)
-		if string(data) != original {
-			t.Error("empty models should not modify file")
-		}
-	})
-
-	t.Run("corrupted JSON treated as empty", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{corrupted`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		if err := json.Unmarshal(data, &cfg); err != nil {
-			t.Error("result should be valid JSON")
-		}
-	})
-
-	t.Run("wrong type models section", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":"not a map"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2")
-	})
-}
-
-func TestOpenclawModels(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("no config returns nil", func(t *testing.T) {
-		if models := c.Models(); len(models) > 0 {
-			t.Errorf("expected nil/empty, got %v", models)
-		}
-	})
-
-	t.Run("returns all ollama models", func(t *testing.T) {
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[
-				{"id":"llama3.2"},
-				{"id":"mistral"}
-			]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 2 {
-			t.Errorf("expected 2 models, got %v", models)
-		}
-	})
-}
-
-// Helper functions
-func assertOpenclawModelExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	ollama := providers["ollama"].(map[string]any)
-	modelList := ollama["models"].([]any)
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if entry["id"] == model {
-				return
-			}
-		}
-	}
-	t.Errorf("model %s not found", model)
-}
-
-func assertOpenclawModelNotExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models, _ := cfg["models"].(map[string]any)
-	providers, _ := models["providers"].(map[string]any)
-	ollama, _ := providers["ollama"].(map[string]any)
-	modelList, _ := ollama["models"].([]any)
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if entry["id"] == model {
-				t.Errorf("model %s should not exist", model)
-			}
-		}
-	}
-}
-
-func assertOpenclawPrimaryModel(t *testing.T, path, expected string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	agents := cfg["agents"].(map[string]any)
-	defaults := agents["defaults"].(map[string]any)
-	model := defaults["model"].(map[string]any)
-	if model["primary"] != expected {
-		t.Errorf("primary model = %v, want %v", model["primary"], expected)
-	}
-}
-
-func TestOpenclawPaths(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("returns path when config exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Errorf("expected 1 path, got %d", len(paths))
-		}
-	})
-
-	t.Run("returns nil when config missing", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		if paths := c.Paths(); paths != nil {
-			t.Errorf("expected nil, got %v", paths)
-		}
-	})
-}
-
-func TestOpenclawModelsEdgeCases(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("corrupted JSON returns nil", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{corrupted`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at models level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":"string"}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at providers level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":"string"}}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at ollama level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":"string"}}}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("model entry missing id", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":{"models":[{"name":"test"}]}}}}`), 0o644)
-		if len(c.Models()) != 0 {
-			t.Error("expected empty for missing id")
-		}
-	})
-
-	t.Run("model id is not string", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":{"models":[{"id":123}]}}}}`), 0o644)
-		if len(c.Models()) != 0 {
-			t.Error("expected empty for non-string id")
-		}
-	})
-}
-
-func TestOpenclawEditSchemaFields(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
-
-	if err := c.Edit([]string{"llama3.2"}); err != nil {
-		t.Fatal(err)
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	ollama := providers["ollama"].(map[string]any)
-	modelList := ollama["models"].([]any)
-	entry := modelList[0].(map[string]any)
-
-	// Verify required schema fields
-	if entry["reasoning"] != false {
-		t.Error("reasoning should be false")
-	}
-	if entry["input"] == nil {
-		t.Error("input should be set")
-	}
-	if entry["contextWindow"] == nil {
-		t.Error("contextWindow should be set")
-	}
-	if entry["maxTokens"] == nil {
-		t.Error("maxTokens should be set")
-	}
-	cost := entry["cost"].(map[string]any)
-	if cost["cacheRead"] == nil {
-		t.Error("cost.cacheRead should be set")
-	}
-	if cost["cacheWrite"] == nil {
-		t.Error("cost.cacheWrite should be set")
-	}
-}
-
-func TestOpenclawEditModelNames(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
-	cleanup := func() { os.RemoveAll(filepath.Join(tmpDir, ".openclaw")) }
-
-	t.Run("model with colon tag", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2:70b"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "llama3.2:70b")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/llama3.2:70b")
-	})
-
-	t.Run("model with slash", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"library/model:tag"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "library/model:tag")
-		assertOpenclawPrimaryModel(t, configPath, "ollama/library/model:tag")
-	})
-
-	t.Run("model with hyphen", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"test-model"}); err != nil {
-			t.Fatal(err)
-		}
-		assertOpenclawModelExists(t, configPath, "test-model")
-	})
-}
-
-func TestOpenclawEditAgentsPreservation(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("preserve other agent defaults", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"agents":{"defaults":{"model":{"primary":"old"},"temperature":0.7}}}`), 0o644)
-
-		c.Edit([]string{"llama3.2"})
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		agents := cfg["agents"].(map[string]any)
-		defaults := agents["defaults"].(map[string]any)
-		if defaults["temperature"] != 0.7 {
-			t.Error("temperature setting was lost")
-		}
-	})
-
-	t.Run("preserve other agents besides defaults", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"agents":{"defaults":{},"custom-agent":{"foo":"bar"}}}`), 0o644)
-
-		c.Edit([]string{"llama3.2"})
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		agents := cfg["agents"].(map[string]any)
-		if agents["custom-agent"] == nil {
-			t.Error("custom-agent was lost")
-		}
-	})
-}
-
-const testOpenclawFixture = `{
-  "theme": "dark",
-  "mcp": {"servers": {"custom": {"enabled": true}}},
-  "models": {
-    "providers": {
-      "anthropic": {"apiKey": "xxx"},
-      "ollama": {
-        "baseUrl": "http://127.0.0.1:11434/v1",
-        "models": [{"id": "old-model", "customField": "preserved"}]
-      }
-    }
-  },
-  "agents": {
-    "defaults": {"model": {"primary": "old"}, "temperature": 0.7},
-    "custom-agent": {"foo": "bar"}
-  }
-}`
-
-func TestOpenclawEdit_RoundTrip(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
-
-	if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
-		t.Fatal(err)
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-
-	// Verify top-level preserved
-	if cfg["theme"] != "dark" {
-		t.Error("theme not preserved")
-	}
-	mcp := cfg["mcp"].(map[string]any)
-	servers := mcp["servers"].(map[string]any)
-	if servers["custom"] == nil {
-		t.Error("mcp.servers.custom not preserved")
-	}
-
-	// Verify other providers preserved
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	if providers["anthropic"] == nil {
-		t.Error("anthropic provider not preserved")
-	}
-
-	// Verify agents preserved
-	agents := cfg["agents"].(map[string]any)
-	if agents["custom-agent"] == nil {
-		t.Error("custom-agent not preserved")
-	}
-	defaults := agents["defaults"].(map[string]any)
-	if defaults["temperature"] != 0.7 {
-		t.Error("temperature not preserved")
-	}
-}
-
-func TestOpenclawEdit_Idempotent(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
-
-	c.Edit([]string{"llama3.2", "mistral"})
-	firstData, _ := os.ReadFile(configPath)
-
-	c.Edit([]string{"llama3.2", "mistral"})
-	secondData, _ := os.ReadFile(configPath)
-
-	if string(firstData) != string(secondData) {
-		t.Error("repeated edits with same models produced different results")
-	}
-}
-
-func TestOpenclawEdit_MultipleConsecutiveEdits(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testOpenclawFixture), 0o644)
-
-	for i := range 10 {
-		models := []string{"model-a", "model-b"}
-		if i%2 == 0 {
-			models = []string{"model-x", "model-y", "model-z"}
-		}
-		if err := c.Edit(models); err != nil {
-			t.Fatalf("edit %d failed: %v", i, err)
-		}
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatalf("file is not valid JSON after multiple edits: %v", err)
-	}
-
-	if cfg["theme"] != "dark" {
-		t.Error("theme lost after multiple edits")
-	}
-}
-
-func TestOpenclawEdit_BackupCreated(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-	configPath := filepath.Join(configDir, "openclaw.json")
-	backupDir := filepath.Join(os.TempDir(), "ollama-backups")
-
-	os.MkdirAll(configDir, 0o755)
-	uniqueMarker := fmt.Sprintf("test-marker-%d", os.Getpid())
-	original := fmt.Sprintf(`{"theme": "%s"}`, uniqueMarker)
-	os.WriteFile(configPath, []byte(original), 0o644)
-
-	if err := c.Edit([]string{"model-a"}); err != nil {
-		t.Fatal(err)
-	}
-
-	backups, _ := filepath.Glob(filepath.Join(backupDir, "openclaw.json.*"))
-	foundBackup := false
-	for _, backup := range backups {
-		data, _ := os.ReadFile(backup)
-		if string(data) == original {
-			foundBackup = true
-			break
-		}
-	}
-
-	if !foundBackup {
-		t.Error("backup with original content not found")
-	}
-}
-
-func TestOpenclawClawdbotAlias(t *testing.T) {
-	for _, alias := range []string{"clawdbot", "moltbot"} {
-		t.Run(alias+" alias resolves to Openclaw runner", func(t *testing.T) {
-			r, ok := integrations[alias]
-			if !ok {
-				t.Fatalf("%s not found in integrations", alias)
-			}
-			if _, ok := r.(*Openclaw); !ok {
-				t.Errorf("%s integration is %T, want *Openclaw", alias, r)
-			}
-		})
-
-		t.Run(alias+" is hidden from selector", func(t *testing.T) {
-			if !integrationAliases[alias] {
-				t.Errorf("%s should be in integrationAliases", alias)
-			}
-		})
-	}
-}
-
-func TestOpenclawLegacyPaths(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("falls back to legacy clawdbot path", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Fatalf("expected 1 path, got %d", len(paths))
-		}
-		if paths[0] != filepath.Join(legacyDir, "clawdbot.json") {
-			t.Errorf("expected legacy path, got %s", paths[0])
-		}
-	})
-
-	t.Run("prefers new path over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Fatalf("expected 1 path, got %d", len(paths))
-		}
-		if paths[0] != filepath.Join(newDir, "openclaw.json") {
-			t.Errorf("expected new path, got %s", paths[0])
-		}
-	})
-
-	t.Run("Models reads from legacy path", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[{"id":"llama3.2"}]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 1 || models[0] != "llama3.2" {
-			t.Errorf("expected [llama3.2], got %v", models)
-		}
-	})
-
-	t.Run("Models prefers new path over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[{"id":"new-model"}]}}}
-		}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[{"id":"legacy-model"}]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 1 || models[0] != "new-model" {
-			t.Errorf("expected [new-model], got %v", models)
-		}
-	})
-
-	t.Run("Edit reads new path over legacy when both exist", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{"theme":"new"}`), 0o644)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"theme":"legacy"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ := os.ReadFile(filepath.Join(newDir, "openclaw.json"))
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "new" {
-			t.Errorf("expected theme from new config, got %v", cfg["theme"])
-		}
-	})
-
-	t.Run("Edit migrates from legacy config", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"theme":"dark"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		// Should write to new path
-		newPath := filepath.Join(tmpDir, ".openclaw", "openclaw.json")
-		data, err := os.ReadFile(newPath)
-		if err != nil {
-			t.Fatal("expected new config file to be created")
-		}
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("legacy theme setting was not migrated")
-		}
-	})
-}
-
-func TestOpenclawEdit_CreatesDirectoryIfMissing(t *testing.T) {
-	c := &Openclaw{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".openclaw")
-
-	if _, err := os.Stat(configDir); !os.IsNotExist(err) {
-		t.Fatal("directory should not exist before test")
-	}
-
-	if err := c.Edit([]string{"model-a"}); err != nil {
-		t.Fatal(err)
-	}
-
-	if _, err := os.Stat(configDir); os.IsNotExist(err) {
-		t.Fatal("directory was not created")
-	}
-}
-
-func TestOpenclawOnboarded(t *testing.T) {
-	c := &Openclaw{}
-
-	t.Run("returns false when no config exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		if c.onboarded() {
-			t.Error("expected false when no config exists")
-		}
-	})
-
-	t.Run("returns false when config exists but no wizard section", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"theme":"dark"}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when no wizard section")
-		}
-	})
-
-	t.Run("returns false when wizard section exists but no lastRunAt", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{}}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when wizard.lastRunAt is missing")
-		}
-	})
-
-	t.Run("returns false when wizard.lastRunAt is empty string", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{"lastRunAt":""}}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when wizard.lastRunAt is empty")
-		}
-	})
-
-	t.Run("returns true when wizard.lastRunAt is set", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)
-
-		if !c.onboarded() {
-			t.Error("expected true when wizard.lastRunAt is set")
-		}
-	})
-
-	t.Run("checks legacy clawdbot path", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(legacyDir, 0o755)
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)
-
-		if !c.onboarded() {
-			t.Error("expected true when legacy config has wizard.lastRunAt")
-		}
-	})
-
-	t.Run("prefers new path over legacy", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		newDir := filepath.Join(tmpDir, ".openclaw")
-		legacyDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(newDir, 0o755)
-		os.MkdirAll(legacyDir, 0o755)
-		// New path has no wizard marker
-		os.WriteFile(filepath.Join(newDir, "openclaw.json"), []byte(`{}`), 0o644)
-		// Legacy has wizard marker
-		os.WriteFile(filepath.Join(legacyDir, "clawdbot.json"), []byte(`{"wizard":{"lastRunAt":"2024-01-01T00:00:00Z"}}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false - should prefer new path which has no wizard marker")
-		}
-	})
-
-	t.Run("handles corrupted JSON gracefully", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{corrupted`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false for corrupted JSON")
-		}
-	})
-
-	t.Run("handles wrong type for wizard section", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".openclaw")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(`{"wizard":"not a map"}`), 0o644)
-
-		if c.onboarded() {
-			t.Error("expected false when wizard is wrong type")
-		}
-	})
-}
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -9,8 +9,6 @@ import (
 	"path/filepath"
 	"slices"
 	"strings"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // OpenCode implements Runner and Editor for OpenCode integration
@@ -90,7 +88,7 @@ func (o *OpenCode) Edit(modelList []string) error {
 			"npm":  "@ai-sdk/openai-compatible",
 			"name": "Ollama (local)",
 			"options": map[string]any{
-				"baseURL": envconfig.Host().String() + "/v1",
+				"baseURL": "http://localhost:11434/v1",
 			},
 		}
 	}
--- a/cmd/config/selector.go
+++ b/cmd/config/selector.go
@@ -275,11 +275,7 @@ func parseInput(r io.Reader) (inputEvent, byte, error) {
 func renderSelect(w io.Writer, prompt string, s *selectState) int {
 	filtered := s.filtered()

-	if s.filter == "" {
-		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
-	} else {
-		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
-	}
+	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
 	lineCount := 1

 	if len(filtered) == 0 {
@@ -318,11 +314,7 @@ func renderSelect(w io.Writer, prompt string, s *selectState) int {
 func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
 	filtered := s.filtered()

-	if s.filter == "" {
-		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
-	} else {
-		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
-	}
+	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
 	lineCount := 1

 	if len(filtered) == 0 {
@@ -353,15 +345,10 @@ func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
 				suffix = " " + ansiGray + "(default)" + ansiReset
 			}

-			desc := ""
-			if item.Description != "" {
-				desc = " " + ansiGray + "- " + item.Description + ansiReset
-			}
-
 			if idx == s.highlighted && !s.focusOnButton {
-				fmt.Fprintf(w, "  %s%s %s %s%s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, desc, suffix)
+				fmt.Fprintf(w, "  %s%s %s %s%s%s\r\n", ansiBold, prefix, checkbox, item.Name, ansiReset, suffix)
 			} else {
-				fmt.Fprintf(w, "  %s %s %s%s%s\r\n", prefix, checkbox, item.Name, desc, suffix)
+				fmt.Fprintf(w, "  %s %s %s%s\r\n", prefix, checkbox, item.Name, suffix)
 			}
 			lineCount++
 		}
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -71,10 +71,6 @@
    {
      "source": "/api",
      "destination": "/api/introduction"
-    },
-    {
-      "source": "/integrations/clawdbot",
-      "destination": "/integrations/openclaw"
    }
  ],
  "navigation": {
@@ -107,7 +103,6 @@
            "pages": [
              "/integrations/claude-code",
              "/integrations/cline",
-              "/integrations/openclaw",
              "/integrations/codex",
              "/integrations/droid",
              "/integrations/goose",
--- a/docs/gpu.mdx
+++ b/docs/gpu.mdx
@@ -10,7 +10,6 @@ Check your compute compatibility to see if your card is supported:

 | Compute Capability | Family              | Cards                                                                                                                          |
 | ------------------ | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
-| 12.1               | NVIDIA              | `GB10 (DGX Spark)`                                                                                                             |
 | 12.0               | GeForce RTX 50xx    | `RTX 5060` `RTX 5060 Ti` `RTX 5070` `RTX 5070 Ti` `RTX 5080` `RTX 5090`                                                        |
 |                    | NVIDIA Professional | `RTX PRO 4000 Blackwell` `RTX PRO 4500 Blackwell` `RTX PRO 5000 Blackwell` `RTX PRO 6000 Blackwell`                            |
 | 9.0                | NVIDIA              | `H200` `H100`                                                                                                                  |
@@ -164,4 +163,4 @@ To select specific Vulkan GPU(s), you can set the environment variable
 `GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
 described in the [FAQ](faq#how-do-i-configure-ollama-server). If you
 encounter any problems with Vulkan based GPUs, you can disable all Vulkan GPUs
-by setting `GGML_VK_VISIBLE_DEVICES=-1` 
+by setting `GGML_VK_VISIBLE_DEVICES=-1` 
--- a/docs/import.mdx
+++ b/docs/import.mdx
@@ -134,12 +134,22 @@ success

 ### Supported Quantizations

+- `q4_0`
+- `q4_1`
+- `q5_0`
+- `q5_1`
 - `q8_0`

 #### K-means Quantizations

+- `q3_K_S`
+- `q3_K_M`
+- `q3_K_L`
 - `q4_K_S`
 - `q4_K_M`
+- `q5_K_S`
+- `q5_K_M`
+- `q6_K`

 ## Sharing your model on ollama.com

--- a/docs/integrations/openclaw.mdx
+++ b/docs/integrations/openclaw.mdx
@@ -1,50 +0,0 @@
---
-title: OpenClaw
---
-
-OpenClaw is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.
-
-## Install
-
-Install [OpenClaw](https://openclaw.ai/) 
-
-```bash
-npm install -g openclaw@latest
-```
-
-Then run the onboarding wizard:
-
-```bash
-openclaw onboard --install-daemon
-```
-
-<Note>OpenClaw requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
-
-## Usage with Ollama
-
-### Quick setup
-
-```bash
-ollama launch openclaw
-```
-
-<Note>Previously known as Clawdbot. `ollama launch clawdbot` still works as an alias.</Note>
-
-This configures OpenClaw to use Ollama and starts the gateway.
-If the gateway is already running, no changes need to be made as the gateway will auto-reload the changes. 
-
-
-To configure without launching:
-
-```shell
-ollama launch openclaw --config
-```
-
-## Recommended Models
-
- `qwen3-coder`
- `glm-4.7`
- `gpt-oss:20b`
- `gpt-oss:120b`
-
-Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).
--- a/docs/integrations/opencode.mdx
+++ b/docs/integrations/opencode.mdx
@@ -9,7 +9,7 @@ OpenCode is an open-source AI coding assistant that runs in your terminal.
 Install the [OpenCode CLI](https://opencode.ai):

 ```bash
-curl -fsSL https://opencode.ai/install | bash
+curl -fsSL https://opencode.ai/install.sh | bash
 ```

 <Note>OpenCode requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -201,7 +201,7 @@ var (
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
-	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 0)
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
 	UseAuth = Bool("OLLAMA_AUTH")
 	// Enable Vulkan backend
@@ -290,7 +290,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE":   {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
+		"OLLAMA_CONTEXT_LENGTH":    {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
 		"OLLAMA_NEW_ENGINE":        {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
 		"OLLAMA_REMOTES":           {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -282,7 +282,7 @@ func TestVar(t *testing.T) {

 func TestContextLength(t *testing.T) {
 	cases := map[string]uint{
-		"":     0,
+		"":     4096,
 		"2048": 2048,
 	}

--- a/llm/server.go
+++ b/llm/server.go
@@ -34,6 +34,7 @@ import (
 	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/tokenizer"
 )

 type filteredEnv []string
@@ -80,7 +81,6 @@ type LlamaServer interface {
 	GetPort() int
 	GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
 	HasExited() bool
-	ContextLength() int
 }

 // llmServer is an instance of a runner hosting a single model
@@ -116,7 +116,7 @@ type llamaServer struct {
 type ollamaServer struct {
 	llmServer

-	textProcessor model.TextProcessor // textProcessor handles text encoding/decoding
+	tokenizer tokenizer.Tokenizer // textProcessor handles text encoding/decoding
 }

 // LoadModel will load a model from disk. The model must be in the GGML format.
@@ -142,11 +142,11 @@ func LoadModel(model string, maxArraySize int) (*ggml.GGML, error) {
 // NewLlamaServer will run a server for the given GPUs
 func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath string, f *ggml.GGML, adapters, projectors []string, opts api.Options, numParallel int) (LlamaServer, error) {
 	var llamaModel *llama.Model
-	var textProcessor model.TextProcessor
+	var tokenizer tokenizer.Tokenizer
 	var err error
 	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
 		if len(projectors) == 0 {
-			textProcessor, err = model.NewTextProcessor(modelPath)
+			tokenizer, err = model.NewTextProcessor(modelPath)
 		} else {
 			err = errors.New("split vision models aren't supported")
 		}
@@ -155,7 +155,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 			slog.Debug("model not yet supported by Ollama engine, switching to compatibility mode", "model", modelPath, "error", err)
 		}
 	}
-	if textProcessor == nil {
+	if tokenizer == nil {
 		llamaModel, err = llama.LoadModelFromFile(modelPath, llama.ModelParams{VocabOnly: true})
 		if err != nil {
 			return nil, err
@@ -211,7 +211,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st

 	kvct := strings.ToLower(envconfig.KvCacheType())

-	if textProcessor == nil {
+	if tokenizer == nil {
 		flashAttention := ml.FlashAttentionAuto
 		if faUserSet {
 			if fa {
@@ -261,7 +261,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 	gpuLibs := ml.LibraryPaths(gpus)
 	status := NewStatusWriter(os.Stderr)
 	cmd, port, err := StartRunner(
-		textProcessor != nil,
+		tokenizer != nil,
 		modelPath,
 		gpuLibs,
 		status,
@@ -310,8 +310,8 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 		}
 	}()

-	if textProcessor != nil {
-		return &ollamaServer{llmServer: s, textProcessor: textProcessor}, nil
+	if tokenizer != nil {
+		return &ollamaServer{llmServer: s, tokenizer: tokenizer}, nil
 	} else {
 		return &llamaServer{llmServer: s, ggml: f}, nil
 	}
@@ -1201,8 +1201,7 @@ func (s *llmServer) initModel(ctx context.Context, req LoadRequest, operation Lo

 	resp, err := http.DefaultClient.Do(r)
 	if err != nil {
-		slog.Error("do load request", "error", err)
-		return nil, errors.New("model failed to load, this may be due to resource limitations or an internal error, check ollama server logs for details")
+		return nil, fmt.Errorf("do load request: %w", err)
 	}
 	defer resp.Body.Close()

@@ -1774,7 +1773,7 @@ func (s *llamaServer) Tokenize(ctx context.Context, content string) ([]int, erro
 }

 func (s *ollamaServer) Tokenize(ctx context.Context, content string) ([]int, error) {
-	tokens, err := s.textProcessor.Encode(content, false)
+	tokens, err := s.tokenizer.Encode(content, false)
 	if err != nil {
 		return nil, err
 	}
@@ -1809,7 +1808,7 @@ func (s *ollamaServer) Detokenize(ctx context.Context, tokens []int) (string, er
 		toks[i] = int32(t)
 	}

-	content, err := s.textProcessor.Decode(toks)
+	content, err := s.tokenizer.Decode(toks)
 	if err != nil {
 		return "", err
 	}
@@ -1903,10 +1902,6 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
 	return 0
 }

-func (s *llmServer) ContextLength() int {
-	return s.options.NumCtx
-}
-
 func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
 	devices, err := ml.GetDevicesFromRunner(ctx, s)
 	if err != nil {
--- a/model/ignore_test.go
+++ b/model/ignore_test.go
--- a/model/model.go
+++ b/model/model.go
@@ -23,6 +23,7 @@ import (
 	_ "github.com/ollama/ollama/ml/backend"
 	"github.com/ollama/ollama/ml/nn/pooling"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 var (
@@ -133,7 +134,7 @@ func New(modelPath string, params ml.BackendParams) (Model, error) {
 	return m, nil
 }

-func NewTextProcessor(s string) (TextProcessor, error) {
+func NewTextProcessor(s string) (tokenizer.Tokenizer, error) {
 	r, err := os.Open(s)
 	if err != nil {
 		return nil, err
@@ -150,7 +151,7 @@ func NewTextProcessor(s string) (TextProcessor, error) {
 		return nil, err
 	}

-	tp, ok := m.(TextProcessor)
+	tp, ok := m.(tokenizer.Tokenizer)
 	if !ok {
 		return nil, ErrUnsupportedTokenizer
 	}
--- a/model/models/bert/embed.go
+++ b/model/models/bert/embed.go
@@ -10,11 +10,12 @@ import (
 	"github.com/ollama/ollama/ml/nn/pooling"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	TokenEmbedding     *nn.Embedding `gguf:"token_embd"`
 	TypeEmbedding      *nn.Embedding `gguf:"token_types"`
@@ -129,7 +130,7 @@ func (o Options) headDim() int {
 }

 func New(c fs.Config) (model.Model, error) {
-	vocab := &model.Vocabulary{
+	vocab := &tokenizer.Vocabulary{
 		Values: c.Strings("tokenizer.ggml.tokens"),
 		Scores: c.Floats("tokenizer.ggml.scores"),
 		Types:  c.Ints("tokenizer.ggml.token_type"),
@@ -153,17 +154,17 @@ func New(c fs.Config) (model.Model, error) {
 		},
 	}

-	var processor model.TextProcessor
+	var t tokenizer.Tokenizer
 	switch c.String("tokenizer.ggml.model", "bert") {
 	case "bert":
-		processor = model.NewWordPiece(vocab, true)
+		t = tokenizer.NewWordPiece(vocab, true)
 	default:
 		return nil, model.ErrUnsupportedTokenizer
 	}

 	return &Model{
-		TextProcessor: processor,
-		Layers:        make([]EncoderLayer, c.Uint("block_count")),
+		Tokenizer: t,
+		Layers:    make([]EncoderLayer, c.Uint("block_count")),
 		Options: Options{
 			hiddenSize:  int(c.Uint("embedding_length")),
 			numHeads:    int(c.Uint("attention.head_count")),
--- a/model/models/deepseek2/model.go
+++ b/model/models/deepseek2/model.go
@@ -13,6 +13,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Options struct {
@@ -222,7 +223,7 @@ func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tens

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@@ -277,8 +278,8 @@ func New(c fs.Config) (model.Model, error) {
 	}

 	m := Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/deepseekocr/model.go
+++ b/model/models/deepseekocr/model.go
@@ -10,11 +10,12 @@ import (
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	Sam    *samModel    `gguf:"s"`
 	Vision *visionModel `gguf:"v"`
@@ -134,8 +135,8 @@ func init() {
 		}

 		m := Model{
-			TextProcessor: model.NewBytePairEncoding(
-				&model.Vocabulary{
+			Tokenizer: tokenizer.NewBytePairEncoding(
+				&tokenizer.Vocabulary{
 					Values: c.Strings("tokenizer.ggml.tokens"),
 					Types:  c.Ints("tokenizer.ggml.token_type"),
 					Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/gemma2/model.go
+++ b/model/models/gemma2/model.go
@@ -10,6 +10,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Options struct {
@@ -27,7 +28,7 @@ func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions

 type Model struct {
 	model.Base
-	model.SentencePiece
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@@ -43,8 +44,8 @@ const (

 func New(c fs.Config) (model.Model, error) {
 	m := Model{
-		SentencePiece: model.NewSentencePiece(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewSentencePiece(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Scores: c.Floats("tokenizer.ggml.scores"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
--- a/model/models/gemma3/embed.go
+++ b/model/models/gemma3/embed.go
@@ -7,11 +7,12 @@ import (
 	"github.com/ollama/ollama/ml/nn/pooling"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type embedModel struct {
 	model.Base
-	model.SentencePiece
+	tokenizer.Tokenizer

 	*TextModel
 	poolingType pooling.Type
@@ -31,8 +32,8 @@ func (m *embedModel) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, erro

 func newEmbedModel(c fs.Config) (model.Model, error) {
 	m := &embedModel{
-		SentencePiece: model.NewSentencePiece(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewSentencePiece(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Scores: c.Floats("tokenizer.ggml.scores"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -12,11 +12,12 @@ import (
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	*VisionModel `gguf:"v"`
 	*TextModel
@@ -54,7 +55,7 @@ func (p *MultiModalProjector) Forward(ctx ml.Context, visionOutputs ml.Tensor, i
 }

 func New(c fs.Config) (model.Model, error) {
-	vocabulary := model.Vocabulary{
+	vocabulary := tokenizer.Vocabulary{
 		Values: c.Strings("tokenizer.ggml.tokens"),
 		Scores: c.Floats("tokenizer.ggml.scores"),
 		Types:  c.Ints("tokenizer.ggml.token_type"),
@@ -70,19 +71,19 @@ func New(c fs.Config) (model.Model, error) {
 		),
 	}

-	var processor model.TextProcessor
+	var t tokenizer.Tokenizer
 	switch c.String("tokenizer.ggml.model") {
 	case "gpt2":
-		processor = model.NewBytePairEncoding(&vocabulary)
+		t = tokenizer.NewBytePairEncoding(&vocabulary)
 	default:
 		// Previous uploads of Gemma 3 on Ollama did not have token 106
 		// (i.e. "<end_of_turn>") so we need to add in case it's not already present
 		vocabulary.EOS = append(vocabulary.EOS, int32(c.Uint("tokenizer.ggml.eot_token_id", 106)))
-		processor = model.NewSentencePiece(&vocabulary)
+		t = tokenizer.NewSentencePiece(&vocabulary)
 	}

 	m := Model{
-		TextProcessor:  processor,
+		Tokenizer:      t,
 		ImageProcessor: newImageProcessor(c),
 		VisionModel:    newVisionModel(c),
 		TextModel:      newTextModel(c),
--- a/model/models/gemma3n/model.go
+++ b/model/models/gemma3n/model.go
@@ -6,11 +6,12 @@ import (
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.SentencePiece
+	tokenizer.Tokenizer

 	*TextModel
 }
@@ -23,8 +24,8 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
 func New(c fs.Config) (model.Model, error) {
 	m := Model{
 		TextModel: newTextModel(c),
-		SentencePiece: model.NewSentencePiece(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewSentencePiece(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Scores: c.Floats("tokenizer.ggml.scores"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
--- a/model/models/glm4moelite/model.go
+++ b/model/models/glm4moelite/model.go
@@ -10,6 +10,7 @@ import (
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 var ErrOldModelFormat = errors.New("this model uses a weight format that is no longer supported; please re-download it")
@@ -198,7 +199,7 @@ func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tens

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@@ -236,8 +237,8 @@ func New(c fs.Config) (model.Model, error) {
 	}

 	m := Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/gptoss/model.go
+++ b/model/models/gptoss/model.go
@@ -12,11 +12,12 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Transformer struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	TokenEmbedding    *nn.Embedding      `gguf:"token_embd"`
 	TransformerBlocks []TransformerBlock `gguf:"blk"`
@@ -196,8 +197,8 @@ func (mlp *MLPBlock) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Optio
 func New(c fs.Config) (model.Model, error) {
 	m := Transformer{
 		TransformerBlocks: make([]TransformerBlock, c.Uint("block_count")),
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/lfm2/model.go
+++ b/model/models/lfm2/model.go
@@ -10,6 +10,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Options struct {
@@ -59,7 +60,7 @@ func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@@ -78,7 +79,7 @@ func New(c fs.Config) (model.Model, error) {
 		return nil, model.ErrUnsupportedTokenizer
 	}

-	vocabulary := model.Vocabulary{
+	vocabulary := tokenizer.Vocabulary{
 		Values: c.Strings("tokenizer.ggml.tokens"),
 		Scores: c.Floats("tokenizer.ggml.scores"),
 		Types:  c.Ints("tokenizer.ggml.token_type"),
@@ -104,8 +105,8 @@ func New(c fs.Config) (model.Model, error) {
 	}

 	m := Model{
-		TextProcessor: model.NewBytePairEncoding(&vocabulary, pretokenizers...),
-		Layers:        make([]Layer, c.Uint("block_count")),
+		Tokenizer: tokenizer.NewBytePairEncoding(&vocabulary, pretokenizers...),
+		Layers:    make([]Layer, c.Uint("block_count")),
 		Options: Options{
 			hiddenSize:            int(c.Uint("embedding_length")),
 			headDim:               int(c.Uint("attention.key_length")),
--- a/model/models/llama/model.go
+++ b/model/models/llama/model.go
@@ -11,6 +11,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Options struct {
@@ -25,7 +26,7 @@ func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@@ -41,8 +42,8 @@ func New(c fs.Config) (model.Model, error) {
 		return nil, model.ErrUnsupportedModel
 	}

-	var processor model.TextProcessor
-	vocabulary := model.Vocabulary{
+	var processor tokenizer.Tokenizer
+	vocabulary := tokenizer.Vocabulary{
 		Values: c.Strings("tokenizer.ggml.tokens"),
 		Scores: c.Floats("tokenizer.ggml.scores"),
 		Types:  c.Ints("tokenizer.ggml.token_type"),
@@ -80,16 +81,16 @@ func New(c fs.Config) (model.Model, error) {
 				"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
 			}
 		}
-		processor = model.NewBytePairEncoding(&vocabulary, pretokenizers...)
+		processor = tokenizer.NewBytePairEncoding(&vocabulary, pretokenizers...)
 	case "llama":
-		processor = model.NewSentencePiece(&vocabulary)
+		processor = tokenizer.NewSentencePiece(&vocabulary)
 	default:
 		return nil, model.ErrUnsupportedTokenizer
 	}

 	m := Model{
-		TextProcessor: processor,
-		Layers:        make([]Layer, c.Uint("block_count")),
+		Tokenizer: processor,
+		Layers:    make([]Layer, c.Uint("block_count")),
 		Options: Options{
 			hiddenSize: int(c.Uint("embedding_length")),
 			numHeads:   int(c.Uint("attention.head_count")),
--- a/model/models/llama4/model.go
+++ b/model/models/llama4/model.go
@@ -11,11 +11,12 @@ import (
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer
 	ImageProcessor

 	*VisionModel `gguf:"v"`
@@ -33,8 +34,8 @@ func (p *Projector) Forward(ctx ml.Context, visionOutputs ml.Tensor) ml.Tensor {

 func New(c fs.Config) (model.Model, error) {
 	m := Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/mistral3/model.go
+++ b/model/models/mistral3/model.go
@@ -11,11 +11,12 @@ import (
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	*TextModel
 	*VisionModel         `gguf:"v"`
@@ -28,12 +29,12 @@ type Model struct {
 var _ model.MultimodalProcessor = (*Model)(nil)

 // Implement TextProcessor interface
-var _ model.TextProcessor = (*Model)(nil)
+var _ tokenizer.Tokenizer = (*Model)(nil)

 func New(c fs.Config) (model.Model, error) {
 	m := &Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/mllama/model.go
+++ b/model/models/mllama/model.go
@@ -11,11 +11,12 @@ import (
 	"github.com/ollama/ollama/ml/nn"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	*VisionModel `gguf:"v"`
 	*TextModel
@@ -32,8 +33,8 @@ const (

 func New(c fs.Config) (model.Model, error) {
 	m := Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/nomicbert/model.go
+++ b/model/models/nomicbert/model.go
@@ -11,11 +11,12 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	TokenEmbedding     *nn.Embedding `gguf:"token_embd"`
 	TypeEmbedding      *nn.Embedding `gguf:"token_types"`
@@ -178,8 +179,8 @@ func New(c fs.Config) (model.Model, error) {
 	numHeads := int(c.Uint("attention.head_count"))
 	headDim := hiddenSize / numHeads

-	processor := model.NewWordPiece(
-		&model.Vocabulary{
+	tokenizer := tokenizer.NewWordPiece(
+		&tokenizer.Vocabulary{
 			Values: c.Strings("tokenizer.ggml.tokens"),
 			Scores: c.Floats("tokenizer.ggml.scores"),
 			Types:  c.Ints("tokenizer.ggml.token_type"),
@@ -219,8 +220,8 @@ func New(c fs.Config) (model.Model, error) {
 	}

 	return &Model{
-		TextProcessor: processor,
-		Layers:        layers,
+		Tokenizer: tokenizer,
+		Layers:    layers,
 		Options: Options{
 			hiddenSize:      hiddenSize,
 			numHeads:        numHeads,
--- a/model/models/olmo3/model.go
+++ b/model/models/olmo3/model.go
@@ -11,6 +11,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 const (
@@ -33,7 +34,7 @@ type Options struct {

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	Layers         []Layer       `gguf:"blk"`
@@ -44,7 +45,7 @@ type Model struct {
 }

 func New(c fs.Config) (model.Model, error) {
-	vocabulary := model.Vocabulary{
+	vocabulary := tokenizer.Vocabulary{
 		Values: c.Strings("tokenizer.ggml.tokens"),
 		Scores: c.Floats("tokenizer.ggml.scores"),
 		Types:  c.Ints("tokenizer.ggml.token_type"),
@@ -58,14 +59,14 @@ func New(c fs.Config) (model.Model, error) {
 		),
 	}

-	processor := model.NewBytePairEncoding(
+	tokenizer := tokenizer.NewBytePairEncoding(
 		&vocabulary,
 		"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
 	)

 	m := Model{
-		TextProcessor: processor,
-		Layers:        make([]Layer, c.Uint("block_count")),
+		Tokenizer: tokenizer,
+		Layers:    make([]Layer, c.Uint("block_count")),
 		Options: Options{
 			hiddenSize:            int(c.Uint("embedding_length")),
 			numHeads:              int(c.Uint("attention.head_count")),
--- a/model/models/qwen2/model.go
+++ b/model/models/qwen2/model.go
@@ -13,6 +13,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Options struct {
@@ -92,7 +93,7 @@ func (d DecoderLayer) Forward(ctx ml.Context, hiddenStates, positions, outputs m

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding  `gguf:"token_embd"`
 	Layers         []DecoderLayer `gguf:"blk"`
@@ -139,8 +140,8 @@ func New(c fs.Config) (model.Model, error) {
 	}
 	m := Model{
 		Layers: make([]DecoderLayer, c.Uint("block_count")),
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/qwen25vl/model.go
+++ b/model/models/qwen25vl/model.go
@@ -10,11 +10,12 @@ import (
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	*TextModel
 	*VisionModel `gguf:"v"`
@@ -27,8 +28,8 @@ var _ model.MultimodalProcessor = (*Model)(nil)

 func New(c fs.Config) (model.Model, error) {
 	m := &Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/qwen3/embed.go
+++ b/model/models/qwen3/embed.go
@@ -7,11 +7,12 @@ import (
 	"github.com/ollama/ollama/ml/nn/pooling"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type embedModel struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	*Model
 	poolingType pooling.Type
@@ -34,8 +35,8 @@ func newEmbed(c fs.Config) (model.Model, error) {
 		layers[i].MLP = &dense{}
 	}
 	m := embedModel{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/qwen3/model.go
+++ b/model/models/qwen3/model.go
@@ -12,6 +12,7 @@ import (
 	"github.com/ollama/ollama/ml/nn/rope"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Options struct {
@@ -159,7 +160,7 @@ func (d *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tens

 type Model struct {
 	model.Base
-	model.BytePairEncoding
+	tokenizer.Tokenizer

 	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
 	OutputNorm     *nn.RMSNorm   `gguf:"output_norm"`
@@ -218,8 +219,8 @@ func New(c fs.Config) (model.Model, error) {
 	}

 	m := Model{
-		BytePairEncoding: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/models/qwen3vl/model.go
+++ b/model/models/qwen3vl/model.go
@@ -10,11 +10,12 @@ import (
 	"github.com/ollama/ollama/ml"
 	"github.com/ollama/ollama/model"
 	"github.com/ollama/ollama/model/input"
+	"github.com/ollama/ollama/tokenizer"
 )

 type Model struct {
 	model.Base
-	model.TextProcessor
+	tokenizer.Tokenizer

 	*TextModel
 	*VisionModel `gguf:"v"`
@@ -172,8 +173,8 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {

 func New(c fs.Config) (model.Model, error) {
 	m := Model{
-		TextProcessor: model.NewBytePairEncoding(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewBytePairEncoding(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
--- a/model/parsers/ministral.go
+++ b/model/parsers/ministral.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
-	"unicode"

 	"github.com/ollama/ollama/api"
 )
@@ -18,34 +17,12 @@ const (
 	ministralCollectingToolArgs
 )

-// ministralEvent represents an event emitted during parsing
-type ministralEvent interface {
-	isMinistralEvent()
-}
-
-type ministralEventContent struct {
-	content string
-}
-
-type ministralEventThinking struct {
-	thinking string
-}
-
-type ministralEventToolCall struct {
-	name string
-	args string // raw JSON string
-}
-
-func (ministralEventContent) isMinistralEvent()  {}
-func (ministralEventThinking) isMinistralEvent() {}
-func (ministralEventToolCall) isMinistralEvent() {}
-
 type MinistralParser struct {
 	state              ministralParserState
 	buffer             strings.Builder
 	tools              []api.Tool
 	hasThinkingSupport bool
-	pendingToolName    string // stores tool name while collecting args
+	currentTool        *api.Tool
 }

 func (p *MinistralParser) HasToolSupport() bool {
@@ -86,251 +63,74 @@ func toolByName(tools []api.Tool, n string) (*api.Tool, error) {
 	return nil, fmt.Errorf("tool '%s' not found", n)
 }

-const (
-	ministralToolCallsTag = "[TOOL_CALLS]"
-	ministralThinkTag     = "[THINK]"
-	ministralThinkEndTag  = "[/THINK]"
-	ministralArgsTag      = "[ARGS]"
-)
-
-// eat consumes the parser's buffer, and returns a list of any unambiguous
-// events from the current parser state. The second return value indicates
-// whether to keep looping (true when state transitions, false when waiting
-// for more data).
-func (p *MinistralParser) eat() ([]ministralEvent, bool) {
-	var events []ministralEvent
-
-	switch p.state {
-	case ministralCollectingContent:
-		bufStr := p.buffer.String()
-
-		// Check for [TOOL_CALLS] tag
-		if strings.Contains(bufStr, ministralToolCallsTag) {
-			split := strings.SplitN(bufStr, ministralToolCallsTag, 2)
-			before := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			if len(before) > 0 {
-				events = append(events, ministralEventContent{content: before})
-			}
-			after := split[1]
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			p.state = ministralCollectingToolName
-			return events, true
-		}
-
-		// Check for [THINK] tag
-		if strings.Contains(bufStr, ministralThinkTag) {
-			split := strings.SplitN(bufStr, ministralThinkTag, 2)
-			before := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			if len(before) > 0 {
-				events = append(events, ministralEventContent{content: before})
-			}
-			after := split[1]
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			p.state = ministralCollectingThinkingContent
-			return events, true
-		}
-
-		// Check for partial tag overlap with [TOOL_CALLS] or [THINK]
-		overlapToolCalls := overlap(bufStr, ministralToolCallsTag)
-		overlapThink := overlap(bufStr, ministralThinkTag)
-		maxOverlap := max(overlapToolCalls, overlapThink)
-
-		if maxOverlap > 0 {
-			// Withhold the potential partial tag
-			beforePartialTag := bufStr[:len(bufStr)-maxOverlap]
-			trailingWS := trailingWhitespaceLen(beforePartialTag)
-			ambiguousStart := len(beforePartialTag) - trailingWS
-			unambiguous := bufStr[:ambiguousStart]
-			ambiguous := bufStr[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, ministralEventContent{content: unambiguous})
-			}
-			return events, false
-		}
-
-		// No tag found: emit content but withhold trailing whitespace
-		whitespaceLen := trailingWhitespaceLen(bufStr)
-		ambiguousStart := len(bufStr) - whitespaceLen
-		unambiguous := bufStr[:ambiguousStart]
-		ambiguous := bufStr[ambiguousStart:]
-		p.buffer.Reset()
-		p.buffer.WriteString(ambiguous)
-		if len(unambiguous) > 0 {
-			events = append(events, ministralEventContent{content: unambiguous})
-		}
-		return events, false
-
-	case ministralCollectingThinkingContent:
-		bufStr := p.buffer.String()
-
-		if strings.Contains(bufStr, ministralThinkEndTag) {
-			split := strings.SplitN(bufStr, ministralThinkEndTag, 2)
-			thinkingContent := split[0]
-			after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			if len(thinkingContent) > 0 {
-				events = append(events, ministralEventThinking{thinking: thinkingContent})
-			}
-			p.state = ministralCollectingContent
-			return events, true
-		}
-
-		// Check for partial overlap with [/THINK]
-		if overlapLen := overlap(bufStr, ministralThinkEndTag); overlapLen > 0 {
-			unambiguous := bufStr[:len(bufStr)-overlapLen]
-			ambiguous := bufStr[len(bufStr)-overlapLen:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, ministralEventThinking{thinking: unambiguous})
-			}
-			return events, false
-		}
-
-		// No tag found: emit all thinking content
-		p.buffer.Reset()
-		if len(bufStr) > 0 {
-			events = append(events, ministralEventThinking{thinking: bufStr})
-		}
-		return events, false
-
-	case ministralCollectingToolName:
-		bufStr := p.buffer.String()
-
-		if strings.Contains(bufStr, ministralArgsTag) {
-			split := strings.SplitN(bufStr, ministralArgsTag, 2)
-			toolName := split[0]
-			after := split[1]
-			p.pendingToolName = toolName
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			p.state = ministralCollectingToolArgs
-			return events, true
-		}
-		// Wait for more data
-		return events, false
-
-	case ministralCollectingToolArgs:
-		bufStr := p.buffer.String()
-		jsonEnd := findJSONEnd(bufStr)
-
-		if jsonEnd != -1 {
-			jsonStr := bufStr[:jsonEnd+1]
-			remaining := bufStr[jsonEnd+1:]
-
-			events = append(events, ministralEventToolCall{
-				name: p.pendingToolName,
-				args: jsonStr,
-			})
-
-			p.pendingToolName = ""
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = ministralCollectingContent
-			return events, true
-		}
-		// Wait for more data
-		return events, false
-
-	default:
-		panic("unexpected ministral event")
-	}
-}
-
-// parseEvents loops calling eat() until it returns false
-func (p *MinistralParser) parseEvents() []ministralEvent {
-	var all []ministralEvent
-	keepLooping := true
-	for keepLooping {
-		var events []ministralEvent
-		events, keepLooping = p.eat()
-		all = append(all, events...)
-	}
-	return all
-}
-
 func (p *MinistralParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
 	p.buffer.WriteString(s)

-	events := p.parseEvents()
-
-	var contentBuilder, thinkingBuilder strings.Builder
-	var toolCalls []api.ToolCall
-
-	for _, event := range events {
-		switch e := event.(type) {
-		case ministralEventContent:
-			contentBuilder.WriteString(e.content)
-		case ministralEventThinking:
-			thinkingBuilder.WriteString(e.thinking)
-		case ministralEventToolCall:
-			// Validate tool exists
-			tool, toolErr := toolByName(p.tools, e.name)
-			if toolErr != nil {
-				return contentBuilder.String(), thinkingBuilder.String(), toolCalls, toolErr
+	switch p.state {
+	case ministralCollectingContent:
+		if strings.Contains(p.buffer.String(), "[TOOL_CALLS]") {
+			before, _ := splitAtTag(&p.buffer, "[TOOL_CALLS]", false)
+			if before != "" {
+				return before, "", calls, nil
 			}
-			// Parse JSON arguments
+			p.state = ministralCollectingToolName
+		} else if strings.Contains(p.buffer.String(), "[THINK]") {
+			p.state = ministralCollectingThinkingContent
+			return "", "", calls, nil
+		} else {
+			p.buffer.Reset()
+			return s, "", calls, nil
+		}
+	case ministralCollectingThinkingContent:
+		if strings.Contains(p.buffer.String(), "[/THINK]") {
+			thinkingContent, after := splitAtTag(&p.buffer, "[/THINK]", true)
+			p.state = ministralCollectingContent
+			if after != "" {
+				p.buffer.Reset()
+				return after, thinkingContent, calls, nil
+			}
+			return "", thinkingContent, calls, nil
+		} else {
+			p.buffer.Reset()
+			return "", s, calls, nil
+		}
+	case ministralCollectingToolName:
+		if strings.Contains(p.buffer.String(), "[ARGS]") {
+			name, _ := splitAtTag(&p.buffer, "[ARGS]", false)
+
+			t, err := toolByName(p.tools, name)
+			if err != nil {
+				return "", "", calls, err
+			}
+			p.currentTool = t
+			p.state = ministralCollectingToolArgs
+			return "", "", calls, nil
+		}
+		return "", "", calls, nil
+	case ministralCollectingToolArgs:
+		if strings.Contains(p.buffer.String(), "}") {
+			before, _ := splitAtTag(&p.buffer, "}", false)
+			before += "}"
+
 			var args api.ToolCallFunctionArguments
-			if jsonErr := json.Unmarshal([]byte(e.args), &args); jsonErr != nil {
-				return contentBuilder.String(), thinkingBuilder.String(), toolCalls, jsonErr
+			if err := json.Unmarshal([]byte(before), &args); err != nil {
+				// todo - throw a better error
+				return "", "", calls, err
 			}
-			toolCalls = append(toolCalls, api.ToolCall{
+
+			p.state = ministralCollectingContent
+
+			call := api.ToolCall{
 				Function: api.ToolCallFunction{
-					Name:      tool.Function.Name,
+					Name:      p.currentTool.Function.Name,
 					Arguments: args,
 				},
-			})
+			}
+			calls = append(calls, call)
+			return "", "", calls, nil
 		}
+		return "", "", calls, nil
 	}

-	return contentBuilder.String(), thinkingBuilder.String(), toolCalls, nil
-}
-
-// findJSONEnd finds the index of the closing brace that completes a JSON object.
-// It properly handles nested objects, arrays, and strings (including escaped characters).
-// Returns -1 if the JSON is not yet complete.
-func findJSONEnd(s string) int {
-	depth := 0
-	inString := false
-	escaped := false
-
-	for i, r := range s {
-		if inString {
-			switch {
-			case escaped:
-				// If the previous character was a backslash, skip this character
-				escaped = false
-			case r == '\\':
-				// Mark the next character as escaped
-				escaped = true
-			case r == '"':
-				// End of string literal
-				inString = false
-			}
-			continue
-		}
-
-		switch r {
-		case '"':
-			// Start of string literal
-			inString = true
-		case '{', '[':
-			// Increase nesting level for objects and arrays
-			depth++
-		case '}', ']':
-			// Decrease nesting level
-			depth--
-			if depth == 0 {
-				// Reached the end of the root JSON structure
-				return i
-			}
-		}
-	}
-
-	return -1
+	return p.buffer.String(), thinking, calls, nil
 }
--- a/model/parsers/ministral_test.go
+++ b/model/parsers/ministral_test.go
@@ -1,545 +0,0 @@
-package parsers
-
-import (
-	"reflect"
-	"testing"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestMinistralParserStreaming(t *testing.T) {
-	type step struct {
-		input      string
-		wantEvents []ministralEvent
-	}
-
-	cases := []struct {
-		desc  string
-		tools []api.Tool
-		steps []step
-		think bool // whether to enable thinking support
-	}{
-		// Content streaming
-		{
-			desc: "simple content",
-			steps: []step{
-				{input: "Hello, how can I help you?", wantEvents: []ministralEvent{
-					ministralEventContent{content: "Hello, how can I help you?"},
-				}},
-			},
-		},
-		{
-			desc: "streaming content word by word",
-			steps: []step{
-				{input: "Hello,", wantEvents: []ministralEvent{ministralEventContent{content: "Hello,"}}},
-				{input: " how", wantEvents: []ministralEvent{ministralEventContent{content: " how"}}},
-				{input: " can I help?", wantEvents: []ministralEvent{ministralEventContent{content: " can I help?"}}},
-			},
-		},
-
-		// Simple tool calls
-		{
-			desc:  "simple tool call",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "San Francisco"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "get_weather", args: `{"location": "San Francisco"}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with nested object",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]create_entities[ARGS]{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with deeply nested objects",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "update_config"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]update_config[ARGS]{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "update_config", args: `{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with array of objects",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "process_items"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]process_items[ARGS]{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "process_items", args: `{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with escaped quotes in string",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "search"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]search[ARGS]{"query": "say \"hello\""}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "search", args: `{"query": "say \"hello\""}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with braces inside string",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "format"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]format[ARGS]{"template": "Hello {name}!"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "format", args: `{"template": "Hello {name}!"}`},
-				}},
-			},
-		},
-		{
-			desc:  "empty JSON object",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "no_args"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]no_args[ARGS]{}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "no_args", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "JSON with newlines in string",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "write"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]write[ARGS]{"content": "line1\nline2\nline3"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "write", args: `{"content": "line1\nline2\nline3"}`},
-				}},
-			},
-		},
-		{
-			desc:  "backslash in string value",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "path"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]path[ARGS]{"dir": "C:\\Users\\test"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "path", args: `{"dir": "C:\\Users\\test"}`},
-				}},
-			},
-		},
-
-		// Content after tool call
-		{
-			desc:  "content after tool call",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				// NOTE: It's unclear if this is valid Ministral output, but the parser
-				// currently treats text after a tool call as regular content. This test
-				// documents that behavior so we notice if it changes.
-				{input: `[TOOL_CALLS]test[ARGS]{"a": 1}some content after`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "test", args: `{"a": 1}`},
-					ministralEventContent{content: "some content after"},
-				}},
-			},
-		},
-
-		// Multiple tool calls
-		{
-			desc: "multiple tool calls in sequence",
-			tools: []api.Tool{
-				{Function: api.ToolFunction{Name: "get_weather"}},
-				{Function: api.ToolFunction{Name: "get_time"}},
-			},
-			steps: []step{
-				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "NYC"}[TOOL_CALLS]get_time[ARGS]{"timezone": "EST"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "get_weather", args: `{"location": "NYC"}`},
-					ministralEventToolCall{name: "get_time", args: `{"timezone": "EST"}`},
-				}},
-			},
-		},
-		{
-			desc: "multiple tool calls streamed separately",
-			tools: []api.Tool{
-				{Function: api.ToolFunction{Name: "tool_a"}},
-				{Function: api.ToolFunction{Name: "tool_b"}},
-			},
-			steps: []step{
-				{input: `[TOOL_CALLS]tool_a[ARGS]{"x": 1}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "tool_a", args: `{"x": 1}`},
-				}},
-				{input: `[TOOL_CALLS]tool_b[ARGS]{"y": 2}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "tool_b", args: `{"y": 2}`},
-				}},
-			},
-		},
-
-		// Streaming tool calls
-		{
-			desc:  "streaming tool call with nested objects",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
-			steps: []step{
-				{input: "[TOOL_CALLS]create_entities[ARGS]", wantEvents: []ministralEvent{}},
-				{input: `{"entities": [{"entityType": "Person",`, wantEvents: []ministralEvent{}},
-				{input: ` "name": "Jack",`, wantEvents: []ministralEvent{}},
-				{input: ` "observations": ["Works`, wantEvents: []ministralEvent{}},
-				{input: ` as a baker"]}`, wantEvents: []ministralEvent{}},
-				{input: `]}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
-				}},
-			},
-		},
-		{
-			desc:  "streaming with incomplete JSON waits for completion",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "[TOOL_CALLS]test[ARGS]{", wantEvents: []ministralEvent{}},
-				{input: `"a": {`, wantEvents: []ministralEvent{}},
-				{input: `"b": 1`, wantEvents: []ministralEvent{}},
-				{input: `}`, wantEvents: []ministralEvent{}},
-				{input: `}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "test", args: `{"a": {"b": 1}}`},
-				}},
-			},
-		},
-
-		// Partial tag handling
-		{
-			desc: "partial tool tag fakeout",
-			steps: []step{
-				{input: "abc[TOOL", wantEvents: []ministralEvent{ministralEventContent{content: "abc"}}},
-				{input: " not a tag", wantEvents: []ministralEvent{ministralEventContent{content: "[TOOL not a tag"}}},
-			},
-		},
-		{
-			desc:  "tool call tag split across chunks",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "[TOOL_", wantEvents: []ministralEvent{}},
-				{input: "CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "content before tool call",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
-			steps: []step{
-				{input: "hello [TOOL_CALLS]get_weather[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "hello"},
-					ministralEventToolCall{name: "get_weather", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "whitespace between content and tool call is trimmed",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "content \n [TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "tabs and newlines before tool call are trimmed",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "content\t\n\t[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "non-breaking space before tool call is trimmed",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				// \u00a0 is non-breaking space, which unicode.IsSpace considers whitespace
-				{input: "content\u00a0[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc: "whitespace before THINK tag is trimmed",
-			steps: []step{
-				{input: "content \n [THINK]thinking[/THINK]after", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventThinking{thinking: "thinking"},
-					ministralEventContent{content: "after"},
-				}},
-			},
-		},
-		{
-			desc: "trailing whitespace withheld then emitted",
-			steps: []step{
-				{input: "Hello ", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
-				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: " world"}}},
-			},
-		},
-		{
-			desc: "trailing newline withheld then emitted",
-			steps: []step{
-				{input: "Hello\n", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
-				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: "\nworld"}}},
-			},
-		},
-
-		// Thinking support
-		{
-			desc:  "thinking content",
-			think: true,
-			steps: []step{
-				{input: "thinking here[/THINK]", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "thinking here"},
-				}},
-				{input: "content after", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content after"},
-				}},
-			},
-		},
-		{
-			desc:  "thinking with whitespace after end tag",
-			think: true,
-			steps: []step{
-				{input: "my thoughts[/THINK]  \n  response", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "my thoughts"},
-					ministralEventContent{content: "response"},
-				}},
-			},
-		},
-		{
-			desc:  "non-breaking space after think end tag is trimmed",
-			think: true,
-			steps: []step{
-				// \u00a0 is non-breaking space
-				{input: "thinking[/THINK]\u00a0response", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "thinking"},
-					ministralEventContent{content: "response"},
-				}},
-			},
-		},
-		{
-			desc:  "partial think end tag",
-			think: true,
-			steps: []step{
-				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
-				{input: "NK]after", wantEvents: []ministralEvent{ministralEventContent{content: "after"}}},
-			},
-		},
-		{
-			desc:  "think tag fakeout",
-			think: true,
-			steps: []step{
-				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
-				{input: "not end tag", wantEvents: []ministralEvent{ministralEventThinking{thinking: "[/THInot end tag"}}},
-			},
-		},
-		{
-			desc:  "thinking then tool call",
-			think: true,
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "let me think[/THINK][TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "let me think"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-
-		// Content then THINK tag transition
-		{
-			desc: "content then think tag",
-			steps: []step{
-				{input: "content[THINK]thinking[/THINK]more", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventThinking{thinking: "thinking"},
-					ministralEventContent{content: "more"},
-				}},
-			},
-		},
-
-		// Unicode handling
-		{
-			desc: "unicode content",
-			steps: []step{
-				{input: "你好 🌍 مرحبا", wantEvents: []ministralEvent{
-					ministralEventContent{content: "你好 🌍 مرحبا"},
-				}},
-			},
-		},
-		{
-			desc:  "unicode in tool args",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "greet"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]greet[ARGS]{"message": "你好 🌍"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "greet", args: `{"message": "你好 🌍"}`},
-				}},
-			},
-		},
-	}
-
-	for _, tc := range cases {
-		t.Run(tc.desc, func(t *testing.T) {
-			parser := MinistralParser{}
-			parser.hasThinkingSupport = tc.think
-			parser.Init(tc.tools, nil, nil)
-
-			for i, step := range tc.steps {
-				parser.buffer.WriteString(step.input)
-				gotEvents := parser.parseEvents()
-
-				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
-					// avoid deep equal on empty vs. nil slices
-					continue
-				}
-
-				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
-					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
-				}
-			}
-		})
-	}
-}
-
-func TestMinistralParser_Errors(t *testing.T) {
-	t.Run("unknown tool returns error", func(t *testing.T) {
-		p := &MinistralParser{}
-		p.Init([]api.Tool{{Function: api.ToolFunction{Name: "known_tool"}}}, nil, nil)
-
-		_, _, _, err := p.Add(`[TOOL_CALLS]unknown_tool[ARGS]{"a": 1}`, true)
-		if err == nil {
-			t.Fatal("expected error for unknown tool")
-		}
-	})
-
-	t.Run("invalid JSON returns error", func(t *testing.T) {
-		p := &MinistralParser{}
-		p.Init([]api.Tool{{Function: api.ToolFunction{Name: "test"}}}, nil, nil)
-
-		_, _, _, err := p.Add(`[TOOL_CALLS]test[ARGS]{invalid json}`, true)
-		if err == nil {
-			t.Fatal("expected error for invalid JSON")
-		}
-	})
-}
-
-func TestFindJSONEnd(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    string
-		expected int
-	}{
-		{
-			name:     "simple object",
-			input:    `{"a": 1}`,
-			expected: 7,
-		},
-		{
-			name:     "nested object",
-			input:    `{"a": {"b": 2}}`,
-			expected: 14,
-		},
-		{
-			name:     "array inside object",
-			input:    `{"items": [1, 2, 3]}`,
-			expected: 19,
-		},
-		{
-			name:     "braces in string",
-			input:    `{"template": "Hello {name}!"}`,
-			expected: 28,
-		},
-		{
-			name:     "escaped quotes",
-			input:    `{"msg": "say \"hi\""}`,
-			expected: 20,
-		},
-		{
-			name:     "incomplete object",
-			input:    `{"a": {"b": 1}`,
-			expected: -1,
-		},
-		{
-			name:     "deeply nested",
-			input:    `{"a": {"b": {"c": {"d": 1}}}}`,
-			expected: 28,
-		},
-		{
-			name:     "object with trailing content",
-			input:    `{"a": 1} extra`,
-			expected: 7,
-		},
-		{
-			name:     "array",
-			input:    `[{"a": 1}, {"b": 2}]`,
-			expected: 19,
-		},
-		{
-			name:     "escaped backslash before quote",
-			input:    `{"path": "C:\\"}`,
-			expected: 15,
-		},
-		{
-			name:     "empty string",
-			input:    "",
-			expected: -1,
-		},
-		{
-			name:     "no opening brace",
-			input:    "hello world",
-			expected: -1,
-		},
-		{
-			name:     "only opening brace",
-			input:    "{",
-			expected: -1,
-		},
-		{
-			name:     "unclosed string",
-			input:    `{"key": "unclosed`,
-			expected: -1,
-		},
-		{
-			name:     "double escaped backslash then quote",
-			input:    `{"path": "C:\\\\"}`,
-			expected: 17,
-		},
-		{
-			name:     "unicode in key and value",
-			input:    `{"키": "값"}`,
-			expected: 13,
-		},
-		{
-			name:     "nested arrays",
-			input:    `{"matrix": [[1, 2], [3, 4]]}`,
-			expected: 27,
-		},
-		{
-			name:     "mixed nesting",
-			input:    `{"a": [{"b": {"c": [1, 2, 3]}}]}`,
-			expected: 31,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := findJSONEnd(tt.input)
-			if result != tt.expected {
-				t.Errorf("findJSONEnd(%q) = %d, want %d", tt.input, result, tt.expected)
-			}
-		})
-	}
-}
-
-func TestMinistralParser_HasToolSupport(t *testing.T) {
-	p := &MinistralParser{}
-	if !p.HasToolSupport() {
-		t.Error("expected HasToolSupport to return true")
-	}
-}
-
-func TestMinistralParser_HasThinkingSupport(t *testing.T) {
-	p := &MinistralParser{hasThinkingSupport: false}
-	if p.HasThinkingSupport() {
-		t.Error("expected HasThinkingSupport to return false")
-	}
-
-	p = &MinistralParser{hasThinkingSupport: true}
-	if !p.HasThinkingSupport() {
-		t.Error("expected HasThinkingSupport to return true")
-	}
-}
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -3,7 +3,6 @@ package parsers
 import (
 	"strings"
 	"unicode"
-	"unicode/utf8"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/harmony"
@@ -115,33 +114,3 @@ func splitAtTag(sb *strings.Builder, tag string, trimAfter bool) (string, string
 	sb.WriteString(after)
 	return before, after // return events
 }
-
-// overlap returns the longest overlap between the suffix of s and the prefix of delim
-func overlap(s, delim string) int {
-	max := min(len(delim), len(s))
-	for i := max; i > 0; i-- {
-		if strings.HasSuffix(s, delim[:i]) {
-			return i
-		}
-	}
-	return 0
-}
-
-// trailingWhitespaceLen returns the length in bytes of trailing whitespace in s
-func trailingWhitespaceLen(s string) int {
-	remaining := s
-	total := 0
-	for len(remaining) > 0 {
-		r, size := utf8.DecodeLastRuneInString(remaining)
-		// if it's an invalid utf8 rune, assume it isn't whitespace
-		if r == utf8.RuneError && size == 1 {
-			break
-		}
-		if !unicode.IsSpace(r) {
-			break
-		}
-		total += size
-		remaining = remaining[:len(remaining)-size]
-	}
-	return total
-}
--- a/model/parsers/qwen3coder.go
+++ b/model/parsers/qwen3coder.go
@@ -11,6 +11,7 @@ import (
 	"strconv"
 	"strings"
 	"unicode"
+	"unicode/utf8"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/logutil"
@@ -193,6 +194,36 @@ func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
 	}
 }

+// TODO(drifkin): move this to a shared location
+// longest overlap between suffix of s and prefix of delim
+func overlap(s, delim string) int {
+	max := min(len(delim), len(s))
+	for i := max; i > 0; i-- {
+		if strings.HasSuffix(s, delim[:i]) {
+			return i
+		}
+	}
+	return 0
+}
+
+func trailingWhitespaceLen(s string) int {
+	remaining := s
+	total := 0
+	for len(remaining) > 0 {
+		r, size := utf8.DecodeLastRuneInString(remaining)
+		// if it's an invalid utf8 rune, assume it isn't whitespace
+		if r == utf8.RuneError && size == 1 {
+			break
+		}
+		if !unicode.IsSpace(r) {
+			break
+		}
+		total += size
+		remaining = remaining[:len(remaining)-size]
+	}
+	return total
+}
+
 type XMLFunctionCall struct {
 	XMLName    xml.Name       `xml:"function"`
 	Name       string         `xml:"name,attr"`
--- a/runner/ollamarunner/runner.go
+++ b/runner/ollamarunner/runner.go
@@ -37,6 +37,7 @@ import (
 	"github.com/ollama/ollama/model/input"
 	"github.com/ollama/ollama/runner/common"
 	"github.com/ollama/ollama/sample"
+	"github.com/ollama/ollama/tokenizer"

 	_ "github.com/ollama/ollama/model/models"
 )
@@ -210,9 +211,9 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
 }

 // calculateLogprobs converts raw logits to log probabilities and finds top K tokens
-func calculateLogprobs(logits []float32, selectedToken int32, topK int, textProcessor model.TextProcessor) []llm.Logprob {
+func calculateLogprobs(logits []float32, selectedToken int32, topK int, tokenizer tokenizer.Tokenizer) []llm.Logprob {
 	decoder := func(tokenID int) string {
-		text, _ := textProcessor.Decode([]int32{int32(tokenID)})
+		text, _ := tokenizer.Decode([]int32{int32(tokenID)})
 		return text
 	}
 	return common.CalculateLogprobs(logits, int(selectedToken), topK, decoder)
@@ -242,7 +243,7 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]*input.Input,

 	for i, part := range parts {
 		// text - tokenize
-		tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
+		tokens, err := s.model.(tokenizer.Tokenizer).Encode(part, i == 0)
 		if err != nil {
 			return nil, nil, nil, err
 		}
@@ -766,7 +767,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
 		nextBatchTokens[i].Token = token

 		// if it's an end of sequence token, break
-		if s.model.(model.TextProcessor).Is(token, model.SpecialEOS) {
+		if s.model.(tokenizer.Tokenizer).Is(token, tokenizer.SpecialEOS) {
 			// TODO (jmorganca): we should send this back
 			// as it's important for the /api/generate context
 			// seq.responses <- piece
@@ -775,14 +776,14 @@ func (s *Server) computeBatch(activeBatch batchState) {
 			continue
 		}

-		piece, err := s.model.(model.TextProcessor).Decode([]int32{token})
+		piece, err := s.model.(tokenizer.Tokenizer).Decode([]int32{token})
 		if err != nil {
 			panic("failed to decode token")
 		}

 		// Calculate logprobs if requested (after EOS check to avoid logprobs for EOS tokens)
 		if seq.logprobs {
-			logprobs := calculateLogprobs(logits, token, seq.topLogprobs, s.model.(model.TextProcessor))
+			logprobs := calculateLogprobs(logits, token, seq.topLogprobs, s.model.(tokenizer.Tokenizer))
 			seq.pendingLogprobs = append(seq.pendingLogprobs, logprobs...)
 		}

@@ -873,7 +874,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
 	var grammar *sample.GrammarSampler
 	var err error
 	if req.Grammar != "" {
-		grammar, err = sample.NewGrammarSampler(s.model.(model.TextProcessor), req.Grammar)
+		grammar, err = sample.NewGrammarSampler(s.model.(tokenizer.Tokenizer), req.Grammar)
 		if err != nil {
 			http.Error(w, "failed to load model vocabulary required for format", http.StatusInternalServerError)
 			return
@@ -1358,7 +1359,7 @@ func (s *Server) info(w http.ResponseWriter, r *http.Request) {
 		// Dummy load to get the backend wired up
 		f, err := os.CreateTemp("", "*.bin")
 		if err != nil {
-			http.Error(w, fmt.Sprintf("failed to initialize backend: %v", err), http.StatusInternalServerError)
+			http.Error(w, fmt.Sprintf("failed to initialize baackend: %v", err), http.StatusInternalServerError)
 			return
 		}
 		defer f.Close()
@@ -1368,13 +1369,13 @@ func (s *Server) info(w http.ResponseWriter, r *http.Request) {
 			"general.architecture": "llama",
 			"tokenizer.ggml.model": "gpt2",
 		}, nil); err != nil {
-			http.Error(w, fmt.Sprintf("failed to initialize backend: %v", err), http.StatusInternalServerError)
+			http.Error(w, fmt.Sprintf("failed to initialize baackend: %v", err), http.StatusInternalServerError)
 			return
 		}

 		m, err = model.New(f.Name(), ml.BackendParams{NumThreads: runtime.NumCPU(), AllocMemory: false, GPULayers: ml.GPULayersList{{}}})
 		if err != nil {
-			http.Error(w, fmt.Sprintf("failed to initialize backend: %v", err), http.StatusInternalServerError)
+			http.Error(w, fmt.Sprintf("failed to initialize baackend: %v", err), http.StatusInternalServerError)
 			return
 		}
 		slog.Debug("dummy model load took", "duration", time.Since(startLoad))
--- a/sample/samplers.go
+++ b/sample/samplers.go
@@ -7,7 +7,7 @@ import (
 	"slices"

 	"github.com/ollama/ollama/llama"
-	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/tokenizer"
 )

 // token represents information about a single token during sampling
@@ -168,15 +168,15 @@ type GrammarSampler struct {
 	grammar *llama.Grammar
 }

-func NewGrammarSampler(model model.TextProcessor, grammarStr string) (*GrammarSampler, error) {
-	vocabIds := make([]uint32, len(model.Vocabulary().Values))
-	pieces := make([]string, len(model.Vocabulary().Values))
-	for i := range model.Vocabulary().Values {
-		pieces[i], _ = model.Decode([]int32{int32(i)})
+func NewGrammarSampler(tokenizer tokenizer.Tokenizer, grammarStr string) (*GrammarSampler, error) {
+	vocabIds := make([]uint32, len(tokenizer.Vocabulary().Values))
+	pieces := make([]string, len(tokenizer.Vocabulary().Values))
+	for i := range tokenizer.Vocabulary().Values {
+		pieces[i], _ = tokenizer.Decode([]int32{int32(i)})
 		vocabIds[i] = uint32(i)
 	}

-	grammar := llama.NewGrammar(grammarStr, vocabIds, pieces, model.Vocabulary().EOS)
+	grammar := llama.NewGrammar(grammarStr, vocabIds, pieces, tokenizer.Vocabulary().EOS)
 	if grammar == nil {
 		return nil, errors.New("sample: failed to initialize grammar")
 	}
--- a/sample/samplers_test.go
+++ b/sample/samplers_test.go
@@ -8,7 +8,7 @@ import (
 	"path/filepath"
 	"testing"

-	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/tokenizer"
 )

 func TestWeighted(t *testing.T) {
@@ -60,10 +60,10 @@ func TestWeighted(t *testing.T) {
 	}
 }

-func modelHelper(t testing.TB) model.BytePairEncoding {
+func modelHelper(t testing.TB) tokenizer.Tokenizer {
 	t.Helper()

-	f, err := os.Open(filepath.Join("..", "model", "testdata", "llama3.2", "encoder.json"))
+	f, err := os.Open(filepath.Join("..", "testdata", "testdata", "llama3.2", "encoder.json"))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -81,8 +81,8 @@ func modelHelper(t testing.TB) model.BytePairEncoding {

 	merges := make([]string, 0, 1)
 	// Only need vocab for Grammar Test
-	return model.NewBytePairEncoding(
-		&model.Vocabulary{
+	return tokenizer.NewBytePairEncoding(
+		&tokenizer.Vocabulary{
 			Values: tokens,
 			Types:  make([]int32, len(vocab)),
 			Merges: merges,
--- a/server/routes.go
+++ b/server/routes.go
@@ -75,12 +75,16 @@ func experimentEnabled(name string) bool {

 var useClient2 = experimentEnabled("client2")

+// Low VRAM mode is based on the sum of total VRAM (not free) and triggers
+// reduced context length on some models
+var lowVRAMThreshold uint64 = 20 * format.GibiByte
+
 var mode string = gin.DebugMode

 type Server struct {
-	addr          net.Addr
-	sched         *Scheduler
-	defaultNumCtx int
+	addr    net.Addr
+	sched   *Scheduler
+	lowVRAM bool
 }

 func init() {
@@ -103,12 +107,8 @@ var (
 	errBadTemplate = errors.New("template error")
 )

-func (s *Server) modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
+func modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
 	opts := api.DefaultOptions()
-	if opts.NumCtx == 0 {
-		opts.NumCtx = s.defaultNumCtx
-	}
-
 	if err := opts.FromMap(model.Options); err != nil {
 		return api.Options{}, err
 	}
@@ -140,11 +140,20 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C
 		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
 	}

-	opts, err := s.modelOptions(model, requestOpts)
+	opts, err := modelOptions(model, requestOpts)
 	if err != nil {
 		return nil, nil, nil, err
 	}

+	// This model is much more capable with a larger context, so set that
+	// unless it would penalize performance too much
+	if !s.lowVRAM && slices.Contains([]string{
+		"gptoss", "gpt-oss",
+		"qwen3vl", "qwen3vlmoe",
+	}, model.Config.ModelFamily) {
+		opts.NumCtx = max(opts.NumCtx, 8192)
+	}
+
 	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
 	var runner *runnerRef
 	select {
@@ -1711,18 +1720,10 @@ func Serve(ln net.Listener) error {
 	for _, gpu := range gpus {
 		totalVRAM += gpu.TotalMemory - envconfig.GpuOverhead()
 	}
-
-	// Set default context based on VRAM tier
-	// Use slightly lower thresholds (47/23 GiB vs. 48/24 GiB) to account for small differences in the exact value
-	switch {
-	case totalVRAM >= 47*format.GibiByte:
-		s.defaultNumCtx = 262144
-	case totalVRAM >= 23*format.GibiByte:
-		s.defaultNumCtx = 32768
-	default:
-		s.defaultNumCtx = 4096
+	if totalVRAM < lowVRAMThreshold {
+		s.lowVRAM = true
+		slog.Info("entering low vram mode", "total vram", format.HumanBytes2(totalVRAM), "threshold", format.HumanBytes2(lowVRAMThreshold))
 	}
-	slog.Info("vram-based default context", "total_vram", format.HumanBytes2(totalVRAM), "default_num_ctx", s.defaultNumCtx)

 	err = srvr.Serve(ln)
 	// If server is closed from the signal handler, wait for the ctx to be done
@@ -1896,8 +1897,8 @@ func (s *Server) PsHandler(c *gin.Context) {
 			Details:   modelDetails,
 			ExpiresAt: v.expiresAt,
 		}
-		if v.llama != nil {
-			mr.ContextLength = v.llama.ContextLength()
+		if v.Options != nil {
+			mr.ContextLength = v.Options.NumCtx
 		}
 		// The scheduler waits to set expiresAt, so if a model is loading it's
 		// possible that it will be set to the unix epoch. For those cases, just
--- a/server/routes_debug_test.go
+++ b/server/routes_debug_test.go
@@ -15,7 +15,6 @@ import (
 )

 func TestGenerateDebugRenderOnly(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	mock := mockRunner{
@@ -209,7 +208,6 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
 }

 func TestChatDebugRenderOnly(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	mock := mockRunner{
--- a/server/routes_generate_renderer_test.go
+++ b/server/routes_generate_renderer_test.go
@@ -20,7 +20,6 @@ import (
 // TestGenerateWithBuiltinRenderer tests that api/generate uses built-in renderers
 // when in chat-like flow (messages present, no suffix, no template)
 func TestGenerateWithBuiltinRenderer(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	mock := mockRunner{
@@ -205,7 +204,6 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {

 // TestGenerateWithDebugRenderOnly tests that debug_render_only works with built-in renderers
 func TestGenerateWithDebugRenderOnly(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	mock := mockRunner{
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -162,7 +162,6 @@ func TestGenerateChatRemote(t *testing.T) {
 }

 func TestGenerateChat(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	mock := mockRunner{
@@ -879,7 +878,6 @@ func TestGenerateChat(t *testing.T) {
 }

 func TestGenerate(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	mock := mockRunner{
@@ -2357,7 +2355,6 @@ func TestGenerateWithImages(t *testing.T) {
 // TestImageGenerateStreamFalse tests that image generation respects stream=false
 // and returns a single JSON response instead of streaming ndjson.
 func TestImageGenerateStreamFalse(t *testing.T) {
-	t.Setenv("OLLAMA_CONTEXT_LENGTH", "4096")
 	gin.SetMode(gin.TestMode)

 	p := t.TempDir()
--- a/server/routes_options_test.go
+++ b/server/routes_options_test.go
@@ -1,127 +0,0 @@
-package server
-
-import (
-	"testing"
-)
-
-func TestModelOptionsNumCtxPriority(t *testing.T) {
-	tests := []struct {
-		name           string
-		envContextLen  string // empty means not set (uses 0 sentinel)
-		defaultNumCtx  int    // VRAM-based default
-		modelNumCtx    int    // 0 means not set in model
-		requestNumCtx  int    // 0 means not set in request
-		expectedNumCtx int
-	}{
-		{
-			name:           "vram default when nothing else set",
-			envContextLen:  "",
-			defaultNumCtx:  32768,
-			modelNumCtx:    0,
-			requestNumCtx:  0,
-			expectedNumCtx: 32768,
-		},
-		{
-			name:           "env var overrides vram default",
-			envContextLen:  "8192",
-			defaultNumCtx:  32768,
-			modelNumCtx:    0,
-			requestNumCtx:  0,
-			expectedNumCtx: 8192,
-		},
-		{
-			name:           "model overrides vram default",
-			envContextLen:  "",
-			defaultNumCtx:  32768,
-			modelNumCtx:    16384,
-			requestNumCtx:  0,
-			expectedNumCtx: 16384,
-		},
-		{
-			name:           "model overrides env var",
-			envContextLen:  "8192",
-			defaultNumCtx:  32768,
-			modelNumCtx:    16384,
-			requestNumCtx:  0,
-			expectedNumCtx: 16384,
-		},
-		{
-			name:           "request overrides everything",
-			envContextLen:  "8192",
-			defaultNumCtx:  32768,
-			modelNumCtx:    16384,
-			requestNumCtx:  4096,
-			expectedNumCtx: 4096,
-		},
-		{
-			name:           "request overrides vram default",
-			envContextLen:  "",
-			defaultNumCtx:  32768,
-			modelNumCtx:    0,
-			requestNumCtx:  4096,
-			expectedNumCtx: 4096,
-		},
-		{
-			name:           "request overrides model",
-			envContextLen:  "",
-			defaultNumCtx:  32768,
-			modelNumCtx:    16384,
-			requestNumCtx:  4096,
-			expectedNumCtx: 4096,
-		},
-		{
-			name:           "low vram tier default",
-			envContextLen:  "",
-			defaultNumCtx:  4096,
-			modelNumCtx:    0,
-			requestNumCtx:  0,
-			expectedNumCtx: 4096,
-		},
-		{
-			name:           "high vram tier default",
-			envContextLen:  "",
-			defaultNumCtx:  262144,
-			modelNumCtx:    0,
-			requestNumCtx:  0,
-			expectedNumCtx: 262144,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			// Set or clear environment variable
-			if tt.envContextLen != "" {
-				t.Setenv("OLLAMA_CONTEXT_LENGTH", tt.envContextLen)
-			}
-
-			// Create server with VRAM-based default
-			s := &Server{
-				defaultNumCtx: tt.defaultNumCtx,
-			}
-
-			// Create model options (use float64 as FromMap expects JSON-style numbers)
-			var modelOpts map[string]any
-			if tt.modelNumCtx != 0 {
-				modelOpts = map[string]any{"num_ctx": float64(tt.modelNumCtx)}
-			}
-			model := &Model{
-				Options: modelOpts,
-			}
-
-			// Create request options (use float64 as FromMap expects JSON-style numbers)
-			var requestOpts map[string]any
-			if tt.requestNumCtx != 0 {
-				requestOpts = map[string]any{"num_ctx": float64(tt.requestNumCtx)}
-			}
-
-			opts, err := s.modelOptions(model, requestOpts)
-			if err != nil {
-				t.Fatalf("modelOptions failed: %v", err)
-			}
-
-			if opts.NumCtx != tt.expectedNumCtx {
-				t.Errorf("NumCtx = %d, want %d", opts.NumCtx, tt.expectedNumCtx)
-			}
-		})
-	}
-}
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -804,7 +804,6 @@ func (s *mockLlm) GetPort() int                                       { return -
 func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil }
 func (s *mockLlm) HasExited() bool                                    { return false }
 func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID                  { return nil }
-func (s *mockLlm) ContextLength() int                                 { return 0 }

 // TestImageGenRunnerCanBeEvicted verifies that an image generation model
 // loaded in the scheduler can be evicted when idle.
--- a/tokenizer/bytepairencoding.go
+++ b/tokenizer/bytepairencoding.go
@@ -1,8 +1,10 @@
-package model
+package tokenizer

 import (
 	"cmp"
+	"fmt"
 	"iter"
+	"log/slog"
 	"slices"
 	"strings"

@@ -11,24 +13,24 @@ import (
 	"github.com/ollama/ollama/logutil"
 )

-type BytePairEncoding struct {
+type bytePairEncoding struct {
 	vocab   *Vocabulary
 	regexps []*regexp2.Regexp
 }

-var _ TextProcessor = (*BytePairEncoding)(nil)
+var _ Tokenizer = (*bytePairEncoding)(nil)

-func NewBytePairEncoding(vocab *Vocabulary, pretokenizers ...string) BytePairEncoding {
-	if len(pretokenizers) == 0 {
+func NewBytePairEncoding(vocab *Vocabulary, pretokenizer ...string) bytePairEncoding {
+	if len(pretokenizer) == 0 {
 		// set default byte-level pretokenizer if none provided, e.g.
-		// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/pre_tokenizers/byte_level.rs#L44
-		pretokenizers = []string{`'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+`}
+		// https://github.com/huggingface/tokenizer/blob/main/tokenizer/src/pre_tokenizer/byte_level.rs#L44
+		pretokenizer = []string{`'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+`}
 	}

-	return BytePairEncoding{
+	return bytePairEncoding{
 		vocab: vocab,
 		regexps: slices.Collect(func(yield func(*regexp2.Regexp) bool) {
-			for _, p := range pretokenizers {
+			for _, p := range pretokenizer {
 				if !yield(regexp2.MustCompile(p, regexp2.RE2)) {
 					return
 				}
@@ -37,15 +39,15 @@ func NewBytePairEncoding(vocab *Vocabulary, pretokenizers ...string) BytePairEnc
 	}
 }

-func (bpe BytePairEncoding) Vocabulary() *Vocabulary {
+func (bpe bytePairEncoding) Vocabulary() *Vocabulary {
 	return bpe.vocab
 }

-func (bpe BytePairEncoding) Is(id int32, special Special) bool {
+func (bpe bytePairEncoding) Is(id int32, special Special) bool {
 	return bpe.vocab.Is(id, special)
 }

-func (bpe *BytePairEncoding) split(s string) iter.Seq[string] {
+func (bpe *bytePairEncoding) split(s string) iter.Seq[string] {
 	parts := []string{s}
 	for _, re := range bpe.regexps {
 		parts = slices.Collect(func(yield func(string) bool) {
@@ -96,7 +98,7 @@ type merge struct {
 	runes []rune
 }

-func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
+func (bpe bytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
 	fragments := []fragment{{value: s}}
 	for _, special := range bpe.vocab.SpecialVocabulary() {
 		// TODO: process special tokens concurrently
@@ -243,7 +245,15 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
 	return ids, nil
 }

-func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
+type lazyIdsString struct {
+	ids []int32
+}
+
+func (l lazyIdsString) LogValue() slog.Value {
+	return slog.AnyValue(fmt.Sprint(l.ids))
+}
+
+func (bpe bytePairEncoding) Decode(ids []int32) (string, error) {
 	var sb strings.Builder
 	for _, id := range ids {
 		for _, r := range bpe.vocab.Decode(id) {
@@ -267,6 +277,6 @@ func (bpe BytePairEncoding) Decode(ids []int32) (string, error) {
 		}
 	}

-	logutil.Trace("decoded", "string", sb.String(), "from", ids)
+	logutil.Trace("decoded", "string", sb.String(), "from", lazyIdsString{ids: ids})
 	return sb.String(), nil
 }
--- a/tokenizer/bytepairencoding_test.go
+++ b/tokenizer/bytepairencoding_test.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"bufio"
@@ -14,10 +14,10 @@ import (
 	"github.com/google/go-cmp/cmp"
 )

-func llama(t testing.TB) BytePairEncoding {
+func llama(t testing.TB) bytePairEncoding {
 	t.Helper()

-	f, err := os.Open(filepath.Join("testdata", "llama3.2", "encoder.json"))
+	f, err := os.Open(filepath.FromSlash("testdata/llama3.2/encoder.json"))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -43,7 +43,7 @@ func llama(t testing.TB) BytePairEncoding {
 		}
 	}

-	f, err = os.Open(filepath.Join("testdata", "llama3.2", "vocab.bpe"))
+	f, err = os.Open(filepath.FromSlash("testdata/llama3.2/vocab.bpe"))
 	if err != nil {
 		t.Fatal(err)
 	}
--- a/tokenizer/sentencepiece.go
+++ b/tokenizer/sentencepiece.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"container/heap"
@@ -12,18 +12,18 @@ import (

 const spmWhitespaceSep = "▁"

-type SentencePiece struct {
+type sentencePiece struct {
 	maxTokenLen int
 	vocab       *Vocabulary
 }

-var _ TextProcessor = (*SentencePiece)(nil)
+var _ Tokenizer = (*sentencePiece)(nil)

-func (spm SentencePiece) Vocabulary() *Vocabulary {
+func (spm sentencePiece) Vocabulary() *Vocabulary {
 	return spm.vocab
 }

-func NewSentencePiece(vocab *Vocabulary) SentencePiece {
+func NewSentencePiece(vocab *Vocabulary) sentencePiece {
 	logutil.Trace("Tokens", "num tokens", len(vocab.Values), "vals", vocab.Values[:5], "scores", vocab.Scores[:5], "types", vocab.Types[:5])

 	counter := map[int]int{}
@@ -42,17 +42,17 @@ func NewSentencePiece(vocab *Vocabulary) SentencePiece {
 		"user defined", counter[TOKEN_TYPE_USER_DEFINED], "unused", counter[TOKEN_TYPE_UNUSED], "byte", counter[TOKEN_TYPE_BYTE],
 		"max token len", maxTokenLen)

-	return SentencePiece{
+	return sentencePiece{
 		maxTokenLen: maxTokenLen,
 		vocab:       vocab,
 	}
 }

-func (spm SentencePiece) Is(id int32, special Special) bool {
+func (spm sentencePiece) Is(id int32, special Special) bool {
 	return spm.vocab.Is(id, special)
 }

-func (spm SentencePiece) Encode(s string, addSpecial bool) ([]int32, error) {
+func (spm sentencePiece) Encode(s string, addSpecial bool) ([]int32, error) {
 	fragments := []fragment{{value: s}}
 	for _, special := range spm.vocab.SpecialVocabulary() {
 		id := spm.vocab.Encode(special)
@@ -218,13 +218,13 @@ func (q *queue) Pop() interface{} {
 	return item
 }

-func (spm SentencePiece) Decode(ids []int32) (string, error) {
+func (spm sentencePiece) Decode(ids []int32) (string, error) {
 	var sb strings.Builder
 	for _, id := range ids {
 		data := spm.vocab.Decode(id)
 		data = strings.ReplaceAll(data, spmWhitespaceSep, " ")

-		// For tokenizers that use byte tokens like "<0xEA>"
+		// For tokenizer that use byte tokens like "<0xEA>"
 		// convert them to the partial unicode character
 		// so they are buffered correctly by the runner instead
 		// of being sent back to the api as "<0xEA>"
--- a/tokenizer/sentencepiece_test.go
+++ b/tokenizer/sentencepiece_test.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"log/slog"
@@ -12,10 +12,10 @@ import (
 	"github.com/ollama/ollama/convert/sentencepiece"
 )

-func loadSentencePieceVocab(t *testing.T) SentencePiece {
+func loadSentencePieceVocab(t *testing.T) sentencePiece {
 	t.Helper()

-	bts, err := os.ReadFile(filepath.Join("testdata", "gemma2", "tokenizer.model"))
+	bts, err := os.ReadFile(filepath.FromSlash("testdata/gemma2/tokenizer.model"))
 	if err != nil {
 		t.Fatal(err)
 	}
--- a/tokenizer/testdata/gemma2/tokenizer.model
+++ b/tokenizer/testdata/gemma2/tokenizer.model
--- a/tokenizer/testdata/llama3.2/encoder.json
+++ b/tokenizer/testdata/llama3.2/encoder.json
--- a/tokenizer/testdata/llama3.2/vocab.bpe
+++ b/tokenizer/testdata/llama3.2/vocab.bpe
--- a/tokenizer/testdata/war-and-peace.txt
+++ b/tokenizer/testdata/war-and-peace.txt
--- a/tokenizer/tokenizer.go
+++ b/tokenizer/tokenizer.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 const (
 	TOKEN_TYPE_NORMAL = iota + 1
@@ -9,7 +9,7 @@ const (
 	TOKEN_TYPE_BYTE
 )

-type TextProcessor interface {
+type Tokenizer interface {
 	Encode(s string, addSpecial bool) ([]int32, error)
 	Decode([]int32) (string, error)
 	Is(int32, Special) bool
--- a/tokenizer/vocabulary.go
+++ b/tokenizer/vocabulary.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"log/slog"
--- a/tokenizer/vocabulary_test.go
+++ b/tokenizer/vocabulary_test.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"testing"
--- a/tokenizer/wordpiece.go
+++ b/tokenizer/wordpiece.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"fmt"
@@ -9,7 +9,7 @@ import (
 	"github.com/ollama/ollama/logutil"
 )

-type WordPiece struct {
+type wordPiece struct {
 	vocab     *Vocabulary
 	lowercase bool
 }
@@ -32,8 +32,8 @@ var wordPieceReplacer = strings.NewReplacer(
 	" 're", "'re",
 )

-// Decode implements TextProcessor.
-func (wpm WordPiece) Decode(ids []int32) (string, error) {
+// Decode implements Tokenizer.
+func (wpm wordPiece) Decode(ids []int32) (string, error) {
 	var sb strings.Builder
 	for i, id := range ids {
 		if id < 0 || int(id) >= len(wpm.vocab.Values) {
@@ -56,7 +56,7 @@ func (wpm WordPiece) Decode(ids []int32) (string, error) {

 // words splits a string into words, treating CJK characters as separate words.
 // TODO: this is specifically for BERT and may need to be adjusted or refactored for other models.
-func (wpm WordPiece) words(s string) iter.Seq[string] {
+func (wpm wordPiece) words(s string) iter.Seq[string] {
 	return func(yield func(string) bool) {
 		runes := make([]rune, 0, len(s)*3)
 		for _, r := range s {
@@ -96,8 +96,8 @@ func (wpm WordPiece) words(s string) iter.Seq[string] {
 	}
 }

-// Encode implements TextProcessor.
-func (wpm WordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
+// Encode implements Tokenizer.
+func (wpm wordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
 	var ids []int32

 	// TODO: use [UNK] from config
@@ -151,20 +151,20 @@ func (wpm WordPiece) Encode(s string, addSpecial bool) ([]int32, error) {
 	return ids, nil
 }

-// Is implements TextProcessor.
-func (wpm WordPiece) Is(id int32, special Special) bool {
+// Is implements Tokenizer.
+func (wpm wordPiece) Is(id int32, special Special) bool {
 	return wpm.vocab.Is(id, special)
 }

-// Vocabulary implements TextProcessor.
-func (wpm WordPiece) Vocabulary() *Vocabulary {
+// Vocabulary implements Tokenizer.
+func (wpm wordPiece) Vocabulary() *Vocabulary {
 	return wpm.vocab
 }

-var _ TextProcessor = (*WordPiece)(nil)
+var _ Tokenizer = (*wordPiece)(nil)

-func NewWordPiece(vocab *Vocabulary, lowercase bool) WordPiece {
-	return WordPiece{
+func NewWordPiece(vocab *Vocabulary, lowercase bool) wordPiece {
+	return wordPiece{
 		vocab:     vocab,
 		lowercase: lowercase,
 	}
--- a/tokenizer/wordpiece_test.go
+++ b/tokenizer/wordpiece_test.go
@@ -1,4 +1,4 @@
-package model
+package tokenizer

 import (
 	"slices"
@@ -39,7 +39,7 @@ func TestWordPiece(t *testing.T) {
 }

 func TestWordPieceWords(t *testing.T) {
-	var wpm WordPiece
+	var wpm wordPiece

 	basic := slices.Collect(wpm.words("Hey friend!     How are you?!?"))
 	if diff := cmp.Diff([]string{"Hey", "friend", "!", "How", "are", "you", "?", "!", "?"}, basic); diff != "" {
--- a/x/imagegen/server.go
+++ b/x/imagegen/server.go
@@ -347,11 +347,6 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
 	return s.vramSize
 }

-// Context length is not applicable for image generation.
-func (s *Server) ContextLength() int {
-	return 0
-}
-
 func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
 	return nil, 0, errors.New("not supported")
 }
--- a/x/model/bytepairencoding_test.go
+++ b/x/model/bytepairencoding_test.go
@@ -17,7 +17,7 @@ import (
 func llama(t testing.TB) BytePairEncoding {
 	t.Helper()

-	f, err := os.Open(filepath.Join("..", "..", "model", "testdata", "llama3.2", "encoder.json"))
+	f, err := os.Open(filepath.Join("..", "..", "tokenizer", "testdata", "llama3.2", "encoder.json"))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -43,7 +43,7 @@ func llama(t testing.TB) BytePairEncoding {
 		}
 	}

-	f, err = os.Open(filepath.Join("..", "..", "model", "testdata", "llama3.2", "vocab.bpe"))
+	f, err = os.Open(filepath.Join("..", "..", "tokenizer", "testdata", "llama3.2", "vocab.bpe"))
 	if err != nil {
 		t.Fatal(err)
 	}
--- a/x/model/models/gemma3/model.go
+++ b/x/model/models/gemma3/model.go
@@ -9,6 +9,7 @@ import (
 	"slices"

 	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/tokenizer"
 	"github.com/ollama/ollama/x/kvcache"
 	"github.com/ollama/ollama/x/ml"
 	"github.com/ollama/ollama/x/ml/nn"
@@ -18,7 +19,7 @@ import (

 type Model struct {
 	model.Base
-	model.SentencePiece
+	tokenizer.Tokenizer

 	*VisionModel `gguf:"vision_tower.vision_model"`
 	*TextModel   `gguf:"language_model.model"`
@@ -58,8 +59,8 @@ func (p *MultiModalProjector) Forward(ctx ml.Context, visionOutputs ml.Tensor, i
 func New(c fs.Config) (model.Model, error) {
 	// slog.Info("XXX Config", "c", c)
 	m := Model{
-		SentencePiece: model.NewSentencePiece(
-			&model.Vocabulary{
+		Tokenizer: tokenizer.NewSentencePiece(
+			&tokenizer.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Scores: c.Floats("tokenizer.ggml.scores"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
--- a/x/model/sentencepiece_test.go
+++ b/x/model/sentencepiece_test.go
@@ -15,7 +15,7 @@ import (
 func loadSentencePieceVocab(t *testing.T) SentencePiece {
 	t.Helper()

-	bts, err := os.ReadFile(filepath.Join("..", "..", "model", "testdata", "gemma2", "tokenizer.model"))
+	bts, err := os.ReadFile(filepath.Join("..", "..", "tokenizer", "testdata", "gemma2", "tokenizer.model"))
 	if err != nil {
 		t.Fatal(err)
 	}