lint

2026-02-02 03:33:38 -05:00 · 2026-02-02 00:26:43 -08:00 · 2026-02-02 00:07:52 -08:00 · 2026-02-01 22:54:12 -08:00 · 2026-02-01 22:24:32 -08:00 · 2026-01-26 14:07:21 -08:00
38 changed files with 1658 additions and 2098 deletions
--- a/api/types.go
+++ b/api/types.go
@@ -912,19 +912,6 @@ type UserResponse struct {
 	Plan      string    `json:"plan,omitempty"`
 }

-type UsageResponse struct {
-	// Start is the time the server started tracking usage (UTC, RFC 3339).
-	Start time.Time        `json:"start"`
-	Usage []ModelUsageData `json:"usage"`
-}
-
-type ModelUsageData struct {
-	Model            string `json:"model"`
-	Requests         int64  `json:"requests"`
-	PromptTokens     int64  `json:"prompt_tokens"`
-	CompletionTokens int64  `json:"completion_tokens"`
-}
-
 // Tensor describes the metadata for a given tensor.
 type Tensor struct {
 	Name  string   `json:"name"`
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -6,8 +6,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // Claude implements Runner for Claude Code integration
@@ -52,7 +50,7 @@ func (c *Claude) Run(model string) error {
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	cmd.Env = append(os.Environ(),
-		"ANTHROPIC_BASE_URL="+envconfig.Host().String(),
+		"ANTHROPIC_BASE_URL=http://localhost:11434",
 		"ANTHROPIC_API_KEY=",
 		"ANTHROPIC_AUTH_TOKEN=ollama",
 	)
--- a/cmd/config/clawdbot.go
+++ b/cmd/config/clawdbot.go
@@ -1,195 +0,0 @@
-package config
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-type Clawdbot struct{}
-
-func (c *Clawdbot) String() string { return "Clawdbot" }
-
-const ansiGreen = "\033[32m"
-
-func (c *Clawdbot) Run(model string) error {
-	if _, err := exec.LookPath("clawdbot"); err != nil {
-		return fmt.Errorf("clawdbot is not installed, install from https://docs.clawd.bot")
-	}
-
-	models := []string{model}
-	if config, err := loadIntegration("clawdbot"); err == nil && len(config.Models) > 0 {
-		models = config.Models
-	}
-	if err := c.Edit(models); err != nil {
-		return fmt.Errorf("setup failed: %w", err)
-	}
-
-	cmd := exec.Command("clawdbot", "gateway")
-	cmd.Stdin = os.Stdin
-
-	// Capture output to detect "already running" message
-	var outputBuf bytes.Buffer
-	cmd.Stdout = io.MultiWriter(os.Stdout, &outputBuf)
-	cmd.Stderr = io.MultiWriter(os.Stderr, &outputBuf)
-
-	err := cmd.Run()
-	if err != nil && strings.Contains(outputBuf.String(), "Gateway already running") {
-		fmt.Fprintf(os.Stderr, "%sClawdbot has been configured with Ollama. Gateway is already running.%s\n", ansiGreen, ansiReset)
-		return nil
-	}
-	return err
-}
-
-func (c *Clawdbot) Paths() []string {
-	home, _ := os.UserHomeDir()
-	p := filepath.Join(home, ".clawdbot", "clawdbot.json")
-	if _, err := os.Stat(p); err == nil {
-		return []string{p}
-	}
-	return nil
-}
-
-func (c *Clawdbot) Edit(models []string) error {
-	if len(models) == 0 {
-		return nil
-	}
-
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-
-	configPath := filepath.Join(home, ".clawdbot", "clawdbot.json")
-	if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
-		return err
-	}
-
-	// Read into map[string]any to preserve unknown fields
-	config := make(map[string]any)
-	if data, err := os.ReadFile(configPath); err == nil {
-		_ = json.Unmarshal(data, &config)
-	}
-
-	// Navigate/create: models.providers.ollama (preserving other providers)
-	modelsSection, _ := config["models"].(map[string]any)
-	if modelsSection == nil {
-		modelsSection = make(map[string]any)
-	}
-	providers, _ := modelsSection["providers"].(map[string]any)
-	if providers == nil {
-		providers = make(map[string]any)
-	}
-	ollama, _ := providers["ollama"].(map[string]any)
-	if ollama == nil {
-		ollama = make(map[string]any)
-	}
-
-	ollama["baseUrl"] = envconfig.Host().String() + "/v1"
-	// needed to register provider
-	ollama["apiKey"] = "ollama-local"
-	// TODO(parthsareen): potentially move to responses
-	ollama["api"] = "openai-completions"
-
-	// Build map of existing models to preserve user customizations
-	existingModels, _ := ollama["models"].([]any)
-	existingByID := make(map[string]map[string]any)
-	for _, m := range existingModels {
-		if entry, ok := m.(map[string]any); ok {
-			if id, ok := entry["id"].(string); ok {
-				existingByID[id] = entry
-			}
-		}
-	}
-
-	var newModels []any
-	for _, model := range models {
-		entry := map[string]any{
-			"id":        model,
-			"name":      model,
-			"reasoning": false,
-			"input":     []any{"text"},
-			"cost": map[string]any{
-				"input":      0,
-				"output":     0,
-				"cacheRead":  0,
-				"cacheWrite": 0,
-			},
-			// TODO(parthsareen): get these values from API
-			"contextWindow": 131072,
-			"maxTokens":     16384,
-		}
-		// Merge existing fields (user customizations)
-		if existing, ok := existingByID[model]; ok {
-			for k, v := range existing {
-				if _, isNew := entry[k]; !isNew {
-					entry[k] = v
-				}
-			}
-		}
-		newModels = append(newModels, entry)
-	}
-	ollama["models"] = newModels
-
-	providers["ollama"] = ollama
-	modelsSection["providers"] = providers
-	config["models"] = modelsSection
-
-	// Update agents.defaults.model.primary (preserving other agent settings)
-	agents, _ := config["agents"].(map[string]any)
-	if agents == nil {
-		agents = make(map[string]any)
-	}
-	defaults, _ := agents["defaults"].(map[string]any)
-	if defaults == nil {
-		defaults = make(map[string]any)
-	}
-	modelConfig, _ := defaults["model"].(map[string]any)
-	if modelConfig == nil {
-		modelConfig = make(map[string]any)
-	}
-	modelConfig["primary"] = "ollama/" + models[0]
-	defaults["model"] = modelConfig
-	agents["defaults"] = defaults
-	config["agents"] = agents
-
-	data, err := json.MarshalIndent(config, "", "  ")
-	if err != nil {
-		return err
-	}
-	return writeWithBackup(configPath, data)
-}
-
-func (c *Clawdbot) Models() []string {
-	home, err := os.UserHomeDir()
-	if err != nil {
-		return nil
-	}
-
-	config, err := readJSONFile(filepath.Join(home, ".clawdbot", "clawdbot.json"))
-	if err != nil {
-		return nil
-	}
-
-	modelsSection, _ := config["models"].(map[string]any)
-	providers, _ := modelsSection["providers"].(map[string]any)
-	ollama, _ := providers["ollama"].(map[string]any)
-	modelList, _ := ollama["models"].([]any)
-
-	var result []string
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if id, ok := entry["id"].(string); ok {
-				result = append(result, id)
-			}
-		}
-	}
-	return result
-}
--- a/cmd/config/clawdbot_test.go
+++ b/cmd/config/clawdbot_test.go
@@ -1,625 +0,0 @@
-package config
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-func TestClawdbotIntegration(t *testing.T) {
-	c := &Clawdbot{}
-
-	t.Run("String", func(t *testing.T) {
-		if got := c.String(); got != "Clawdbot" {
-			t.Errorf("String() = %q, want %q", got, "Clawdbot")
-		}
-	})
-
-	t.Run("implements Runner", func(t *testing.T) {
-		var _ Runner = c
-	})
-
-	t.Run("implements Editor", func(t *testing.T) {
-		var _ Editor = c
-	})
-}
-
-func TestClawdbotEdit(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("fresh install", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertClawdbotModelExists(t, configPath, "llama3.2")
-		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2")
-	})
-
-	t.Run("multiple models - first is primary", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
-			t.Fatal(err)
-		}
-		assertClawdbotModelExists(t, configPath, "llama3.2")
-		assertClawdbotModelExists(t, configPath, "mistral")
-		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2")
-	})
-
-	t.Run("preserve other providers", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"anthropic":{"apiKey":"xxx"}}}}`), 0o644)
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		models := cfg["models"].(map[string]any)
-		providers := models["providers"].(map[string]any)
-		if providers["anthropic"] == nil {
-			t.Error("anthropic provider was removed")
-		}
-	})
-
-	t.Run("preserve top-level keys", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"theme":"dark","mcp":{"servers":{}}}`), 0o644)
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		if cfg["theme"] != "dark" {
-			t.Error("theme was removed")
-		}
-		if cfg["mcp"] == nil {
-			t.Error("mcp was removed")
-		}
-	})
-
-	t.Run("preserve user customizations on models", func(t *testing.T) {
-		cleanup()
-		c.Edit([]string{"llama3.2"})
-
-		// User adds custom field
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		models := cfg["models"].(map[string]any)
-		providers := models["providers"].(map[string]any)
-		ollama := providers["ollama"].(map[string]any)
-		modelList := ollama["models"].([]any)
-		entry := modelList[0].(map[string]any)
-		entry["customField"] = "user-value"
-		configData, _ := json.MarshalIndent(cfg, "", "  ")
-		os.WriteFile(configPath, configData, 0o644)
-
-		// Re-run Edit
-		c.Edit([]string{"llama3.2"})
-
-		data, _ = os.ReadFile(configPath)
-		json.Unmarshal(data, &cfg)
-		models = cfg["models"].(map[string]any)
-		providers = models["providers"].(map[string]any)
-		ollama = providers["ollama"].(map[string]any)
-		modelList = ollama["models"].([]any)
-		entry = modelList[0].(map[string]any)
-		if entry["customField"] != "user-value" {
-			t.Error("custom field was lost")
-		}
-	})
-
-	t.Run("edit replaces models list", func(t *testing.T) {
-		cleanup()
-		c.Edit([]string{"llama3.2", "mistral"})
-		c.Edit([]string{"llama3.2"})
-
-		assertClawdbotModelExists(t, configPath, "llama3.2")
-		assertClawdbotModelNotExists(t, configPath, "mistral")
-	})
-
-	t.Run("empty models is no-op", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		original := `{"existing":"data"}`
-		os.WriteFile(configPath, []byte(original), 0o644)
-
-		c.Edit([]string{})
-
-		data, _ := os.ReadFile(configPath)
-		if string(data) != original {
-			t.Error("empty models should not modify file")
-		}
-	})
-
-	t.Run("corrupted JSON treated as empty", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{corrupted`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		if err := json.Unmarshal(data, &cfg); err != nil {
-			t.Error("result should be valid JSON")
-		}
-	})
-
-	t.Run("wrong type models section", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":"not a map"}`), 0o644)
-
-		if err := c.Edit([]string{"llama3.2"}); err != nil {
-			t.Fatal(err)
-		}
-		assertClawdbotModelExists(t, configPath, "llama3.2")
-	})
-}
-
-func TestClawdbotModels(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-
-	t.Run("no config returns nil", func(t *testing.T) {
-		if models := c.Models(); len(models) > 0 {
-			t.Errorf("expected nil/empty, got %v", models)
-		}
-	})
-
-	t.Run("returns all ollama models", func(t *testing.T) {
-		configDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "clawdbot.json"), []byte(`{
-			"models":{"providers":{"ollama":{"models":[
-				{"id":"llama3.2"},
-				{"id":"mistral"}
-			]}}}
-		}`), 0o644)
-
-		models := c.Models()
-		if len(models) != 2 {
-			t.Errorf("expected 2 models, got %v", models)
-		}
-	})
-}
-
-// Helper functions
-func assertClawdbotModelExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	ollama := providers["ollama"].(map[string]any)
-	modelList := ollama["models"].([]any)
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if entry["id"] == model {
-				return
-			}
-		}
-	}
-	t.Errorf("model %s not found", model)
-}
-
-func assertClawdbotModelNotExists(t *testing.T, path, model string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models, _ := cfg["models"].(map[string]any)
-	providers, _ := models["providers"].(map[string]any)
-	ollama, _ := providers["ollama"].(map[string]any)
-	modelList, _ := ollama["models"].([]any)
-	for _, m := range modelList {
-		if entry, ok := m.(map[string]any); ok {
-			if entry["id"] == model {
-				t.Errorf("model %s should not exist", model)
-			}
-		}
-	}
-}
-
-func assertClawdbotPrimaryModel(t *testing.T, path, expected string) {
-	t.Helper()
-	data, _ := os.ReadFile(path)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	agents := cfg["agents"].(map[string]any)
-	defaults := agents["defaults"].(map[string]any)
-	model := defaults["model"].(map[string]any)
-	if model["primary"] != expected {
-		t.Errorf("primary model = %v, want %v", model["primary"], expected)
-	}
-}
-
-func TestClawdbotPaths(t *testing.T) {
-	c := &Clawdbot{}
-
-	t.Run("returns path when config exists", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		configDir := filepath.Join(tmpDir, ".clawdbot")
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(filepath.Join(configDir, "clawdbot.json"), []byte(`{}`), 0o644)
-
-		paths := c.Paths()
-		if len(paths) != 1 {
-			t.Errorf("expected 1 path, got %d", len(paths))
-		}
-	})
-
-	t.Run("returns nil when config missing", func(t *testing.T) {
-		tmpDir := t.TempDir()
-		setTestHome(t, tmpDir)
-		if paths := c.Paths(); paths != nil {
-			t.Errorf("expected nil, got %v", paths)
-		}
-	})
-}
-
-func TestClawdbotModelsEdgeCases(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("corrupted JSON returns nil", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{corrupted`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at models level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":"string"}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at providers level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":"string"}}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("wrong type at ollama level", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":"string"}}}`), 0o644)
-		if models := c.Models(); models != nil {
-			t.Errorf("expected nil, got %v", models)
-		}
-	})
-
-	t.Run("model entry missing id", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":{"models":[{"name":"test"}]}}}}`), 0o644)
-		if len(c.Models()) != 0 {
-			t.Error("expected empty for missing id")
-		}
-	})
-
-	t.Run("model id is not string", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"models":{"providers":{"ollama":{"models":[{"id":123}]}}}}`), 0o644)
-		if len(c.Models()) != 0 {
-			t.Error("expected empty for non-string id")
-		}
-	})
-}
-
-func TestClawdbotEditSchemaFields(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configPath := filepath.Join(tmpDir, ".clawdbot", "clawdbot.json")
-
-	if err := c.Edit([]string{"llama3.2"}); err != nil {
-		t.Fatal(err)
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	ollama := providers["ollama"].(map[string]any)
-	modelList := ollama["models"].([]any)
-	entry := modelList[0].(map[string]any)
-
-	// Verify required schema fields
-	if entry["reasoning"] != false {
-		t.Error("reasoning should be false")
-	}
-	if entry["input"] == nil {
-		t.Error("input should be set")
-	}
-	if entry["contextWindow"] == nil {
-		t.Error("contextWindow should be set")
-	}
-	if entry["maxTokens"] == nil {
-		t.Error("maxTokens should be set")
-	}
-	cost := entry["cost"].(map[string]any)
-	if cost["cacheRead"] == nil {
-		t.Error("cost.cacheRead should be set")
-	}
-	if cost["cacheWrite"] == nil {
-		t.Error("cost.cacheWrite should be set")
-	}
-}
-
-func TestClawdbotEditModelNames(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configPath := filepath.Join(tmpDir, ".clawdbot", "clawdbot.json")
-	cleanup := func() { os.RemoveAll(filepath.Join(tmpDir, ".clawdbot")) }
-
-	t.Run("model with colon tag", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"llama3.2:70b"}); err != nil {
-			t.Fatal(err)
-		}
-		assertClawdbotModelExists(t, configPath, "llama3.2:70b")
-		assertClawdbotPrimaryModel(t, configPath, "ollama/llama3.2:70b")
-	})
-
-	t.Run("model with slash", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"library/model:tag"}); err != nil {
-			t.Fatal(err)
-		}
-		assertClawdbotModelExists(t, configPath, "library/model:tag")
-		assertClawdbotPrimaryModel(t, configPath, "ollama/library/model:tag")
-	})
-
-	t.Run("model with hyphen", func(t *testing.T) {
-		cleanup()
-		if err := c.Edit([]string{"test-model"}); err != nil {
-			t.Fatal(err)
-		}
-		assertClawdbotModelExists(t, configPath, "test-model")
-	})
-}
-
-func TestClawdbotEditAgentsPreservation(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-	cleanup := func() { os.RemoveAll(configDir) }
-
-	t.Run("preserve other agent defaults", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"agents":{"defaults":{"model":{"primary":"old"},"temperature":0.7}}}`), 0o644)
-
-		c.Edit([]string{"llama3.2"})
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		agents := cfg["agents"].(map[string]any)
-		defaults := agents["defaults"].(map[string]any)
-		if defaults["temperature"] != 0.7 {
-			t.Error("temperature setting was lost")
-		}
-	})
-
-	t.Run("preserve other agents besides defaults", func(t *testing.T) {
-		cleanup()
-		os.MkdirAll(configDir, 0o755)
-		os.WriteFile(configPath, []byte(`{"agents":{"defaults":{},"custom-agent":{"foo":"bar"}}}`), 0o644)
-
-		c.Edit([]string{"llama3.2"})
-
-		data, _ := os.ReadFile(configPath)
-		var cfg map[string]any
-		json.Unmarshal(data, &cfg)
-		agents := cfg["agents"].(map[string]any)
-		if agents["custom-agent"] == nil {
-			t.Error("custom-agent was lost")
-		}
-	})
-}
-
-const testClawdbotFixture = `{
-  "theme": "dark",
-  "mcp": {"servers": {"custom": {"enabled": true}}},
-  "models": {
-    "providers": {
-      "anthropic": {"apiKey": "xxx"},
-      "ollama": {
-        "baseUrl": "http://127.0.0.1:11434/v1",
-        "models": [{"id": "old-model", "customField": "preserved"}]
-      }
-    }
-  },
-  "agents": {
-    "defaults": {"model": {"primary": "old"}, "temperature": 0.7},
-    "custom-agent": {"foo": "bar"}
-  }
-}`
-
-func TestClawdbotEdit_RoundTrip(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644)
-
-	if err := c.Edit([]string{"llama3.2", "mistral"}); err != nil {
-		t.Fatal(err)
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	json.Unmarshal(data, &cfg)
-
-	// Verify top-level preserved
-	if cfg["theme"] != "dark" {
-		t.Error("theme not preserved")
-	}
-	mcp := cfg["mcp"].(map[string]any)
-	servers := mcp["servers"].(map[string]any)
-	if servers["custom"] == nil {
-		t.Error("mcp.servers.custom not preserved")
-	}
-
-	// Verify other providers preserved
-	models := cfg["models"].(map[string]any)
-	providers := models["providers"].(map[string]any)
-	if providers["anthropic"] == nil {
-		t.Error("anthropic provider not preserved")
-	}
-
-	// Verify agents preserved
-	agents := cfg["agents"].(map[string]any)
-	if agents["custom-agent"] == nil {
-		t.Error("custom-agent not preserved")
-	}
-	defaults := agents["defaults"].(map[string]any)
-	if defaults["temperature"] != 0.7 {
-		t.Error("temperature not preserved")
-	}
-}
-
-func TestClawdbotEdit_Idempotent(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644)
-
-	c.Edit([]string{"llama3.2", "mistral"})
-	firstData, _ := os.ReadFile(configPath)
-
-	c.Edit([]string{"llama3.2", "mistral"})
-	secondData, _ := os.ReadFile(configPath)
-
-	if string(firstData) != string(secondData) {
-		t.Error("repeated edits with same models produced different results")
-	}
-}
-
-func TestClawdbotEdit_MultipleConsecutiveEdits(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-
-	os.MkdirAll(configDir, 0o755)
-	os.WriteFile(configPath, []byte(testClawdbotFixture), 0o644)
-
-	for i := range 10 {
-		models := []string{"model-a", "model-b"}
-		if i%2 == 0 {
-			models = []string{"model-x", "model-y", "model-z"}
-		}
-		if err := c.Edit(models); err != nil {
-			t.Fatalf("edit %d failed: %v", i, err)
-		}
-	}
-
-	data, _ := os.ReadFile(configPath)
-	var cfg map[string]any
-	if err := json.Unmarshal(data, &cfg); err != nil {
-		t.Fatalf("file is not valid JSON after multiple edits: %v", err)
-	}
-
-	if cfg["theme"] != "dark" {
-		t.Error("theme lost after multiple edits")
-	}
-}
-
-func TestClawdbotEdit_BackupCreated(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-	configPath := filepath.Join(configDir, "clawdbot.json")
-	backupDir := filepath.Join(os.TempDir(), "ollama-backups")
-
-	os.MkdirAll(configDir, 0o755)
-	uniqueMarker := fmt.Sprintf("test-marker-%d", os.Getpid())
-	original := fmt.Sprintf(`{"theme": "%s"}`, uniqueMarker)
-	os.WriteFile(configPath, []byte(original), 0o644)
-
-	if err := c.Edit([]string{"model-a"}); err != nil {
-		t.Fatal(err)
-	}
-
-	backups, _ := filepath.Glob(filepath.Join(backupDir, "clawdbot.json.*"))
-	foundBackup := false
-	for _, backup := range backups {
-		data, _ := os.ReadFile(backup)
-		if string(data) == original {
-			foundBackup = true
-			break
-		}
-	}
-
-	if !foundBackup {
-		t.Error("backup with original content not found")
-	}
-}
-
-func TestClawdbotEdit_CreatesDirectoryIfMissing(t *testing.T) {
-	c := &Clawdbot{}
-	tmpDir := t.TempDir()
-	setTestHome(t, tmpDir)
-	configDir := filepath.Join(tmpDir, ".clawdbot")
-
-	if _, err := os.Stat(configDir); !os.IsNotExist(err) {
-		t.Fatal("directory should not exist before test")
-	}
-
-	if err := c.Edit([]string{"model-a"}); err != nil {
-		t.Fatal(err)
-	}
-
-	if _, err := os.Stat(configDir); os.IsNotExist(err) {
-		t.Fatal("directory was not created")
-	}
-}
--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -7,8 +7,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"slices"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // Droid implements Runner and Editor for Droid integration
@@ -119,7 +117,7 @@ func (d *Droid) Edit(models []string) error {
 		newModels = append(newModels, modelEntry{
 			Model:           model,
 			DisplayName:     model,
-			BaseURL:         envconfig.Host().String() + "/v1",
+			BaseURL:         "http://localhost:11434/v1",
 			APIKey:          "ollama",
 			Provider:        "generic-chat-completion-api",
 			MaxOutputTokens: 64000,
--- a/cmd/config/droid_test.go
+++ b/cmd/config/droid_test.go
@@ -218,7 +218,7 @@ func TestDroidEdit(t *testing.T) {
 			}
 		}

-		if model["baseUrl"] != "http://127.0.0.1:11434/v1" {
+		if model["baseUrl"] != "http://localhost:11434/v1" {
 			t.Errorf("unexpected baseUrl: %s", model["baseUrl"])
 		}
 		if model["apiKey"] != "ollama" {
@@ -447,7 +447,7 @@ const testDroidSettingsFixture = `{
    {
      "model": "existing-ollama-model",
      "displayName": "existing-ollama-model",
-      "baseUrl": "http://127.0.0.1:11434/v1",
+      "baseUrl": "http://localhost:11434/v1",
      "apiKey": "ollama",
      "provider": "generic-chat-completion-api",
      "maxOutputTokens": 64000,
--- a/cmd/config/integrations.go
+++ b/cmd/config/integrations.go
@@ -41,7 +41,6 @@ type Editor interface {
 // integrations is the registry of available integrations.
 var integrations = map[string]Runner{
 	"claude":   &Claude{},
-	"clawdbot": &Clawdbot{},
 	"codex":    &Codex{},
 	"droid":    &Droid{},
 	"opencode": &OpenCode{},
@@ -243,7 +242,6 @@ func LaunchCmd(checkServerHeartbeat func(cmd *cobra.Command, args []string) erro

 Supported integrations:
  claude    Claude Code
-  clawdbot  Clawdbot
  codex     Codex
  droid     Droid
  opencode  OpenCode
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -9,8 +9,6 @@ import (
 	"path/filepath"
 	"slices"
 	"strings"
-
-	"github.com/ollama/ollama/envconfig"
 )

 // OpenCode implements Runner and Editor for OpenCode integration
@@ -90,7 +88,7 @@ func (o *OpenCode) Edit(modelList []string) error {
 			"npm":  "@ai-sdk/openai-compatible",
 			"name": "Ollama (local)",
 			"options": map[string]any{
-				"baseURL": envconfig.Host().String() + "/v1",
+				"baseURL": "http://localhost:11434/v1",
 			},
 		}
 	}
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -313,6 +313,8 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
 		conv = &deepseek2Model{}
 	case "Glm4MoeLiteForCausalLM":
 		conv = &glm4MoeLiteModel{}
+	case "GlmOcrForConditionalGeneration":
+		conv = &glmOcrModel{}
 	case "Lfm2ForCausalLM":
 		conv = &lfm2Model{}
 	default:
--- a/convert/convert_glmocr.go
+++ b/convert/convert_glmocr.go
@@ -0,0 +1,469 @@
+package convert
+
+import (
+	"cmp"
+	"encoding/json"
+	"io/fs"
+	"log/slog"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"github.com/ollama/ollama/fs/ggml"
+	"github.com/pdevine/tensor"
+	"github.com/pdevine/tensor/native"
+)
+
+// normalToNeoXRepacker creates a repacker that permutes Q/K weights from interleaved (LLaMA)
+// to NeoX ordering for compatibility with GGML's M-RoPE kernel.
+//
+// The data is treated as [n_heads, head_dim, in_features], where in_features is shape[1]
+// for weights and 1 for 1D biases. Within the rotary rows of every head, even rows move to
+// the front and odd rows follow them: [0,1,2,3,4,5...] -> [0,2,4...,1,3,5...]. All other
+// elements are copied through unchanged.
+//
+// The number of rotary rows is headDim*partialRotaryFactor, rounded down to an even number
+// and clamped to [0, headDim]. nHeads is only used to warn when the tensor's leading
+// dimension implies a different head count.
+//
+// The repacker returns fs.ErrInvalid when headDim is not positive, the shape is empty, or
+// data holds fewer elements than the shape implies.
+func normalToNeoXRepacker(nHeads, headDim int, partialRotaryFactor float32) func(string, []float32, []uint64) ([]float32, error) {
+	return func(name string, data []float32, shape []uint64) ([]float32, error) {
+		if headDim <= 0 || len(shape) == 0 {
+			slog.Error("normalToNeoX: invalid head dimension or shape", "name", name, "head_dim", headDim, "shape", shape)
+			return nil, fs.ErrInvalid
+		}
+
+		rotaryDim := int(float32(headDim) * partialRotaryFactor)
+		rotaryDim = max(0, min(rotaryDim, headDim)) // never reach into the next head
+		rotaryDim -= rotaryDim % 2                  // round down to even
+		halfRotary := rotaryDim / 2
+
+		// Handle 1D (bias) or 2D (weight) tensors
+		inFeatures := 1
+		if len(shape) > 1 {
+			inFeatures = int(shape[1])
+		}
+		nEffectiveHeads := int(shape[0]) / headDim
+
+		if nEffectiveHeads != nHeads {
+			slog.Warn("normalToNeoX: unexpected head count", "effective", nEffectiveHeads, "expected", nHeads)
+		}
+		if nEffectiveHeads*headDim*inFeatures > len(data) {
+			slog.Error("normalToNeoX: data is shorter than its shape", "name", name, "shape", shape, "len", len(data))
+			return nil, fs.ErrInvalid
+		}
+
+		// Start from a copy so non-rotary rows and any trailing elements keep their values
+		result := make([]float32, len(data))
+		copy(result, data)
+
+		// Each row is inFeatures contiguous values, so rows can be moved with copy
+		for h := range nEffectiveHeads {
+			head := h * headDim * inFeatures
+			for i := range halfRotary {
+				even := head + (2*i)*inFeatures
+				odd := even + inFeatures
+
+				// Even dim (0, 2, 4, ...) -> position i
+				dst := head + i*inFeatures
+				copy(result[dst:dst+inFeatures], data[even:even+inFeatures])
+
+				// Odd dim (1, 3, 5, ...) -> position halfRotary + i
+				dst = head + (halfRotary+i)*inFeatures
+				copy(result[dst:dst+inFeatures], data[odd:odd+inFeatures])
+			}
+		}
+
+		return result, nil
+	}
+}
+
+// glmOcrModel holds the GLM-OCR configuration used during conversion. Fields carrying a
+// JSON key are decoded from the model's JSON config (presumably config.json - confirm
+// against the loader). Preprocessor is excluded from that decoding (`json:"-"`) and is
+// filled separately by parseMore from preprocessor_config.json.
+type glmOcrModel struct {
+	ModelParameters
+
+	// TextConfig is the "text_config" section describing the language model.
+	TextConfig struct {
+		HiddenSize          uint32  `json:"hidden_size"`
+		IntermediateSize    uint32  `json:"intermediate_size"`
+		NumHiddenLayers     uint32  `json:"num_hidden_layers"`
+		NumAttentionHeads   uint32  `json:"num_attention_heads"`
+		NumKeyValueHeads    uint32  `json:"num_key_value_heads"`
+		HeadDim             uint32  `json:"head_dim"`
+		MaxPositionEmbed    uint32  `json:"max_position_embeddings"`
+		RMSNormEps          float32 `json:"rms_norm_eps"`
+		PartialRotaryFactor float32 `json:"partial_rotary_factor"`
+		// RopeParameters is the nested "rope_parameters" section. It carries its own
+		// partial_rotary_factor in addition to the one directly on text_config.
+		RopeParameters      struct {
+			RopeType            string  `json:"rope_type"`
+			MRopeSection        []int32 `json:"mrope_section"`
+			RopeTheta           float32 `json:"rope_theta"`
+			PartialRotaryFactor float32 `json:"partial_rotary_factor"`
+		} `json:"rope_parameters"`
+	} `json:"text_config"`
+
+	// VisionConfig is the "vision_config" section describing the vision encoder.
+	VisionConfig struct {
+		HiddenSize        uint32  `json:"hidden_size"`
+		IntermediateSize  uint32  `json:"intermediate_size"`
+		Depth             uint32  `json:"depth"`
+		NumHeads          uint32  `json:"num_heads"`
+		ImageSize         uint32  `json:"image_size"`
+		PatchSize         uint32  `json:"patch_size"`
+		OutHiddenSize     uint32  `json:"out_hidden_size"`
+		RMSNormEps        float32 `json:"rms_norm_eps"`
+		SpatialMergeSize  uint32  `json:"spatial_merge_size"`
+		TemporalPatchSize uint32  `json:"temporal_patch_size"`
+	} `json:"vision_config"`
+
+	// Special token IDs for image and video content; KV writes them to the metadata as-is.
+	ImageStartTokenID uint32 `json:"image_start_token_id"`
+	ImageEndTokenID   uint32 `json:"image_end_token_id"`
+	VideoStartTokenID uint32 `json:"video_start_token_id"`
+	VideoEndTokenID   uint32 `json:"video_end_token_id"`
+	ImageTokenID      uint32 `json:"image_token_id"`
+	VideoTokenID      uint32 `json:"video_token_id"`
+
+	// Preprocessor config (preprocessor_config.json)
+	// KV uses these values as fallbacks for patch/merge sizes and as the source of the
+	// min/max pixel limits and the normalization mean/std.
+	Preprocessor struct {
+		Size struct {
+			ShortestEdge uint32 `json:"shortest_edge"`
+			LongestEdge  uint32 `json:"longest_edge"`
+		} `json:"size"`
+		PatchSize         uint32    `json:"patch_size"`
+		TemporalPatchSize uint32    `json:"temporal_patch_size"`
+		MergeSize         uint32    `json:"merge_size"`
+		ImageMean         []float32 `json:"image_mean"`
+		ImageStd          []float32 `json:"image_std"`
+	} `json:"-"`
+}
+
+var _ ModelConverter = (*glmOcrModel)(nil)
+
+// parseMore loads preprocessor_config.json from fsys into m.Preprocessor.
+// A read failure or a JSON decoding failure is returned to the caller unchanged.
+func (m *glmOcrModel) parseMore(fsys fs.FS) error {
+	raw, err := fs.ReadFile(fsys, "preprocessor_config.json")
+	if err == nil {
+		err = json.Unmarshal(raw, &m.Preprocessor)
+	}
+	return err
+}
+
+// KV builds the GGUF metadata for a GLM-OCR model on top of the base ModelParameters
+// entries. Values missing from the config (zero) fall back to the defaults given here;
+// preprocessor-derived entries are only written when the preprocessor config supplies them.
+func (m *glmOcrModel) KV(t *Tokenizer) KV {
+	kv := m.ModelParameters.KV(t)
+	kv["general.architecture"] = "glmocr"
+
+	// Text model parameters
+	hiddenSize := cmp.Or(m.TextConfig.HiddenSize, 1536)
+	numHeads := cmp.Or(m.TextConfig.NumAttentionHeads, 16)
+	// cmp.Or evaluates every argument, so the head_dim fallback must divide the defaulted
+	// values: dividing by a missing (zero) num_attention_heads would panic.
+	headDim := cmp.Or(m.TextConfig.HeadDim, hiddenSize/numHeads)
+
+	kv["glmocr.block_count"] = cmp.Or(m.TextConfig.NumHiddenLayers, 16)
+	kv["glmocr.embedding_length"] = hiddenSize
+	kv["glmocr.attention.head_count"] = numHeads
+	kv["glmocr.attention.head_count_kv"] = cmp.Or(m.TextConfig.NumKeyValueHeads, 8)
+	kv["glmocr.attention.key_length"] = headDim
+	kv["glmocr.attention.value_length"] = headDim
+	kv["glmocr.feed_forward_length"] = cmp.Or(m.TextConfig.IntermediateSize, 4608)
+	kv["glmocr.attention.layer_norm_rms_epsilon"] = cmp.Or(m.TextConfig.RMSNormEps, 1e-5)
+	kv["glmocr.context_length"] = cmp.Or(m.TextConfig.MaxPositionEmbed, 131072)
+	kv["glmocr.rope.freq_base"] = cmp.Or(m.TextConfig.RopeParameters.RopeTheta, float32(10000))
+	// rope_parameters takes precedence over the value directly on text_config
+	kv["glmocr.rope.partial_rotary_factor"] = cmp.Or(m.TextConfig.RopeParameters.PartialRotaryFactor, m.TextConfig.PartialRotaryFactor, float32(1.0))
+	if len(m.TextConfig.RopeParameters.MRopeSection) > 0 {
+		kv["glmocr.rope.mrope_section"] = m.TextConfig.RopeParameters.MRopeSection
+	}
+
+	// Vision model parameters; patch/merge sizes fall back to the preprocessor config
+	kv["glmocr.vision.block_count"] = cmp.Or(m.VisionConfig.Depth, 24)
+	kv["glmocr.vision.embedding_length"] = cmp.Or(m.VisionConfig.HiddenSize, 1024)
+	kv["glmocr.vision.attention.head_count"] = cmp.Or(m.VisionConfig.NumHeads, 16)
+	kv["glmocr.vision.image_size"] = cmp.Or(m.VisionConfig.ImageSize, 336)
+	kv["glmocr.vision.patch_size"] = cmp.Or(m.VisionConfig.PatchSize, m.Preprocessor.PatchSize, 14)
+	kv["glmocr.vision.spatial_merge_size"] = cmp.Or(m.VisionConfig.SpatialMergeSize, m.Preprocessor.MergeSize, 2)
+	kv["glmocr.vision.temporal_patch_size"] = cmp.Or(m.VisionConfig.TemporalPatchSize, m.Preprocessor.TemporalPatchSize, 2)
+	kv["glmocr.vision.out_hidden_size"] = cmp.Or(m.VisionConfig.OutHiddenSize, 1536)
+	kv["glmocr.vision.intermediate_size"] = cmp.Or(m.VisionConfig.IntermediateSize, 4096)
+	kv["glmocr.vision.attention.layer_norm_rms_epsilon"] = cmp.Or(m.VisionConfig.RMSNormEps, 1e-5)
+
+	// Preprocessor-derived image settings (min/max pixels and normalization).
+	// Keys are written here with the full "glmocr." prefix; readers look them up
+	// without it (e.g. "vision.min_pixels").
+	if m.Preprocessor.Size.ShortestEdge > 0 {
+		kv["glmocr.vision.min_pixels"] = m.Preprocessor.Size.ShortestEdge
+	}
+	if m.Preprocessor.Size.LongestEdge > 0 {
+		kv["glmocr.vision.max_pixels"] = m.Preprocessor.Size.LongestEdge
+	}
+	// Mean/std are only meaningful as RGB triples
+	if len(m.Preprocessor.ImageMean) == 3 {
+		kv["glmocr.vision.image_mean"] = m.Preprocessor.ImageMean
+	}
+	if len(m.Preprocessor.ImageStd) == 3 {
+		kv["glmocr.vision.image_std"] = m.Preprocessor.ImageStd
+	}
+
+	// Special tokens
+	kv["glmocr.image_token_id"] = m.ImageTokenID
+	kv["glmocr.image_start_token_id"] = m.ImageStartTokenID
+	kv["glmocr.image_end_token_id"] = m.ImageEndTokenID
+	kv["glmocr.video_token_id"] = m.VideoTokenID
+	kv["glmocr.video_start_token_id"] = m.VideoStartTokenID
+	kv["glmocr.video_end_token_id"] = m.VideoEndTokenID
+
+	return kv
+}
+
+// glmOcrBlockRe captures the layer index of tensor names in the renamed "blk.N.xxx" form.
+var glmOcrBlockRe = regexp.MustCompile(`^blk\.(\d+)`)
+
+// glmOcrTemporalFrameRepacker returns a repacker that keeps a single temporal frame of a
+// 5D Conv3D weight. The input is [OC, IC, T, KH, KW]; the output is frame `frame`
+// flattened in [OC, IC, KH, KW] order (PyTorch layout, no transpose). It must only be
+// attached to 5D tensors.
+func glmOcrTemporalFrameRepacker(frame int) func(string, []float32, []uint64) ([]float32, error) {
+	return func(_ string, data []float32, shape []uint64) ([]float32, error) {
+		dims := make([]int, len(shape))
+		for i := range shape {
+			dims[i] = int(shape[i])
+		}
+
+		var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
+		// Slice the requested temporal frame: [:, :, frame, :, :]
+		tt, err := tt.Slice(nil, nil, tensor.S(frame, frame+1), nil, nil)
+		if err != nil {
+			return nil, err
+		}
+		tt = tensor.Materialize(tt)
+
+		// Reshape to 4D by squeezing temporal dim [OC, IC, 1, KH, KW] -> [OC, IC, KH, KW]
+		if err := tt.Reshape(dims[0], dims[1], dims[3], dims[4]); err != nil {
+			return nil, err
+		}
+		// No transpose - keep PyTorch format, flattened for output
+		if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
+			return nil, err
+		}
+		return native.VectorF32(tt.(*tensor.Dense))
+	}
+}
+
+// Tensors converts the renamed source tensors into the tensors written to the output file.
+//
+// It drops text layers at or beyond num_hidden_layers, splits fused ffn_gate_up weights,
+// splits the 5D Conv3D patch embedding into two Conv2D weights (patch_embd_0/patch_embd_1),
+// normalises the names of already-split patch embeddings, and, when an M-RoPE section is
+// configured, attaches a repacker that permutes Q/K weights to NeoX ordering. Every other
+// tensor is passed through unchanged.
+func (m *glmOcrModel) Tensors(ts []Tensor) []*ggml.Tensor {
+	var out []*ggml.Tensor
+
+	// Skip layers >= num_hidden_layers (Multi-Token Prediction layers not needed for basic inference)
+	numLayers := int(cmp.Or(m.TextConfig.NumHiddenLayers, 16))
+	skipLayer := func(name string) bool {
+		// Tensor names are already replaced to "blk.N.xxx" format
+		matches := glmOcrBlockRe.FindStringSubmatch(name)
+		if matches == nil {
+			return false
+		}
+		blkNum, err := strconv.Atoi(matches[1])
+		return err == nil && blkNum >= numLayers
+	}
+
+	// Q/K permutation settings are the same for every tensor, so resolve them once.
+	// The partial rotary factor uses the same precedence as the
+	// "glmocr.rope.partial_rotary_factor" metadata written by KV (rope_parameters first),
+	// so the permuted rows match the rotary dimensions the runtime is told about.
+	permuteQK := len(m.TextConfig.RopeParameters.MRopeSection) > 0
+	nHeads := int(cmp.Or(m.TextConfig.NumAttentionHeads, 16))
+	nKVHeads := int(cmp.Or(m.TextConfig.NumKeyValueHeads, 8))
+	hiddenSize := int(cmp.Or(m.TextConfig.HiddenSize, 1536))
+	headDim := int(cmp.Or(m.TextConfig.HeadDim, uint32(hiddenSize/nHeads)))
+	partialRotaryFactor := cmp.Or(m.TextConfig.RopeParameters.PartialRotaryFactor, m.TextConfig.PartialRotaryFactor, float32(1.0))
+
+	for _, t := range ts {
+		name := t.Name()
+
+		// Skip next-n prediction layers (layers >= num_hidden_layers)
+		if skipLayer(name) {
+			continue
+		}
+
+		// passthrough describes t under a (possibly new) name and shape, writing t's data as-is
+		passthrough := func(name string, shape []uint64) *ggml.Tensor {
+			return &ggml.Tensor{
+				Name:     name,
+				Kind:     t.Kind(),
+				Shape:    shape,
+				WriterTo: t,
+			}
+		}
+
+		// Split ffn_gate_up into separate gate and up projections
+		if strings.Contains(name, "ffn_gate_up") {
+			for part := range splitDim(t, 0,
+				split{Replacer: strings.NewReplacer("ffn_gate_up", "ffn_gate")},
+				split{Replacer: strings.NewReplacer("ffn_gate_up", "ffn_up")},
+			) {
+				out = append(out, part)
+			}
+			continue
+		}
+
+		// Split 5D Conv3D patch_embed weight into two Conv2D weights along temporal dimension
+		// Shape: [out_channels, in_channels, temporal=2, height, width] -> 2x [out_channels, in_channels, height, width]
+		// NOTE: Tensor names are already renamed via Replacements() before Tensors() is called,
+		// so we check for "patch_embd" (renamed) not "patch_embed" (original safetensors name)
+		// NOTE: Ollama Conv2D expects PyTorch format [OC, IC, KH, KW] - no transpose needed
+		if strings.HasSuffix(name, "patch_embd.weight") {
+			shape := t.Shape()
+			switch {
+			case len(shape) == 5 && shape[2] == 2:
+				// Original shape: [OC, IC, 2, KH, KW] -> [OC, IC, KH, KW] (PyTorch format, no transpose)
+				newShape := []uint64{shape[0], shape[1], shape[3], shape[4]}
+				for frame, newName := range []string{"patch_embd_0.weight", "patch_embd_1.weight"} {
+					// Each output reads the same source through its own clone and frame repacker
+					frameT := t.Clone()
+					frameT.SetRepacker(glmOcrTemporalFrameRepacker(frame))
+					sliced := passthrough(strings.Replace(name, "patch_embd.weight", newName, 1), newShape)
+					sliced.WriterTo = frameT
+					out = append(out, sliced)
+				}
+				continue
+			case len(shape) == 4:
+				// Already a Conv2D weight: only rename it to the first temporal slot
+				out = append(out, passthrough(strings.Replace(name, "patch_embd.weight", "patch_embd_0.weight", 1), shape))
+				continue
+			}
+
+			slog.Warn("glmocr: patch_embed weight has unexpected shape - not splitting", "shape", shape)
+			// Fall through to default handling
+		}
+
+		// Handle pre-split patch embedding weights
+		// Pattern 1: v.patch_embd.0.weight, v.patch_embd.1.weight -> patch_embd_0.weight, patch_embd_1.weight
+		// Pattern 2: v.patch_embd.weight.0, v.patch_embd.weight.1 -> patch_embd_0.weight, patch_embd_1.weight
+		switch {
+		case strings.Contains(name, "patch_embd.0."):
+			out = append(out, passthrough(strings.Replace(name, "patch_embd.0.", "patch_embd_0.", 1), t.Shape()))
+			continue
+		case strings.Contains(name, "patch_embd.1."):
+			out = append(out, passthrough(strings.Replace(name, "patch_embd.1.", "patch_embd_1.", 1), t.Shape()))
+			continue
+		case strings.HasSuffix(name, "patch_embd.weight.0"):
+			out = append(out, passthrough(strings.Replace(name, "patch_embd.weight.0", "patch_embd_0.weight", 1), t.Shape()))
+			continue
+		case strings.HasSuffix(name, "patch_embd.weight.1"):
+			out = append(out, passthrough(strings.Replace(name, "patch_embd.weight.1", "patch_embd_1.weight", 1), t.Shape()))
+			continue
+		}
+
+		// Permute Q/K weights for M-RoPE compatibility (interleaved -> NeoX ordering)
+		// GGML's M-RoPE kernel uses NeoX-style rotation, but GLM-OCR uses interleaved (LLaMA-style)
+		// We permute at conversion time so the weights work correctly with GGML's kernel
+		if permuteQK && strings.Contains(name, "blk.") &&
+			(strings.Contains(name, "attn_q.") || strings.Contains(name, "attn_k.")) {
+			// Use appropriate head count: nHeads for Q, nKVHeads for K
+			effectiveHeads := nHeads
+			if strings.Contains(name, "attn_k.") {
+				effectiveHeads = nKVHeads
+			}
+
+			permutedT := t.Clone()
+			permutedT.SetRepacker(normalToNeoXRepacker(effectiveHeads, headDim, partialRotaryFactor))
+			permuted := passthrough(name, t.Shape())
+			permuted.WriterTo = permutedT
+			out = append(out, permuted)
+			continue
+		}
+
+		out = append(out, passthrough(name, t.Shape()))
+	}
+
+	return out
+}
+
+// Replacements returns the tensor-name rewrite table as a flat list of alternating
+// (old, new) substrings, mapping the source checkpoint names to the names used in the
+// converted model. Tensors relies on these renames having been applied already.
+//
+// NOTE(review): the list is presumably fed to strings.NewReplacer by the caller, in which
+// case entry order matters where one old string is a prefix of another (e.g. "proj_1"
+// is listed before "proj") - confirm before reordering.
+func (m *glmOcrModel) Replacements() []string {
+	return []string{
+		// Vision encoder
+		"model.visual.patch_embed.proj_1", "v.patch_embd_1", // Second temporal split
+		"model.visual.patch_embed.proj", "v.patch_embd",
+		"model.visual.blocks", "v.blk",
+		"model.visual.post_layernorm", "v.post_ln",
+		"model.visual.downsample", "mm.patch_merger",
+
+		// Vision attention
+		"attn.qkv", "attn_qkv",
+		"attn.proj", "attn_out",
+		"attn.q_norm", "attn_q_norm",
+		"attn.k_norm", "attn_k_norm",
+
+		// Vision norms
+		"norm1", "ln1",
+		"norm2", "ln2",
+
+		// Vision MLP
+		"mlp.gate_proj", "ffn_gate",
+		"mlp.up_proj", "ffn_up",
+		"mlp.down_proj", "ffn_down",
+
+		// Merger (multimodal projector)
+		"model.visual.merger.proj", "mm.model.fc",
+		"model.visual.merger.post_projection_norm", "mm.post_norm",
+		"model.visual.merger.gate_proj", "mm.gate",
+		"model.visual.merger.up_proj", "mm.up",
+		"model.visual.merger.down_proj", "mm.down",
+
+		// Language model
+		"model.language_model.embed_tokens", "token_embd",
+		"model.language_model.layers", "blk",
+		"model.language_model.norm", "output_norm",
+		"lm_head", "output",
+
+		// Language model attention
+		"self_attn.q_proj", "attn_q",
+		"self_attn.k_proj", "attn_k",
+		"self_attn.v_proj", "attn_v",
+		"self_attn.o_proj", "attn_out",
+
+		// Language model norms
+		"input_layernorm", "attn_norm",
+		"post_attention_layernorm", "ffn_norm",
+		"post_self_attn_layernorm", "post_attn_norm",
+		"post_mlp_layernorm", "post_ffn_norm",
+
+		// Language model MLP (remove mlp. prefix so ffn_* names work)
+		// The fused gate_up weight is later split into ffn_gate and ffn_up by Tensors.
+		"mlp.gate_up_proj", "ffn_gate_up",
+		// Same pair as the "Vision MLP" entry above; repeated here for the language model.
+		"mlp.down_proj", "ffn_down",
+	}
+}
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -99,6 +99,7 @@ func (st safetensor) Kind() uint32 {
 	if st.dtype == "BF16" &&
 		!strings.HasPrefix(st.name, "v.") &&
 		!strings.HasPrefix(st.name, "s.") &&
+		!strings.HasPrefix(st.name, "mm.") &&
 		kind != tensorKindFP32 {
 		kind = tensorKindBF16
 	}
--- a/docs/api.md
+++ b/docs/api.md
@@ -15,7 +15,6 @@
 - [Push a Model](#push-a-model)
 - [Generate Embeddings](#generate-embeddings)
 - [List Running Models](#list-running-models)
- [Usage](#usage)
 - [Version](#version)
 - [Experimental: Image Generation](#image-generation-experimental)

@@ -1855,53 +1854,6 @@ curl http://localhost:11434/api/embeddings -d '{
 }
 ```

-## Usage
-
-```
-GET /api/usage
-```
-
-Show aggregate usage statistics per model since the server started. All timestamps are UTC in RFC 3339 format.
-
-### Examples
-
-#### Request
-
-```shell
-curl http://localhost:11434/api/usage
-```
-
-#### Response
-
-```json
-{
-  "start": "2025-01-27T20:00:00Z",
-  "usage": [
-    {
-      "model": "llama3.2",
-      "requests": 5,
-      "prompt_tokens": 130,
-      "completion_tokens": 890
-    },
-    {
-      "model": "deepseek-r1",
-      "requests": 2,
-      "prompt_tokens": 48,
-      "completion_tokens": 312
-    }
-  ]
-}
-```
-
-#### Response fields
-
- `start`: when the server started tracking usage (UTC, RFC 3339)
- `usage`: list of per-model usage statistics
-  - `model`: model name
-  - `requests`: total number of completed requests
-  - `prompt_tokens`: total prompt tokens evaluated
-  - `completion_tokens`: total completion tokens generated
-
 ## Version

 ```
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -102,7 +102,6 @@
            "group": "Integrations",
            "pages": [
              "/integrations/claude-code",
-              "/integrations/clawdbot",
              "/integrations/cline",
              "/integrations/codex",
              "/integrations/droid",
--- a/docs/integrations/clawdbot.mdx
+++ b/docs/integrations/clawdbot.mdx
@@ -1,48 +0,0 @@
---
-title: Clawdbot
---
-
-Clawdbot is a personal AI assistant that runs on your own devices. It bridges messaging services (WhatsApp, Telegram, Slack, Discord, iMessage, and more) to AI coding agents through a centralized gateway.
-
-## Install
-
-Install [Clawdbot](https://clawd.bot/) 
-
-```bash
-npm install -g clawdbot@latest
-```
-
-Then run the onboarding wizard:
-
-```bash
-clawdbot onboard --install-daemon
-```
-
-<Note>Clawdbot requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>
-
-## Usage with Ollama
-
-### Quick setup
-
-```bash
-ollama launch clawdbot
-```
-
-This configures Clawdbot to use Ollama and starts the gateway.
-If the gateway is already running, no changes need to be made as the gateway will auto-reload the changes. 
-
-
-To configure without launching:
-
-```shell
-ollama launch clawdbot --config
-```
-
-## Recommended Models
-
- `qwen3-coder`
- `glm-4.7`
- `gpt-oss:20b`
- `gpt-oss:120b`
-
-Cloud models are also available at [ollama.com/search?c=cloud](https://ollama.com/search?c=cloud).
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -270,6 +270,7 @@ func (kv KV) OllamaEngineRequired() bool {
 		"qwen3", "qwen3moe",
 		"qwen3vl", "qwen3vlmoe",
 		"glm4moelite",
+		"glmocr",
 		"lfm2",
 	}, kv.Architecture())
 }
@@ -859,6 +860,7 @@ func (f GGML) FlashAttention() bool {
 		"bert",
 		"gemma3",
 		"glm4moelite",
+		"glmocr",
 		"gptoss", "gpt-oss",
 		"lfm2",
 		"mistral3",
--- a/llm/server.go
+++ b/llm/server.go
@@ -242,7 +242,6 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 	} else {
 		// For Ollama engine, use our SupportsFlashAttention logic
 		if fa {
-			slog.Info("enabling flash attention")
 			loadRequest.FlashAttention = ml.FlashAttentionEnabled

 			// Flash Attention also supports kv cache quantization
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -170,6 +170,7 @@ type Tensor interface {
 	Cos(ctx Context) Tensor
 	Tanh(ctx Context) Tensor
 	GELU(ctx Context, up ...Tensor) Tensor
+	GELU_ERF(ctx Context) Tensor
 	QuickGELU(ctx Context, up ...Tensor) Tensor
 	SILU(ctx Context, up ...Tensor) Tensor
 	RELU(ctx Context, up ...Tensor) Tensor
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -1581,6 +1581,13 @@ func (t *Tensor) GELU(ctx ml.Context, t2 ...ml.Tensor) ml.Tensor {
 	}
 }

+// GELU_ERF returns a tensor on t's backend wrapping the result of
+// ggml_gelu_erf_inplace applied to t. Unlike GELU it takes no optional second tensor.
+// NOTE(review): the "_inplace" variant presumably reuses t's storage - confirm before
+// relying on t's original values afterwards.
+func (t *Tensor) GELU_ERF(ctx ml.Context) ml.Tensor {
+	return &Tensor{
+		b: t.b,
+		t: C.ggml_gelu_erf_inplace(ctx.(*Context).ctx, t.t),
+	}
+}
+
 func (t *Tensor) QuickGELU(ctx ml.Context, t2 ...ml.Tensor) ml.Tensor {
 	var tt *C.struct_ggml_tensor
 	if len(t2) > 0 {
--- a/model/imageproc/images.go
+++ b/model/imageproc/images.go
@@ -20,6 +20,7 @@ const (
 	ResizeBilinear = iota
 	ResizeNearestNeighbor
 	ResizeApproxBilinear
+	ResizeBicubic
 	ResizeCatmullrom
 )

@@ -45,6 +46,7 @@ func Resize(img image.Image, newSize image.Point, method int) image.Image {
 		ResizeBilinear:        draw.BiLinear,
 		ResizeNearestNeighbor: draw.NearestNeighbor,
 		ResizeApproxBilinear:  draw.ApproxBiLinear,
+		ResizeBicubic:         draw.CatmullRom,
 		ResizeCatmullrom:      draw.CatmullRom,
 	}

--- a/model/models/glmocr/imageprocessor.go
+++ b/model/models/glmocr/imageprocessor.go
@@ -0,0 +1,171 @@
+package glmocr
+
+import (
+	"image"
+	"math"
+
+	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/model/imageproc"
+)
+
+// ImageProcessor holds the image preprocessing settings for GLM-OCR: how images are
+// resized, normalized, and cut into patches. Values are populated by newImageProcessor.
+type ImageProcessor struct {
+	imageSize         int // "vision.image_size" from the model config
+	patchSize         int // side length of one square patch, in pixels
+	temporalPatchSize int // number of frames per patch; a single image is repeated to fill them
+	spatialMergeSize  int // patches are emitted in mergeSize x mergeSize groups
+	minPixels         int // lower bound on the resized frames*height*width product
+	maxPixels         int // upper bound on the resized frames*height*width product
+	factor            int // patchSize * spatialMergeSize; resized dimensions are multiples of this
+	imageMean         [3]float32 // per-channel mean passed to normalization
+	imageStd          [3]float32 // per-channel standard deviation passed to normalization
+}
+
+// newImageProcessor builds an ImageProcessor from the model config, falling back to
+// built-in defaults for any missing key.
+func newImageProcessor(c fs.Config) ImageProcessor {
+	patchSize := int(c.Uint("vision.patch_size", 14))
+	spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2))
+	temporalPatchSize := int(c.Uint("vision.temporal_patch_size", 2))
+
+	// rgb turns a config array into a fixed RGB triple. Indexing an array with fewer than
+	// three entries would panic, so such values are replaced by the fallback.
+	rgb := func(vals, fallback []float32) [3]float32 {
+		if len(vals) < 3 {
+			vals = fallback
+		}
+		return [3]float32{vals[0], vals[1], vals[2]}
+	}
+
+	// Read normalization values from config if available, otherwise use CLIP defaults
+	imageMean := rgb(c.Floats("vision.image_mean", imageproc.ClipDefaultMean[:]), imageproc.ClipDefaultMean[:])
+	imageStd := rgb(c.Floats("vision.image_std", imageproc.ClipDefaultSTD[:]), imageproc.ClipDefaultSTD[:])
+
+	// Pixel limits default to multiples of patchSize² * mergeSize² * temporal:
+	// 8x for the minimum and 2048x for the maximum (~3.2M pixels with the defaults above)
+	pixelUnit := patchSize * patchSize * spatialMergeSize * spatialMergeSize * temporalPatchSize
+	defaultMinPixels := 8 * pixelUnit
+	defaultMaxPixels := 2048 * pixelUnit
+
+	return ImageProcessor{
+		imageSize:         int(c.Uint("vision.image_size", 336)),
+		patchSize:         patchSize,
+		temporalPatchSize: temporalPatchSize,
+		spatialMergeSize:  spatialMergeSize,
+		minPixels:         int(c.Uint("vision.min_pixels", uint32(defaultMinPixels))),
+		maxPixels:         int(c.Uint("vision.max_pixels", uint32(defaultMaxPixels))),
+		factor:            patchSize * spatialMergeSize,
+		imageMean:         imageMean,
+		imageStd:          imageStd,
+	}
+}
+
+// SmartResize returns the (height, width) an image of the given size should be resized to.
+//
+// Both results are multiples of p.factor and never smaller than it. Images with a side
+// below p.factor are scaled up first. The result is then scaled down when
+// frames*height*width exceeds p.maxPixels, or scaled up when it is below p.minPixels,
+// where frames is the temporal patch size (a single image fills one temporal patch).
+//
+// It panics when the temporal patch size or either dimension is not positive, or when
+// the aspect ratio exceeds 200.
+func (p *ImageProcessor) SmartResize(height, width int) (int, int) {
+	factor := p.factor
+	temporalFactor := p.temporalPatchSize
+	numFrames := temporalFactor // single image
+
+	if temporalFactor <= 0 {
+		panic("temporal_patch_size must be > 0")
+	}
+	// A zero dimension would turn the scale factors below into Inf/NaN
+	if height <= 0 || width <= 0 {
+		panic("image height and width must be > 0")
+	}
+
+	if height < factor || width < factor {
+		// Scale up small images
+		scale := float64(factor) / float64(min(height, width))
+		height = int(math.Ceil(float64(height) * scale))
+		width = int(math.Ceil(float64(width) * scale))
+	}
+
+	if aspectRatio := float64(max(height, width)) / float64(min(height, width)); aspectRatio > 200 {
+		panic("absolute aspect ratio must be smaller than 200")
+	}
+
+	round := func(x float64) int { return int(math.RoundToEven(x)) }
+
+	hBar := round(float64(height)/float64(factor)) * factor
+	wBar := round(float64(width)/float64(factor)) * factor
+	tBar := round(float64(numFrames)/float64(temporalFactor)) * temporalFactor
+
+	if tBar*hBar*wBar > p.maxPixels {
+		beta := math.Sqrt(float64(numFrames*height*width) / float64(p.maxPixels))
+		// Flooring can reach 0 for a short side, which would produce an empty patch grid
+		hBar = max(factor, int(math.Floor(float64(height)/beta/float64(factor)))*factor)
+		wBar = max(factor, int(math.Floor(float64(width)/beta/float64(factor)))*factor)
+	} else if tBar*hBar*wBar < p.minPixels {
+		beta := math.Sqrt(float64(p.minPixels) / float64(numFrames*height*width))
+		hBar = int(math.Ceil(float64(height)*beta/float64(factor))) * factor
+		wBar = int(math.Ceil(float64(width)*beta/float64(factor))) * factor
+	}
+
+	return hBar, wBar
+}
+
+// ProcessImage prepares img for the vision model. The image is composited, resized to
+// the dimensions chosen by SmartResize, normalized with the configured mean/std, and cut
+// into patches. It returns the patch data together with the grid describing the patch
+// layout and the resized image size.
+func (p *ImageProcessor) ProcessImage(img image.Image) ([]float32, *Grid, error) {
+	img = imageproc.Composite(img)
+	bounds := img.Bounds()
+
+	// Pick target dimensions, then resample to them
+	targetHeight, targetWidth := p.SmartResize(bounds.Dy(), bounds.Dx())
+	target := image.Point{X: targetWidth, Y: targetHeight}
+	scaled := imageproc.Resize(img, target, imageproc.ResizeBicubic)
+
+	// Normalized pixels stay in [C, H, W] order (rescale and channelFirst both enabled),
+	// which is the layout patch extraction indexes into
+	pixels := imageproc.Normalize(scaled, p.imageMean, p.imageStd, true, true)
+
+	// Grid dimensions are counted in patches; a still image has a single temporal step
+	grid := Grid{
+		Height:      targetHeight / p.patchSize,
+		Width:       targetWidth / p.patchSize,
+		Temporal:    1,
+		ImageHeight: targetHeight,
+		ImageWidth:  targetWidth,
+	}
+
+	patches, err := p.createPatches(pixels, targetHeight, targetWidth, &grid)
+	if err != nil {
+		return nil, nil, err
+	}
+	return patches, &grid, nil
+}
+
+// createPatches rearranges channel-first pixels ([C, H, W], three channels) into a flat
+// list of patches. Patches are emitted group by group, where a group is a
+// mergeSize x mergeSize square of neighbouring patches visited row-major. Each patch is
+// laid out as [channel][temporal frame][row][column], and the single source frame is
+// repeated for every temporal frame. The returned error is always nil.
+func (p *ImageProcessor) createPatches(pixels []float32, height, width int, grid *Grid) ([]float32, error) {
+	const numChannels = 3
+
+	side := p.patchSize
+	merge := p.spatialMergeSize
+	frames := p.temporalPatchSize
+
+	frameLen := side * side
+	channelLen := frames * frameLen
+	patchLen := numChannels * channelLen
+	planeLen := height * width
+
+	patches := make([]float32, grid.Temporal*grid.Height*grid.Width*patchLen)
+	written := 0
+
+	for range grid.Temporal {
+		for groupRow := 0; groupRow < grid.Height; groupRow += merge {
+			for groupCol := 0; groupCol < grid.Width; groupCol += merge {
+				for dr := range merge {
+					for dc := range merge {
+						// Top-left pixel of this patch in the source image
+						top := (groupRow + dr) * side
+						left := (groupCol + dc) * side
+
+						for c := range numChannels {
+							start := written*patchLen + c*channelLen
+							first := patches[start : start+frameLen]
+
+							// Fill the first temporal frame one pixel row at a time
+							for y := range side {
+								src := c*planeLen + (top+y)*width + left
+								copy(first[y*side:(y+1)*side], pixels[src:src+side])
+							}
+
+							// Remaining temporal frames repeat the first
+							for f := 1; f < frames; f++ {
+								copy(patches[start+f*frameLen:start+(f+1)*frameLen], first)
+							}
+						}
+
+						written++
+					}
+				}
+			}
+		}
+	}
+
+	return patches, nil
+}
--- a/model/models/glmocr/model.go
+++ b/model/models/glmocr/model.go
@@ -0,0 +1,235 @@
+package glmocr
+
+import (
+	"bytes"
+	"errors"
+	"image"
+	"slices"
+
+	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/model"
+	"github.com/ollama/ollama/model/input"
+)
+
+type Model struct {
+	model.Base
+	model.BytePairEncoding
+
+	*TextModel
+	*VisionModel     `gguf:"v"`
+	VisionDownsample *VisionDownsample `gguf:"mm.patch_merger"`
+	PatchMerger      *PatchMerger      `gguf:"mm"`
+
+	ImageProcessor
+
+	imageTokenID      int32
+	imageStartTokenID int32
+	imageEndTokenID   int32
+}
+
+var _ model.MultimodalProcessor = (*Model)(nil)
+
+func New(c fs.Config) (model.Model, error) {
+	eosTokenID := int32(c.Uint("tokenizer.ggml.eos_token_id"))
+	eosTokenIDs := c.Ints("tokenizer.ggml.eos_token_ids")
+	allEOS := append([]int32{eosTokenID}, eosTokenIDs...)
+
+	m := &Model{
+		BytePairEncoding: model.NewBytePairEncoding(
+			&model.Vocabulary{
+				Values: c.Strings("tokenizer.ggml.tokens"),
+				Types:  c.Ints("tokenizer.ggml.token_type"),
+				Merges: c.Strings("tokenizer.ggml.merges"),
+				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
+				BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
+				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
+				EOS:    allEOS,
+			},
+			`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
+		),
+		TextModel:         newTextModel(c),
+		VisionModel:       newVisionModel(c),
+		ImageProcessor:    newImageProcessor(c),
+		imageTokenID:      int32(c.Uint("image_token_id", 59280)),
+		imageStartTokenID: int32(c.Uint("image_start_token_id", 59256)),
+		imageEndTokenID:   int32(c.Uint("image_end_token_id", 59257)),
+	}
+
+	m.Cache = kvcache.NewCausalCache(m.TextModel.Shift)
+
+	return m, nil
+}
+
+// EncodeMultimodal decodes an image, runs it through the vision encoder, the
+// mm.patch_merger downsample and the mm patch merger, and returns the embeddings
+// together with the patch grid. Missing weights are reported before any work is done.
+func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input.Multimodal, error) {
+	if len(m.VisionModel.Blocks) == 0 {
+		return nil, model.ErrNoVisionModel
+	}
+	if m.VisionDownsample == nil || m.VisionDownsample.Conv2D == nil || m.VisionDownsample.Weight == nil {
+		return nil, errors.New("glmocr: missing vision downsample weights")
+	}
+	if m.PatchMerger == nil {
+		return nil, errors.New("glmocr: missing patch merger weights")
+	}
+
+	img, _, err := image.Decode(bytes.NewReader(multimodalData))
+	if err != nil {
+		return nil, err
+	}
+
+	f32s, grid, err := m.ImageProcessor.ProcessImage(img)
+	if err != nil {
+		return nil, err
+	}
+
+	// Create pixel values tensor from flattened patches, shape [patchDim, numPatches]
+	patchDim := m.VisionModel.numChannels * m.temporalPatchSize * m.patchSize * m.patchSize
+	numPatches := grid.Temporal * grid.Height * grid.Width
+	pixelValues := ctx.Input().FromFloats(f32s, patchDim, numPatches)
+
+	// Vision encoder, then spatial downsample (Conv2D loaded from mm.patch_merger)
+	visionOutputs := m.VisionModel.Forward(ctx, pixelValues, grid)
+	visionOutputs = m.VisionDownsample.Forward(ctx, visionOutputs, grid, m.VisionModel.VisionModelOptions)
+
+	// Patch merger (FC + LayerNorm + GELU + SwiGLU FFN)
+	visionOutputs = m.PatchMerger.Forward(ctx, visionOutputs, m.VisionModel.VisionModelOptions)
+
+	return []input.Multimodal{{Tensor: visionOutputs, Data: grid}}, nil
+}
+
+// PostTokenize replaces each image input with a start token, Tensor.Dim(1) image tokens
+// and an end token, and rebuilds the position cache; non-*Grid image data is an error.
+func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {
+	var result []*input.Input
+
+	// Reset position cache
+	m.TextModel.positionCache = m.TextModel.positionCache[:0]
+	m.TextModel.ropeDelta = 0
+
+	pos := int32(0)
+	for _, inp := range inputs {
+		// Text token (len also covers an empty non-nil slice, which must not be indexed)
+		if len(inp.Multimodal) == 0 {
+			result = append(result, inp)
+			m.TextModel.positionCache = append(m.TextModel.positionCache, pos)
+			pos++
+			continue
+		}
+
+		// Get grid info for position calculation; report bad data instead of panicking
+		grid, ok := inp.Multimodal[0].Data.(*Grid)
+		if !ok {
+			return nil, errors.New("glmocr: multimodal input is missing grid data")
+		}
+		mergedH := grid.Height / m.VisionModel.spatialMergeSize
+		mergedW := grid.Width / m.VisionModel.spatialMergeSize
+
+		// Add image start token
+		result = append(result, &input.Input{Token: m.imageStartTokenID})
+		m.TextModel.positionCache = append(m.TextModel.positionCache, pos)
+		pos++
+
+		// Add image tokens; all share the same base position for the temporal dimension
+		tokensPerGrid := inp.Multimodal[0].Tensor.Dim(1)
+		basePos := pos
+		result = append(result, &input.Input{
+			Token:          m.imageTokenID,
+			Multimodal:     inp.Multimodal,
+			MultimodalHash: inp.MultimodalHash,
+			SameBatch:      max(tokensPerGrid-1, 0),
+		})
+		m.TextModel.positionCache = append(m.TextModel.positionCache, basePos)
+
+		// Add placeholder tokens for remaining positions (same base position)
+		for range tokensPerGrid - 1 {
+			result = append(result, &input.Input{Token: m.imageTokenID})
+			m.TextModel.positionCache = append(m.TextModel.positionCache, basePos)
+		}
+
+		// Advance position by max(mergedH, mergedW) after image tokens
+		pos = basePos + int32(max(mergedH, mergedW))
+
+		// Add image end token
+		result = append(result, &input.Input{Token: m.imageEndTokenID})
+		m.TextModel.positionCache = append(m.TextModel.positionCache, pos)
+		pos++
+	}
+
+	// Compute rope delta for continuation after the prefill segment:
+	// delta = (max_position_id + 1) - sequence_length
+	if len(m.TextModel.positionCache) > 0 {
+		last := m.TextModel.positionCache[len(m.TextModel.positionCache)-1]
+		m.TextModel.ropeDelta = last + 1 - int32(len(m.TextModel.positionCache))
+	}
+
+	return result, nil
+}
+
+func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
+	// Initial token embedding
+	hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs).Duplicate(ctx)
+	ctx.Forward(hiddenStates)
+
+	// Build the four M-RoPE position rows; every token starts with one value in rows 0-2
+	positionSlice := func() [][]int32 {
+		s := [][]int32{
+			make([]int32, len(batch.Positions)), // temporal
+			make([]int32, len(batch.Positions)), // height
+			make([]int32, len(batch.Positions)), // width
+			make([]int32, len(batch.Positions)), // unused (zeros)
+		}
+		for i, position := range batch.Positions {
+			// Translate the batch position through positionCache, or continue past its end
+			if position < int32(len(m.TextModel.positionCache)) {
+				position = m.TextModel.positionCache[position]
+			} else if len(m.TextModel.positionCache) > 0 {
+				// Continue sequence after cached positions using ropeDelta
+				position = position + m.TextModel.ropeDelta
+			}
+
+			s[0][i] = position
+			s[1][i] = position
+			s[2][i] = position
+		}
+		return s
+	}()
+
+	// Copy vision embeddings into hiddenStates; add i/w and i%w to the height/width rows
+	for _, mi := range batch.Multimodal {
+		img := mi.Multimodal[0].Tensor
+		ctx.Forward(img.Copy(ctx, hiddenStates.View(ctx, mi.Index*hiddenStates.Stride(1), img.Dim(0)*img.Dim(1))))
+
+		if grid, ok := mi.Multimodal[0].Data.(*Grid); ok {
+			w := grid.Width / m.VisionModel.spatialMergeSize
+			for i := range img.Dim(1) {
+				positionSlice[1][mi.Index+i] += int32(i / w)
+				positionSlice[2][mi.Index+i] += int32(i % w)
+			}
+		}
+	}
+
+	positions := ctx.Input().FromInts(slices.Concat(positionSlice...), len(positionSlice[0])*len(positionSlice))
+
+	// Process through transformer layers; only the last layer receives batch.Outputs
+	for i, layer := range m.TextModel.Layers {
+		m.Cache.SetLayer(i)
+
+		var lastLayerOutputs ml.Tensor
+		if i == len(m.TextModel.Layers)-1 {
+			lastLayerOutputs = batch.Outputs
+		}
+
+		hiddenStates = layer.Forward(ctx, hiddenStates, positions, lastLayerOutputs, m.Cache, m.TextModel.TextModelOptions)
+	}
+
+	hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.TextModel.eps)
+	return m.Output.Forward(ctx, hiddenStates), nil
+}
+
+func init() {
+	model.Register("glmocr", New)
+}
--- a/model/models/glmocr/model_text.go
+++ b/model/models/glmocr/model_text.go
@@ -0,0 +1,180 @@
+package glmocr
+
+import (
+	"math"
+
+	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/kvcache"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/ml/nn"
+	"github.com/ollama/ollama/ml/nn/rope"
+)
+
+type TextModelOptions struct {
+	hiddenSize       int
+	numHeads         int
+	numKVHeads       int
+	headDim          int
+	rotaryDim        int
+	intermediateSize int
+	eps              float32
+	ropeBase         float32
+	mropeSections    []int
+}
+
+func (o *TextModelOptions) applyMRoPE(ctx ml.Context, states, positions ml.Tensor) ml.Tensor {
+	// GLM4 uses standard M-RoPE (not interleaved like Qwen3VL)
+	// Model.Forward passes 4 position rows [temporal, height, width, unused]; section sizes come from mropeSections
+	return nn.RoPE(ctx, states, positions, o.rotaryDim, o.ropeBase, 1.0, rope.WithMRoPE(o.mropeSections))
+}
+
+type TextSelfAttention struct {
+	Query  *nn.Linear `gguf:"attn_q"`
+	Key    *nn.Linear `gguf:"attn_k"`
+	Value  *nn.Linear `gguf:"attn_v"`
+	Output *nn.Linear `gguf:"attn_out"`
+}
+
+func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache kvcache.Cache, opts *TextModelOptions) ml.Tensor {
+	batchSize := hiddenStates.Dim(1)
+
+	// Separate Q, K, V projections
+	q := sa.Query.Forward(ctx, hiddenStates)
+	k := sa.Key.Forward(ctx, hiddenStates)
+	v := sa.Value.Forward(ctx, hiddenStates)
+
+	// Reshape for GQA
+	q = q.Reshape(ctx, opts.headDim, opts.numHeads, batchSize)
+	k = k.Reshape(ctx, opts.headDim, opts.numKVHeads, batchSize)
+	v = v.Reshape(ctx, opts.headDim, opts.numKVHeads, batchSize)
+
+	// Apply M-RoPE (multimodal rotary position embeddings) to queries and keys
+	q = opts.applyMRoPE(ctx, q, positions)
+	k = opts.applyMRoPE(ctx, k, positions)
+
+	// Scaled dot-product attention with KV cache
+	scaleFactor := 1.0 / math.Sqrt(float64(opts.headDim))
+	kqv := nn.Attention(ctx, q, k, v, scaleFactor, cache)
+	// Reshape attention output: [headDim, numHeads, batchSize] -> [numHeads*headDim, batchSize]
+	// Note: numHeads*headDim need not equal hiddenSize (e.g. 16 * 128 = 2048 when attention.key_length is 128)
+	kqv = kqv.Reshape(ctx, opts.numHeads*opts.headDim, batchSize)
+
+	return sa.Output.Forward(ctx, kqv)
+}
+
+type TextMLP struct {
+	Gate *nn.Linear `gguf:"ffn_gate"`
+	Up   *nn.Linear `gguf:"ffn_up"`
+	Down *nn.Linear `gguf:"ffn_down"`
+}
+
+func (mlp *TextMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *TextModelOptions) ml.Tensor {
+	// SwiGLU: down(silu(gate(x)) * up(x))
+	gate := mlp.Gate.Forward(ctx, hiddenStates).SILU(ctx, mlp.Up.Forward(ctx, hiddenStates))
+	return mlp.Down.Forward(ctx, gate)
+}
+
+type TextDecoderLayer struct {
+	// Input layernorm (before attention)
+	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
+	SelfAttention *TextSelfAttention
+	// Post self-attention layernorm (after attention, before residual add)
+	PostAttnNorm *nn.RMSNorm `gguf:"post_attn_norm"`
+
+	// FFN input layernorm (after first residual, before MLP)
+	FFNNorm *nn.RMSNorm `gguf:"ffn_norm"`
+	MLP     *TextMLP
+	// Post MLP layernorm (after MLP, before residual add)
+	PostFFNNorm *nn.RMSNorm `gguf:"post_ffn_norm"`
+}
+
+func (l *TextDecoderLayer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tensor, cache kvcache.Cache, opts *TextModelOptions) ml.Tensor {
+	// Attention block: norm -> self-attention -> post-norm; the residual is added further below
+	residual := hiddenStates
+	hiddenStates = l.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
+	hiddenStates = l.SelfAttention.Forward(ctx, hiddenStates, positions, cache, opts)
+	hiddenStates = l.PostAttnNorm.Forward(ctx, hiddenStates, opts.eps)
+
+	// Prune to the requested output rows (the caller passes outputs only for the final layer)
+	if outputs != nil {
+		hiddenStates = hiddenStates.Rows(ctx, outputs)
+		residual = residual.Rows(ctx, outputs)
+	}
+
+	hiddenStates = hiddenStates.Add(ctx, residual)
+
+	// MLP block: norm -> MLP -> post-norm -> residual add
+	residual = hiddenStates
+	hiddenStates = l.FFNNorm.Forward(ctx, hiddenStates, opts.eps)
+	hiddenStates = l.MLP.Forward(ctx, hiddenStates, opts)
+	hiddenStates = l.PostFFNNorm.Forward(ctx, hiddenStates, opts.eps)
+	hiddenStates = hiddenStates.Add(ctx, residual)
+
+	return hiddenStates
+}
+
+type TextModel struct {
+	TokenEmbedding *nn.Embedding      `gguf:"token_embd"`
+	Layers         []TextDecoderLayer `gguf:"blk"`
+	OutputNorm     *nn.RMSNorm        `gguf:"output_norm"`
+	Output         *nn.Linear         `gguf:"output,alt:token_embd"`
+
+	*TextModelOptions
+
+	// positionCache stores the M-RoPE position for each token in the sequence.
+	// This is needed because image tokens share the same base position but have
+	// different height/width offsets, and the end token position depends on the
+	// image grid dimensions.
+	positionCache []int32
+	ropeDelta     int32
+}
+
+func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
+	// Clear position cache when KV cache shifts
+	m.positionCache = nil
+	m.ropeDelta = 0
+	return m.applyMRoPE(ctx, key, shift), nil
+}
+
+func newTextModel(c fs.Config) *TextModel {
+	hiddenSize := int(c.Uint("embedding_length", 1536))
+	numHeads := int(c.Uint("attention.head_count", 16))
+	numKVHeads := int(c.Uint("attention.head_count_kv", 8))
+	intermediateSize := int(c.Uint("feed_forward_length", 4608))
+	eps := c.Float("attention.layer_norm_rms_epsilon", 1e-5)
+	ropeBase := c.Float("rope.freq_base", 10000)
+
+	headDim := int(c.Uint("attention.key_length", uint32(hiddenSize/numHeads)))
+
+	mropeSections := c.Ints("rope.mrope_section")
+	var sectionInts []int
+
+	if len(mropeSections) > 0 {
+		sectionInts = make([]int, len(mropeSections))
+		for i, section := range mropeSections {
+			sectionInts[i] = int(section)
+		}
+	} else {
+		// Default: 3 sections like GLM-OCR
+		sectionInts = []int{16, 24, 24}
+	}
+
+	// rotaryDim = headDim (128) to rotate all dimensions
+	// GGML rope_multi: sector = (dim_pair) % sum(sections), mapping each pair to its position dim
+	rotaryDim := headDim
+
+	return &TextModel{
+		Layers: make([]TextDecoderLayer, c.Uint("block_count", 16)),
+		TextModelOptions: &TextModelOptions{
+			hiddenSize:       hiddenSize,
+			numHeads:         numHeads,
+			numKVHeads:       numKVHeads,
+			headDim:          headDim,
+			rotaryDim:        rotaryDim,
+			intermediateSize: intermediateSize,
+			eps:              eps,
+			ropeBase:         ropeBase,
+			mropeSections:    sectionInts,
+		},
+	}
+}
--- a/model/models/glmocr/model_vision.go
+++ b/model/models/glmocr/model_vision.go
@@ -0,0 +1,348 @@
+package glmocr
+
+import (
+	"log/slog"
+	"math"
+	"slices"
+
+	"github.com/ollama/ollama/fs"
+	"github.com/ollama/ollama/ml"
+	"github.com/ollama/ollama/ml/nn"
+	"github.com/ollama/ollama/ml/nn/rope"
+)
+
+type Grid struct {
+	Height      int // Number of patches in height direction
+	Width       int // Number of patches in width direction
+	Temporal    int
+	ImageHeight int // Full image height in pixels
+	ImageWidth  int // Full image width in pixels
+}
+
+type VisionModelOptions struct {
+	hiddenSize        int
+	numHeads          int
+	headDim           int
+	numChannels       int
+	patchSize         int
+	temporalPatchSize int
+	imageSize         int
+	spatialMergeSize  int
+	outHiddenSize     int
+	intermediateSize  int
+	eps               float32
+}
+
+type VisionPatchEmbed struct {
+	Proj  *nn.Conv2D `gguf:"patch_embd_0"`
+	Proj1 *nn.Conv2D `gguf:"patch_embd_1"`
+	Bias  ml.Tensor  `gguf:"patch_embd.bias"`
+}
+
+func (pe *VisionPatchEmbed) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid, opts *VisionModelOptions) ml.Tensor {
+	_ = grid // patches are already in merge-block order
+
+	// pixelValues shape: [patchDim, numPatches]
+	numPatches := pixelValues.Shape()[1]
+
+	// Reshape to [patchSize*patchSize, temporalPatchSize, numChannels, numPatches]
+	pixelValues = pixelValues.Reshape(ctx, opts.patchSize*opts.patchSize, opts.temporalPatchSize, opts.numChannels, numPatches)
+	// Permute to [temporalPatchSize, patchSize*patchSize, numChannels, numPatches]
+	pixelValues = pixelValues.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
+
+	// Slice temporal frames for Conv2D (simulate Conv3D)
+	in0 := pixelValues.View(ctx, 0, 1, pixelValues.Stride(1), pixelValues.Dim(1), pixelValues.Stride(2), pixelValues.Dim(2), pixelValues.Stride(3), pixelValues.Dim(3)).Contiguous(ctx)
+	in0 = in0.Reshape(ctx, opts.patchSize, opts.patchSize, opts.numChannels, numPatches)
+
+	s0, s1 := opts.patchSize, opts.patchSize
+	p0, p1 := 0, 0
+	d0, d1 := 1, 1
+	hiddenStates := pe.Proj.Forward(ctx, in0, s0, s1, p0, p1, d0, d1)
+
+	if pe.Proj1 != nil && opts.temporalPatchSize > 1 {
+		in1 := pixelValues.View(ctx, pixelValues.Stride(0), 1, pixelValues.Stride(1), pixelValues.Dim(1), pixelValues.Stride(2), pixelValues.Dim(2), pixelValues.Stride(3), pixelValues.Dim(3)).Contiguous(ctx)
+		in1 = in1.Reshape(ctx, opts.patchSize, opts.patchSize, opts.numChannels, numPatches)
+		out1 := pe.Proj1.Forward(ctx, in1, s0, s1, p0, p1, d0, d1)
+		hiddenStates = hiddenStates.Add(ctx, out1)
+	}
+
+	// Flatten to [hidden_size, num_patches]
+	hiddenStates = hiddenStates.Reshape(ctx, opts.hiddenSize, numPatches)
+
+	// Add patch bias - reshape from [hidden_size] to [hidden_size, 1] for broadcasting
+	if pe.Bias != nil {
+		hiddenStates = hiddenStates.Add(ctx, pe.Bias.Reshape(ctx, opts.hiddenSize, 1))
+	}
+
+	return hiddenStates
+}
+
+type VisionSelfAttention struct {
+	QKV    *nn.Linear  `gguf:"attn_qkv"`
+	QNorm  *nn.RMSNorm `gguf:"attn_q_norm"`
+	KNorm  *nn.RMSNorm `gguf:"attn_k_norm"`
+	Output *nn.Linear  `gguf:"attn_out"`
+}
+
+func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, opts *VisionModelOptions) ml.Tensor {
+	batchSize := hiddenStates.Dim(1)
+
+	// Combined QKV projection: [3*hidden_size, batch_size]
+	qkv := sa.QKV.Forward(ctx, hiddenStates)
+
+	// Split using ChunkSections along dim 0 (handles byte offsets correctly)
+	// ChunkSections returns views - must make contiguous before further operations
+	chunks := qkv.ChunkSections(ctx, 0, opts.hiddenSize, opts.hiddenSize, opts.hiddenSize)
+	q := chunks[0].Contiguous(ctx)
+	k := chunks[1].Contiguous(ctx)
+	v := chunks[2].Contiguous(ctx)
+
+	// Reshape for multi-head attention: [hiddenSize, N] -> [headDim, numHeads, N]
+	q = q.Reshape(ctx, opts.headDim, opts.numHeads, batchSize)
+	k = k.Reshape(ctx, opts.headDim, opts.numHeads, batchSize)
+	v = v.Reshape(ctx, opts.headDim, opts.numHeads, batchSize)
+
+	// Apply Q-norm and K-norm after head reshape
+	// Weights are [headDim]=64, tensor is [headDim, numHeads, N]
+	q = sa.QNorm.Forward(ctx, q, opts.eps)
+	k = sa.KNorm.Forward(ctx, k, opts.eps)
+
+	// Apply rotary position embeddings with vision-style 2D positions
+	// Each section of headDim/4 pairs is assigned to one position dimension
+	// Positions are [height, width, height, width] repeated for rotation
+	ropeFreqBase := float32(10000.0)
+	sections := []int{opts.headDim / 4, opts.headDim / 4, opts.headDim / 4, opts.headDim / 4}
+	q = nn.RoPE(ctx, q, positions, opts.headDim/2, ropeFreqBase, 1.0, rope.WithVision(sections))
+	k = nn.RoPE(ctx, k, positions, opts.headDim/2, ropeFreqBase, 1.0, rope.WithVision(sections))
+
+	// Scale factor for scaled dot-product attention
+	scale := 1.0 / math.Sqrt(float64(opts.headDim))
+
+	// Try flash attention first (ScaledDotProductAttention), fall back to manual
+	if sdpa, ok := q.(ml.ScaledDotProductAttention); ok {
+		attention := sdpa.ScaledDotProductAttention(ctx, k, v, nil, nil, nil, scale, false)
+		attention = attention.Reshape(ctx, opts.hiddenSize, batchSize)
+		return sa.Output.Forward(ctx, attention)
+	}
+
+	slog.Warn("glmocr: vision attention falling back to manual attention",
+		"batchSize", batchSize, "numHeads", opts.numHeads,
+		"hint", "set OLLAMA_FLASH_ATTENTION=1 to enable flash attention")
+
+	// Manual attention fallback
+	// q, k, v are [headDim, numHeads, batchSize] - GGML treats as 4D with implicit dim 3 = 1
+	q = q.Permute(ctx, 0, 2, 1, 3)
+	k = k.Permute(ctx, 0, 2, 1, 3)
+	v = v.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
+
+	// Attention scores
+	kq := k.MulmatFullPrec(ctx, q)
+	kq = kq.Scale(ctx, scale)
+	kq = kq.Softmax(ctx)
+
+	// Attention output: v @ kq (note: v first)
+	kqv := v.Mulmat(ctx, kq)
+	attention := kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
+	attention = attention.Reshape(ctx, opts.hiddenSize, batchSize)
+
+	return sa.Output.Forward(ctx, attention)
+}
+
+type VisionMLP struct {
+	Gate *nn.Linear `gguf:"ffn_gate"`
+	Up   *nn.Linear `gguf:"ffn_up"`
+	Down *nn.Linear `gguf:"ffn_down"`
+}
+
+func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenStates ml.Tensor) ml.Tensor {
+	// SwiGLU: down(silu(gate(x)) * up(x))
+	gate := mlp.Gate.Forward(ctx, hiddenStates).SILU(ctx, mlp.Up.Forward(ctx, hiddenStates))
+	return mlp.Down.Forward(ctx, gate)
+}
+
+type VisionBlock struct {
+	Norm1         *nn.RMSNorm `gguf:"ln1"`
+	SelfAttention *VisionSelfAttention
+	Norm2         *nn.RMSNorm `gguf:"ln2"`
+	MLP           *VisionMLP
+}
+
+func (b *VisionBlock) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, opts *VisionModelOptions) ml.Tensor {
+	// Pre-norm architecture
+	residual := hiddenStates
+	hiddenStates = b.Norm1.Forward(ctx, hiddenStates, opts.eps)
+	hiddenStates = b.SelfAttention.Forward(ctx, hiddenStates, positions, opts)
+	hiddenStates = hiddenStates.Add(ctx, residual)
+
+	residual = hiddenStates
+	hiddenStates = b.Norm2.Forward(ctx, hiddenStates, opts.eps)
+	hiddenStates = b.MLP.Forward(ctx, hiddenStates)
+	hiddenStates = hiddenStates.Add(ctx, residual)
+
+	return hiddenStates
+}
+
+type VisionDownsample struct {
+	*nn.Conv2D // Embedded to get mm.patch_merger.weight/bias directly
+}
+
+func (d *VisionDownsample) Forward(ctx ml.Context, hiddenStates ml.Tensor, grid *Grid, opts *VisionModelOptions) ml.Tensor {
+	// Apply spatial downsampling via Conv2D
+	// Input: [hidden_size, num_patches] where patches are in merge-block order
+
+	if d.Conv2D == nil || d.Weight == nil {
+		panic("VisionDownsample weights not loaded")
+	}
+
+	merge := opts.spatialMergeSize
+	numOutputTokens := (grid.Height / merge) * (grid.Width / merge)
+
+	// Step 1: Reshape to [hidden_size, merge, merge, num_output_tokens]
+	hiddenStates = hiddenStates.Reshape(ctx, opts.hiddenSize, merge, merge, numOutputTokens)
+
+	// Step 2: Permute to [merge, merge, hidden_size, num_output_tokens]
+	// ggml semantics: result.ne[perm[i]] = input.ne[i]
+	// So permute(2,0,1,3) on [1024,2,2,N] gives: ne[2]=1024, ne[0]=2, ne[1]=2, ne[3]=N -> [2,2,1024,N]
+	hiddenStates = hiddenStates.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
+
+	// Step 3: Apply Conv2D without bias (bias added after reshape)
+	// Note: ggml_conv_2d takes (kernel, input) - kernel must be receiver in ollama
+	s0, s1 := merge, merge
+	p0, p1 := 0, 0
+	d0, d1 := 1, 1
+	hiddenStates = d.Weight.Conv2D(ctx, hiddenStates, s0, s1, p0, p1, d0, d1)
+
+	// Step 4: Reshape to [out_hidden_size, num_output_tokens]
+	hiddenStates = hiddenStates.Reshape(ctx, opts.outHiddenSize, numOutputTokens)
+
+	// Step 5: Add bias after reshape
+	// Reshape bias from [out_hidden_size] to [out_hidden_size, 1] for proper broadcasting
+	if d.Bias != nil {
+		hiddenStates = hiddenStates.Add(ctx, d.Bias.Reshape(ctx, opts.outHiddenSize, 1))
+	}
+
+	return hiddenStates
+}
+
+type PatchMerger struct {
+	// GGUF tags align with mm.* keys used by the model
+	Proj     *nn.Linear    `gguf:"model.fc"`  // mm.model.fc.weight
+	PostLN   *nn.LayerNorm `gguf:"post_norm"` // mm.post_norm.weight/bias
+	GateProj *nn.Linear    `gguf:"gate"`      // mm.gate.weight
+	UpProj   *nn.Linear    `gguf:"up"`        // mm.up.weight
+	DownProj *nn.Linear    `gguf:"down"`      // mm.down.weight
+}
+
+func (m *PatchMerger) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *VisionModelOptions) ml.Tensor {
+	// Linear projection
+	hiddenStates = m.Proj.Forward(ctx, hiddenStates)
+
+	// Post-projection layer norm + GELU ERF
+	hiddenStates = m.PostLN.Forward(ctx, hiddenStates, opts.eps)
+	hiddenStates = hiddenStates.GELU_ERF(ctx)
+	// Force a copy to avoid in-place mutation issues with GELU_ERF
+	hiddenStates = hiddenStates.Contiguous(ctx)
+
+	// SwiGLU MLP: down(silu(gate(x)) * up(x))
+	gateOut := m.GateProj.Forward(ctx, hiddenStates)
+	upOut := m.UpProj.Forward(ctx, hiddenStates)
+	gate := gateOut.SILU(ctx, upOut)
+	return m.DownProj.Forward(ctx, gate)
+}
+
+type VisionModel struct {
+	PatchEmbed *VisionPatchEmbed
+	Blocks     []VisionBlock `gguf:"blk"`
+	PostLN     *nn.RMSNorm   `gguf:"post_ln"`
+	// Note: Downsample is applied at the model level so mm.patch_merger stays separate
+
+	*VisionModelOptions
+}
+
+func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, grid *Grid) ml.Tensor {
+	// Extract patch embeddings from flattened patches
+	hiddenStates := m.PatchEmbed.Forward(ctx, pixelValues, grid, m.VisionModelOptions)
+
+	// Create position IDs for RoPE (spatial grid)
+	// Patches are already in merge-block order from preprocessing
+	positions := m.createPositions(ctx, grid)
+
+	// Process through vision blocks
+	for _, block := range m.Blocks {
+		hiddenStates = block.Forward(ctx, hiddenStates, positions, m.VisionModelOptions)
+	}
+
+	// Post-layernorm
+	hiddenStates = m.PostLN.Forward(ctx, hiddenStates, m.eps)
+
+	// Note: Downsample is now applied separately in Model.EncodeMultimodal
+	// so mm.patch_merger remains a distinct module
+
+	return hiddenStates
+}
+
+func (m *VisionModel) createPositions(ctx ml.Context, grid *Grid) ml.Tensor {
+	// Create spatial position IDs for vision RoPE
+	// Position layout: [height, width, height, width] - one row per rope.WithVision section
+	// Patches arrive in merge-block order from preprocessing (VisionPatchEmbed does not reorder them)
+	// This follows the GLM-OCR rot_pos_emb layout
+	numPatches := grid.Height * grid.Width
+	mergeRatio := m.spatialMergeSize
+
+	// Build position arrays in merge-block order
+	// Each merge_ratio x merge_ratio block of patches is grouped together
+	hpos := make([]int32, numPatches)
+	wpos := make([]int32, numPatches)
+	ptr := 0
+	for y := 0; y < grid.Height; y += mergeRatio {
+		for x := 0; x < grid.Width; x += mergeRatio {
+			for dy := range mergeRatio {
+				for dx := range mergeRatio {
+					hpos[ptr] = int32(y + dy)
+					wpos[ptr] = int32(x + dx)
+					ptr++
+				}
+			}
+		}
+	}
+
+	// Concatenate the 4 position rows; rows 2 and 3 repeat rows 0 and 1
+	s := [][]int32{
+		hpos,               // Section 0: height
+		wpos,               // Section 1: width
+		slices.Clone(hpos), // Section 2: height (repeated)
+		slices.Clone(wpos), // Section 3: width (repeated)
+	}
+
+	return ctx.Input().FromInts(slices.Concat(s...), numPatches*4)
+}
+
+func newVisionModel(c fs.Config) *VisionModel {
+	hiddenSize := int(c.Uint("vision.embedding_length", 1024))
+	numHeads := int(c.Uint("vision.attention.head_count", 16))
+	numChannels := int(c.Uint("vision.num_channels", 3))
+	patchSize := int(c.Uint("vision.patch_size", 14))
+	temporalPatchSize := int(c.Uint("vision.temporal_patch_size", 2))
+	imageSize := int(c.Uint("vision.image_size", 336))
+	spatialMergeSize := int(c.Uint("vision.spatial_merge_size", 2))
+	outHiddenSize := int(c.Uint("vision.out_hidden_size", 1536))
+	intermediateSize := int(c.Uint("vision.intermediate_size", 4096))
+	eps := c.Float("vision.attention.layer_norm_rms_epsilon", 1e-5)
+
+	return &VisionModel{
+		Blocks: make([]VisionBlock, c.Uint("vision.block_count", 24)),
+		VisionModelOptions: &VisionModelOptions{
+			hiddenSize:        hiddenSize,
+			numHeads:          numHeads,
+			headDim:           hiddenSize / numHeads,
+			numChannels:       numChannels,
+			patchSize:         patchSize,
+			temporalPatchSize: temporalPatchSize,
+			imageSize:         imageSize,
+			spatialMergeSize:  spatialMergeSize,
+			outHiddenSize:     outHiddenSize,
+			intermediateSize:  intermediateSize,
+			eps:               eps,
+		},
+	}
+}
--- a/model/models/models.go
+++ b/model/models/models.go
@@ -8,6 +8,7 @@ import (
 	_ "github.com/ollama/ollama/model/models/gemma3"
 	_ "github.com/ollama/ollama/model/models/gemma3n"
 	_ "github.com/ollama/ollama/model/models/glm4moelite"
+	_ "github.com/ollama/ollama/model/models/glmocr"
 	_ "github.com/ollama/ollama/model/models/gptoss"
 	_ "github.com/ollama/ollama/model/models/lfm2"
 	_ "github.com/ollama/ollama/model/models/llama"
--- a/model/parsers/glmocr.go
+++ b/model/parsers/glmocr.go
@@ -0,0 +1,19 @@
+package parsers
+
+import "github.com/ollama/ollama/api"
+
+type GlmOcrParser struct {
+	GLM47Parser
+}
+
+func (p *GlmOcrParser) HasThinkingSupport() bool {
+	return false
+}
+
+func (p *GlmOcrParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+	p.tools = tools
+	if thinkValue != nil && thinkValue.Bool() {
+		p.state = glm46ParserState_CollectingThinking
+	}
+	return tools
+}
--- a/model/parsers/ministral.go
+++ b/model/parsers/ministral.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
-	"unicode"

 	"github.com/ollama/ollama/api"
 )
@@ -18,34 +17,12 @@ const (
 	ministralCollectingToolArgs
 )

-// ministralEvent represents an event emitted during parsing
-type ministralEvent interface {
-	isMinistralEvent()
-}
-
-type ministralEventContent struct {
-	content string
-}
-
-type ministralEventThinking struct {
-	thinking string
-}
-
-type ministralEventToolCall struct {
-	name string
-	args string // raw JSON string
-}
-
-func (ministralEventContent) isMinistralEvent()  {}
-func (ministralEventThinking) isMinistralEvent() {}
-func (ministralEventToolCall) isMinistralEvent() {}
-
 type MinistralParser struct {
 	state              ministralParserState
 	buffer             strings.Builder
 	tools              []api.Tool
 	hasThinkingSupport bool
-	pendingToolName    string // stores tool name while collecting args
+	currentTool        *api.Tool
 }

 func (p *MinistralParser) HasToolSupport() bool {
@@ -86,251 +63,74 @@ func toolByName(tools []api.Tool, n string) (*api.Tool, error) {
 	return nil, fmt.Errorf("tool '%s' not found", n)
 }

-const (
-	ministralToolCallsTag = "[TOOL_CALLS]"
-	ministralThinkTag     = "[THINK]"
-	ministralThinkEndTag  = "[/THINK]"
-	ministralArgsTag      = "[ARGS]"
-)
-
-// eat consumes the parser's buffer, and returns a list of any unambiguous
-// events from the current parser state. The second return value indicates
-// whether to keep looping (true when state transitions, false when waiting
-// for more data).
-func (p *MinistralParser) eat() ([]ministralEvent, bool) {
-	var events []ministralEvent
-
-	switch p.state {
-	case ministralCollectingContent:
-		bufStr := p.buffer.String()
-
-		// Check for [TOOL_CALLS] tag
-		if strings.Contains(bufStr, ministralToolCallsTag) {
-			split := strings.SplitN(bufStr, ministralToolCallsTag, 2)
-			before := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			if len(before) > 0 {
-				events = append(events, ministralEventContent{content: before})
-			}
-			after := split[1]
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			p.state = ministralCollectingToolName
-			return events, true
-		}
-
-		// Check for [THINK] tag
-		if strings.Contains(bufStr, ministralThinkTag) {
-			split := strings.SplitN(bufStr, ministralThinkTag, 2)
-			before := strings.TrimRightFunc(split[0], unicode.IsSpace)
-			if len(before) > 0 {
-				events = append(events, ministralEventContent{content: before})
-			}
-			after := split[1]
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			p.state = ministralCollectingThinkingContent
-			return events, true
-		}
-
-		// Check for partial tag overlap with [TOOL_CALLS] or [THINK]
-		overlapToolCalls := overlap(bufStr, ministralToolCallsTag)
-		overlapThink := overlap(bufStr, ministralThinkTag)
-		maxOverlap := max(overlapToolCalls, overlapThink)
-
-		if maxOverlap > 0 {
-			// Withhold the potential partial tag
-			beforePartialTag := bufStr[:len(bufStr)-maxOverlap]
-			trailingWS := trailingWhitespaceLen(beforePartialTag)
-			ambiguousStart := len(beforePartialTag) - trailingWS
-			unambiguous := bufStr[:ambiguousStart]
-			ambiguous := bufStr[ambiguousStart:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, ministralEventContent{content: unambiguous})
-			}
-			return events, false
-		}
-
-		// No tag found: emit content but withhold trailing whitespace
-		whitespaceLen := trailingWhitespaceLen(bufStr)
-		ambiguousStart := len(bufStr) - whitespaceLen
-		unambiguous := bufStr[:ambiguousStart]
-		ambiguous := bufStr[ambiguousStart:]
-		p.buffer.Reset()
-		p.buffer.WriteString(ambiguous)
-		if len(unambiguous) > 0 {
-			events = append(events, ministralEventContent{content: unambiguous})
-		}
-		return events, false
-
-	case ministralCollectingThinkingContent:
-		bufStr := p.buffer.String()
-
-		if strings.Contains(bufStr, ministralThinkEndTag) {
-			split := strings.SplitN(bufStr, ministralThinkEndTag, 2)
-			thinkingContent := split[0]
-			after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			if len(thinkingContent) > 0 {
-				events = append(events, ministralEventThinking{thinking: thinkingContent})
-			}
-			p.state = ministralCollectingContent
-			return events, true
-		}
-
-		// Check for partial overlap with [/THINK]
-		if overlapLen := overlap(bufStr, ministralThinkEndTag); overlapLen > 0 {
-			unambiguous := bufStr[:len(bufStr)-overlapLen]
-			ambiguous := bufStr[len(bufStr)-overlapLen:]
-			p.buffer.Reset()
-			p.buffer.WriteString(ambiguous)
-			if len(unambiguous) > 0 {
-				events = append(events, ministralEventThinking{thinking: unambiguous})
-			}
-			return events, false
-		}
-
-		// No tag found: emit all thinking content
-		p.buffer.Reset()
-		if len(bufStr) > 0 {
-			events = append(events, ministralEventThinking{thinking: bufStr})
-		}
-		return events, false
-
-	case ministralCollectingToolName:
-		bufStr := p.buffer.String()
-
-		if strings.Contains(bufStr, ministralArgsTag) {
-			split := strings.SplitN(bufStr, ministralArgsTag, 2)
-			toolName := split[0]
-			after := split[1]
-			p.pendingToolName = toolName
-			p.buffer.Reset()
-			p.buffer.WriteString(after)
-			p.state = ministralCollectingToolArgs
-			return events, true
-		}
-		// Wait for more data
-		return events, false
-
-	case ministralCollectingToolArgs:
-		bufStr := p.buffer.String()
-		jsonEnd := findJSONEnd(bufStr)
-
-		if jsonEnd != -1 {
-			jsonStr := bufStr[:jsonEnd+1]
-			remaining := bufStr[jsonEnd+1:]
-
-			events = append(events, ministralEventToolCall{
-				name: p.pendingToolName,
-				args: jsonStr,
-			})
-
-			p.pendingToolName = ""
-			p.buffer.Reset()
-			p.buffer.WriteString(remaining)
-			p.state = ministralCollectingContent
-			return events, true
-		}
-		// Wait for more data
-		return events, false
-
-	default:
-		panic("unexpected ministral event")
-	}
-}
-
-// parseEvents loops calling eat() until it returns false
-func (p *MinistralParser) parseEvents() []ministralEvent {
-	var all []ministralEvent
-	keepLooping := true
-	for keepLooping {
-		var events []ministralEvent
-		events, keepLooping = p.eat()
-		all = append(all, events...)
-	}
-	return all
-}
-
 func (p *MinistralParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
 	p.buffer.WriteString(s)

-	events := p.parseEvents()
-
-	var contentBuilder, thinkingBuilder strings.Builder
-	var toolCalls []api.ToolCall
-
-	for _, event := range events {
-		switch e := event.(type) {
-		case ministralEventContent:
-			contentBuilder.WriteString(e.content)
-		case ministralEventThinking:
-			thinkingBuilder.WriteString(e.thinking)
-		case ministralEventToolCall:
-			// Validate tool exists
-			tool, toolErr := toolByName(p.tools, e.name)
-			if toolErr != nil {
-				return contentBuilder.String(), thinkingBuilder.String(), toolCalls, toolErr
+	switch p.state {
+	case ministralCollectingContent:
+		if strings.Contains(p.buffer.String(), "[TOOL_CALLS]") {
+			before, _ := splitAtTag(&p.buffer, "[TOOL_CALLS]", false)
+			if before != "" {
+				return before, "", calls, nil
 			}
-			// Parse JSON arguments
+			p.state = ministralCollectingToolName
+		} else if strings.Contains(p.buffer.String(), "[THINK]") {
+			p.state = ministralCollectingThinkingContent
+			return "", "", calls, nil
+		} else {
+			p.buffer.Reset()
+			return s, "", calls, nil
+		}
+	case ministralCollectingThinkingContent:
+		if strings.Contains(p.buffer.String(), "[/THINK]") {
+			thinkingContent, after := splitAtTag(&p.buffer, "[/THINK]", true)
+			p.state = ministralCollectingContent
+			if after != "" {
+				p.buffer.Reset()
+				return after, thinkingContent, calls, nil
+			}
+			return "", thinkingContent, calls, nil
+		} else {
+			p.buffer.Reset()
+			return "", s, calls, nil
+		}
+	case ministralCollectingToolName:
+		if strings.Contains(p.buffer.String(), "[ARGS]") {
+			name, _ := splitAtTag(&p.buffer, "[ARGS]", false)
+
+			t, err := toolByName(p.tools, name)
+			if err != nil {
+				return "", "", calls, err
+			}
+			p.currentTool = t
+			p.state = ministralCollectingToolArgs
+			return "", "", calls, nil
+		}
+		return "", "", calls, nil
+	case ministralCollectingToolArgs:
+		if strings.Contains(p.buffer.String(), "}") {
+			before, _ := splitAtTag(&p.buffer, "}", false)
+			before += "}"
+
 			var args api.ToolCallFunctionArguments
-			if jsonErr := json.Unmarshal([]byte(e.args), &args); jsonErr != nil {
-				return contentBuilder.String(), thinkingBuilder.String(), toolCalls, jsonErr
+			if err := json.Unmarshal([]byte(before), &args); err != nil {
+				// TODO: return a more descriptive error
+				return "", "", calls, err
 			}
-			toolCalls = append(toolCalls, api.ToolCall{
+
+			p.state = ministralCollectingContent
+
+			call := api.ToolCall{
 				Function: api.ToolCallFunction{
-					Name:      tool.Function.Name,
+					Name:      p.currentTool.Function.Name,
 					Arguments: args,
 				},
-			})
+			}
+			calls = append(calls, call)
+			return "", "", calls, nil
 		}
+		return "", "", calls, nil
 	}

-	return contentBuilder.String(), thinkingBuilder.String(), toolCalls, nil
-}
-
-// findJSONEnd finds the index of the closing brace that completes a JSON object.
-// It properly handles nested objects, arrays, and strings (including escaped characters).
-// Returns -1 if the JSON is not yet complete.
-func findJSONEnd(s string) int {
-	depth := 0
-	inString := false
-	escaped := false
-
-	for i, r := range s {
-		if inString {
-			switch {
-			case escaped:
-				// If the previous character was a backslash, skip this character
-				escaped = false
-			case r == '\\':
-				// Mark the next character as escaped
-				escaped = true
-			case r == '"':
-				// End of string literal
-				inString = false
-			}
-			continue
-		}
-
-		switch r {
-		case '"':
-			// Start of string literal
-			inString = true
-		case '{', '[':
-			// Increase nesting level for objects and arrays
-			depth++
-		case '}', ']':
-			// Decrease nesting level
-			depth--
-			if depth == 0 {
-				// Reached the end of the root JSON structure
-				return i
-			}
-		}
-	}
-
-	return -1
+	return p.buffer.String(), thinking, calls, nil
 }
--- a/model/parsers/ministral_test.go
+++ b/model/parsers/ministral_test.go
@@ -1,545 +0,0 @@
-package parsers
-
-import (
-	"reflect"
-	"testing"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestMinistralParserStreaming(t *testing.T) {
-	type step struct {
-		input      string
-		wantEvents []ministralEvent
-	}
-
-	cases := []struct {
-		desc  string
-		tools []api.Tool
-		steps []step
-		think bool // whether to enable thinking support
-	}{
-		// Content streaming
-		{
-			desc: "simple content",
-			steps: []step{
-				{input: "Hello, how can I help you?", wantEvents: []ministralEvent{
-					ministralEventContent{content: "Hello, how can I help you?"},
-				}},
-			},
-		},
-		{
-			desc: "streaming content word by word",
-			steps: []step{
-				{input: "Hello,", wantEvents: []ministralEvent{ministralEventContent{content: "Hello,"}}},
-				{input: " how", wantEvents: []ministralEvent{ministralEventContent{content: " how"}}},
-				{input: " can I help?", wantEvents: []ministralEvent{ministralEventContent{content: " can I help?"}}},
-			},
-		},
-
-		// Simple tool calls
-		{
-			desc:  "simple tool call",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "San Francisco"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "get_weather", args: `{"location": "San Francisco"}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with nested object",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]create_entities[ARGS]{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with deeply nested objects",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "update_config"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]update_config[ARGS]{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "update_config", args: `{"settings": {"user": {"profile": {"name": "John", "age": 30}}, "theme": "dark"}}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with array of objects",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "process_items"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]process_items[ARGS]{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "process_items", args: `{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with escaped quotes in string",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "search"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]search[ARGS]{"query": "say \"hello\""}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "search", args: `{"query": "say \"hello\""}`},
-				}},
-			},
-		},
-		{
-			desc:  "tool call with braces inside string",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "format"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]format[ARGS]{"template": "Hello {name}!"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "format", args: `{"template": "Hello {name}!"}`},
-				}},
-			},
-		},
-		{
-			desc:  "empty JSON object",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "no_args"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]no_args[ARGS]{}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "no_args", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "JSON with newlines in string",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "write"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]write[ARGS]{"content": "line1\nline2\nline3"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "write", args: `{"content": "line1\nline2\nline3"}`},
-				}},
-			},
-		},
-		{
-			desc:  "backslash in string value",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "path"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]path[ARGS]{"dir": "C:\\Users\\test"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "path", args: `{"dir": "C:\\Users\\test"}`},
-				}},
-			},
-		},
-
-		// Content after tool call
-		{
-			desc:  "content after tool call",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				// NOTE: It's unclear if this is valid Ministral output, but the parser
-				// currently treats text after a tool call as regular content. This test
-				// documents that behavior so we notice if it changes.
-				{input: `[TOOL_CALLS]test[ARGS]{"a": 1}some content after`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "test", args: `{"a": 1}`},
-					ministralEventContent{content: "some content after"},
-				}},
-			},
-		},
-
-		// Multiple tool calls
-		{
-			desc: "multiple tool calls in sequence",
-			tools: []api.Tool{
-				{Function: api.ToolFunction{Name: "get_weather"}},
-				{Function: api.ToolFunction{Name: "get_time"}},
-			},
-			steps: []step{
-				{input: `[TOOL_CALLS]get_weather[ARGS]{"location": "NYC"}[TOOL_CALLS]get_time[ARGS]{"timezone": "EST"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "get_weather", args: `{"location": "NYC"}`},
-					ministralEventToolCall{name: "get_time", args: `{"timezone": "EST"}`},
-				}},
-			},
-		},
-		{
-			desc: "multiple tool calls streamed separately",
-			tools: []api.Tool{
-				{Function: api.ToolFunction{Name: "tool_a"}},
-				{Function: api.ToolFunction{Name: "tool_b"}},
-			},
-			steps: []step{
-				{input: `[TOOL_CALLS]tool_a[ARGS]{"x": 1}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "tool_a", args: `{"x": 1}`},
-				}},
-				{input: `[TOOL_CALLS]tool_b[ARGS]{"y": 2}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "tool_b", args: `{"y": 2}`},
-				}},
-			},
-		},
-
-		// Streaming tool calls
-		{
-			desc:  "streaming tool call with nested objects",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "create_entities"}}},
-			steps: []step{
-				{input: "[TOOL_CALLS]create_entities[ARGS]", wantEvents: []ministralEvent{}},
-				{input: `{"entities": [{"entityType": "Person",`, wantEvents: []ministralEvent{}},
-				{input: ` "name": "Jack",`, wantEvents: []ministralEvent{}},
-				{input: ` "observations": ["Works`, wantEvents: []ministralEvent{}},
-				{input: ` as a baker"]}`, wantEvents: []ministralEvent{}},
-				{input: `]}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "create_entities", args: `{"entities": [{"entityType": "Person", "name": "Jack", "observations": ["Works as a baker"]}]}`},
-				}},
-			},
-		},
-		{
-			desc:  "streaming with incomplete JSON waits for completion",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "[TOOL_CALLS]test[ARGS]{", wantEvents: []ministralEvent{}},
-				{input: `"a": {`, wantEvents: []ministralEvent{}},
-				{input: `"b": 1`, wantEvents: []ministralEvent{}},
-				{input: `}`, wantEvents: []ministralEvent{}},
-				{input: `}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "test", args: `{"a": {"b": 1}}`},
-				}},
-			},
-		},
-
-		// Partial tag handling
-		{
-			desc: "partial tool tag fakeout",
-			steps: []step{
-				{input: "abc[TOOL", wantEvents: []ministralEvent{ministralEventContent{content: "abc"}}},
-				{input: " not a tag", wantEvents: []ministralEvent{ministralEventContent{content: "[TOOL not a tag"}}},
-			},
-		},
-		{
-			desc:  "tool call tag split across chunks",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "[TOOL_", wantEvents: []ministralEvent{}},
-				{input: "CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "content before tool call",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "get_weather"}}},
-			steps: []step{
-				{input: "hello [TOOL_CALLS]get_weather[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "hello"},
-					ministralEventToolCall{name: "get_weather", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "whitespace between content and tool call is trimmed",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "content \n [TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "tabs and newlines before tool call are trimmed",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "content\t\n\t[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc:  "non-breaking space before tool call is trimmed",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				// \u00a0 is non-breaking space, which unicode.IsSpace considers whitespace
-				{input: "content\u00a0[TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-		{
-			desc: "whitespace before THINK tag is trimmed",
-			steps: []step{
-				{input: "content \n [THINK]thinking[/THINK]after", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventThinking{thinking: "thinking"},
-					ministralEventContent{content: "after"},
-				}},
-			},
-		},
-		{
-			desc: "trailing whitespace withheld then emitted",
-			steps: []step{
-				{input: "Hello ", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
-				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: " world"}}},
-			},
-		},
-		{
-			desc: "trailing newline withheld then emitted",
-			steps: []step{
-				{input: "Hello\n", wantEvents: []ministralEvent{ministralEventContent{content: "Hello"}}},
-				{input: "world", wantEvents: []ministralEvent{ministralEventContent{content: "\nworld"}}},
-			},
-		},
-
-		// Thinking support
-		{
-			desc:  "thinking content",
-			think: true,
-			steps: []step{
-				{input: "thinking here[/THINK]", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "thinking here"},
-				}},
-				{input: "content after", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content after"},
-				}},
-			},
-		},
-		{
-			desc:  "thinking with whitespace after end tag",
-			think: true,
-			steps: []step{
-				{input: "my thoughts[/THINK]  \n  response", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "my thoughts"},
-					ministralEventContent{content: "response"},
-				}},
-			},
-		},
-		{
-			desc:  "non-breaking space after think end tag is trimmed",
-			think: true,
-			steps: []step{
-				// \u00a0 is non-breaking space
-				{input: "thinking[/THINK]\u00a0response", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "thinking"},
-					ministralEventContent{content: "response"},
-				}},
-			},
-		},
-		{
-			desc:  "partial think end tag",
-			think: true,
-			steps: []step{
-				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
-				{input: "NK]after", wantEvents: []ministralEvent{ministralEventContent{content: "after"}}},
-			},
-		},
-		{
-			desc:  "think tag fakeout",
-			think: true,
-			steps: []step{
-				{input: "thinking[/THI", wantEvents: []ministralEvent{ministralEventThinking{thinking: "thinking"}}},
-				{input: "not end tag", wantEvents: []ministralEvent{ministralEventThinking{thinking: "[/THInot end tag"}}},
-			},
-		},
-		{
-			desc:  "thinking then tool call",
-			think: true,
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "test"}}},
-			steps: []step{
-				{input: "let me think[/THINK][TOOL_CALLS]test[ARGS]{}", wantEvents: []ministralEvent{
-					ministralEventThinking{thinking: "let me think"},
-					ministralEventToolCall{name: "test", args: `{}`},
-				}},
-			},
-		},
-
-		// Content then THINK tag transition
-		{
-			desc: "content then think tag",
-			steps: []step{
-				{input: "content[THINK]thinking[/THINK]more", wantEvents: []ministralEvent{
-					ministralEventContent{content: "content"},
-					ministralEventThinking{thinking: "thinking"},
-					ministralEventContent{content: "more"},
-				}},
-			},
-		},
-
-		// Unicode handling
-		{
-			desc: "unicode content",
-			steps: []step{
-				{input: "你好 🌍 مرحبا", wantEvents: []ministralEvent{
-					ministralEventContent{content: "你好 🌍 مرحبا"},
-				}},
-			},
-		},
-		{
-			desc:  "unicode in tool args",
-			tools: []api.Tool{{Function: api.ToolFunction{Name: "greet"}}},
-			steps: []step{
-				{input: `[TOOL_CALLS]greet[ARGS]{"message": "你好 🌍"}`, wantEvents: []ministralEvent{
-					ministralEventToolCall{name: "greet", args: `{"message": "你好 🌍"}`},
-				}},
-			},
-		},
-	}
-
-	for _, tc := range cases {
-		t.Run(tc.desc, func(t *testing.T) {
-			parser := MinistralParser{}
-			parser.hasThinkingSupport = tc.think
-			parser.Init(tc.tools, nil, nil)
-
-			for i, step := range tc.steps {
-				parser.buffer.WriteString(step.input)
-				gotEvents := parser.parseEvents()
-
-				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
-					// avoid deep equal on empty vs. nil slices
-					continue
-				}
-
-				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
-					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
-				}
-			}
-		})
-	}
-}
-
-func TestMinistralParser_Errors(t *testing.T) {
-	t.Run("unknown tool returns error", func(t *testing.T) {
-		p := &MinistralParser{}
-		p.Init([]api.Tool{{Function: api.ToolFunction{Name: "known_tool"}}}, nil, nil)
-
-		_, _, _, err := p.Add(`[TOOL_CALLS]unknown_tool[ARGS]{"a": 1}`, true)
-		if err == nil {
-			t.Fatal("expected error for unknown tool")
-		}
-	})
-
-	t.Run("invalid JSON returns error", func(t *testing.T) {
-		p := &MinistralParser{}
-		p.Init([]api.Tool{{Function: api.ToolFunction{Name: "test"}}}, nil, nil)
-
-		_, _, _, err := p.Add(`[TOOL_CALLS]test[ARGS]{invalid json}`, true)
-		if err == nil {
-			t.Fatal("expected error for invalid JSON")
-		}
-	})
-}
-
-func TestFindJSONEnd(t *testing.T) {
-	tests := []struct {
-		name     string
-		input    string
-		expected int
-	}{
-		{
-			name:     "simple object",
-			input:    `{"a": 1}`,
-			expected: 7,
-		},
-		{
-			name:     "nested object",
-			input:    `{"a": {"b": 2}}`,
-			expected: 14,
-		},
-		{
-			name:     "array inside object",
-			input:    `{"items": [1, 2, 3]}`,
-			expected: 19,
-		},
-		{
-			name:     "braces in string",
-			input:    `{"template": "Hello {name}!"}`,
-			expected: 28,
-		},
-		{
-			name:     "escaped quotes",
-			input:    `{"msg": "say \"hi\""}`,
-			expected: 20,
-		},
-		{
-			name:     "incomplete object",
-			input:    `{"a": {"b": 1}`,
-			expected: -1,
-		},
-		{
-			name:     "deeply nested",
-			input:    `{"a": {"b": {"c": {"d": 1}}}}`,
-			expected: 28,
-		},
-		{
-			name:     "object with trailing content",
-			input:    `{"a": 1} extra`,
-			expected: 7,
-		},
-		{
-			name:     "array",
-			input:    `[{"a": 1}, {"b": 2}]`,
-			expected: 19,
-		},
-		{
-			name:     "escaped backslash before quote",
-			input:    `{"path": "C:\\"}`,
-			expected: 15,
-		},
-		{
-			name:     "empty string",
-			input:    "",
-			expected: -1,
-		},
-		{
-			name:     "no opening brace",
-			input:    "hello world",
-			expected: -1,
-		},
-		{
-			name:     "only opening brace",
-			input:    "{",
-			expected: -1,
-		},
-		{
-			name:     "unclosed string",
-			input:    `{"key": "unclosed`,
-			expected: -1,
-		},
-		{
-			name:     "double escaped backslash then quote",
-			input:    `{"path": "C:\\\\"}`,
-			expected: 17,
-		},
-		{
-			name:     "unicode in key and value",
-			input:    `{"키": "값"}`,
-			expected: 13,
-		},
-		{
-			name:     "nested arrays",
-			input:    `{"matrix": [[1, 2], [3, 4]]}`,
-			expected: 27,
-		},
-		{
-			name:     "mixed nesting",
-			input:    `{"a": [{"b": {"c": [1, 2, 3]}}]}`,
-			expected: 31,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := findJSONEnd(tt.input)
-			if result != tt.expected {
-				t.Errorf("findJSONEnd(%q) = %d, want %d", tt.input, result, tt.expected)
-			}
-		})
-	}
-}
-
-func TestMinistralParser_HasToolSupport(t *testing.T) {
-	p := &MinistralParser{}
-	if !p.HasToolSupport() {
-		t.Error("expected HasToolSupport to return true")
-	}
-}
-
-func TestMinistralParser_HasThinkingSupport(t *testing.T) {
-	p := &MinistralParser{hasThinkingSupport: false}
-	if p.HasThinkingSupport() {
-		t.Error("expected HasThinkingSupport to return false")
-	}
-
-	p = &MinistralParser{hasThinkingSupport: true}
-	if !p.HasThinkingSupport() {
-		t.Error("expected HasThinkingSupport to return true")
-	}
-}
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -3,7 +3,6 @@ package parsers
 import (
 	"strings"
 	"unicode"
-	"unicode/utf8"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/harmony"
@@ -71,6 +70,8 @@ func ParserForName(name string) Parser {
 		return &FunctionGemmaParser{}
 	case "glm-4.7":
 		return &GLM47Parser{}
+	case "glm-ocr":
+		return &GlmOcrParser{}
 	case "lfm2":
 		return &LFM2Parser{hasThinkingSupport: false}
 	case "lfm2-thinking":
@@ -115,33 +116,3 @@ func splitAtTag(sb *strings.Builder, tag string, trimAfter bool) (string, string
 	sb.WriteString(after)
 	return before, after // return events
 }
-
-// overlap returns the longest overlap between the suffix of s and the prefix of delim
-func overlap(s, delim string) int {
-	max := min(len(delim), len(s))
-	for i := max; i > 0; i-- {
-		if strings.HasSuffix(s, delim[:i]) {
-			return i
-		}
-	}
-	return 0
-}
-
-// trailingWhitespaceLen returns the length in bytes of trailing whitespace in s
-func trailingWhitespaceLen(s string) int {
-	remaining := s
-	total := 0
-	for len(remaining) > 0 {
-		r, size := utf8.DecodeLastRuneInString(remaining)
-		// if it's an invalid utf8 rune, assume it isn't whitespace
-		if r == utf8.RuneError && size == 1 {
-			break
-		}
-		if !unicode.IsSpace(r) {
-			break
-		}
-		total += size
-		remaining = remaining[:len(remaining)-size]
-	}
-	return total
-}
--- a/model/parsers/qwen3coder.go
+++ b/model/parsers/qwen3coder.go
@@ -11,6 +11,7 @@ import (
 	"strconv"
 	"strings"
 	"unicode"
+	"unicode/utf8"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/logutil"
@@ -193,6 +194,36 @@ func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
 	}
 }

+// TODO(drifkin): move this to a shared location
+// overlap returns the length of the longest suffix of s that is also a prefix of delim
+func overlap(s, delim string) int {
+	max := min(len(delim), len(s))
+	for i := max; i > 0; i-- {
+		if strings.HasSuffix(s, delim[:i]) {
+			return i
+		}
+	}
+	return 0
+}
+
+func trailingWhitespaceLen(s string) int {
+	remaining := s
+	total := 0
+	for len(remaining) > 0 {
+		r, size := utf8.DecodeLastRuneInString(remaining)
+		// if it's an invalid utf8 rune, assume it isn't whitespace
+		if r == utf8.RuneError && size == 1 {
+			break
+		}
+		if !unicode.IsSpace(r) {
+			break
+		}
+		total += size
+		remaining = remaining[:len(remaining)-size]
+	}
+	return total
+}
+
 type XMLFunctionCall struct {
 	XMLName    xml.Name       `xml:"function"`
 	Name       string         `xml:"name,attr"`
--- a/model/renderers/glmocr.go
+++ b/model/renderers/glmocr.go
@@ -0,0 +1,109 @@
+package renderers
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/ollama/ollama/api"
+)
+
+type GlmOcrRenderer struct{}
+
+func (r *GlmOcrRenderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
+	var sb strings.Builder
+
+	sb.WriteString("[gMASK]<sop>")
+
+	if len(tools) > 0 {
+		sb.WriteString("<|system|>\n")
+		sb.WriteString("# Tools\n\n")
+		sb.WriteString("You may call one or more functions to assist with the user query.\n\n")
+		sb.WriteString("You are provided with function signatures within <tools></tools> XML tags:\n")
+		sb.WriteString("<tools>\n")
+		for _, tool := range tools {
+			d, _ := json.Marshal(tool)
+			sb.WriteString(formatGLM47ToolJSON(d))
+			sb.WriteString("\n")
+		}
+		sb.WriteString("</tools>\n\n")
+		sb.WriteString("For each function call, output the function name and arguments within the following XML format:\n")
+		sb.WriteString("<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>")
+	}
+
+	enableThinking := false
+	thinkingExplicitlySet := false
+	if thinkValue != nil {
+		enableThinking = thinkValue.Bool()
+		thinkingExplicitlySet = true
+	}
+
+	for i, message := range messages {
+		switch message.Role {
+		case "user":
+			sb.WriteString("<|user|>\n")
+			sb.WriteString(message.Content)
+			if thinkingExplicitlySet && !enableThinking && !strings.HasSuffix(message.Content, "/nothink") {
+				sb.WriteString("/nothink")
+			}
+		case "assistant":
+			sb.WriteString("<|assistant|>\n")
+			if message.Thinking != "" {
+				sb.WriteString("<think>" + strings.TrimSpace(message.Thinking) + "</think>")
+			} else {
+				sb.WriteString("<think></think>")
+			}
+			if message.Content != "" {
+				sb.WriteString("\n" + strings.TrimSpace(message.Content))
+			}
+			if len(message.ToolCalls) > 0 {
+				for _, toolCall := range message.ToolCalls {
+					sb.WriteString("\n<tool_call>" + toolCall.Function.Name)
+					sb.WriteString(renderGlmOcrToolArguments(toolCall.Function.Arguments))
+					sb.WriteString("</tool_call>")
+				}
+			}
+			sb.WriteString("\n")
+		case "tool":
+			if i == 0 || messages[i-1].Role != "tool" {
+				sb.WriteString("<|observation|>")
+			}
+			sb.WriteString("\n<tool_response>\n")
+			sb.WriteString(message.Content)
+			sb.WriteString("\n</tool_response>\n")
+		case "system":
+			sb.WriteString("<|system|>\n")
+			sb.WriteString(message.Content)
+			sb.WriteString("\n")
+		}
+	}
+
+	sb.WriteString("<|assistant|>\n")
+	if thinkingExplicitlySet && !enableThinking {
+		sb.WriteString("<think></think>\n")
+	}
+
+	return sb.String(), nil
+}
+
+func renderGlmOcrToolArguments(args api.ToolCallFunctionArguments) string {
+	var sb strings.Builder
+	for key, value := range args.All() {
+		sb.WriteString("<arg_key>" + key + "</arg_key>")
+		var valueStr string
+		if str, ok := value.(string); ok {
+			valueStr = str
+		} else {
+			jsonBytes, err := json.Marshal(value)
+			if err != nil {
+				valueStr = fmt.Sprintf("%v", value)
+			} else {
+				valueStr = string(jsonBytes)
+			}
+		}
+
+		sb.WriteString("<arg_value>" + valueStr + "</arg_value>")
+	}
+
+	return sb.String()
+}
--- a/model/renderers/renderer.go
+++ b/model/renderers/renderer.go
@@ -82,6 +82,8 @@ func rendererForName(name string) Renderer {
 		return &FunctionGemmaRenderer{}
 	case "glm-4.7":
 		return &GLM47Renderer{}
+	case "glm-ocr":
+		return &GlmOcrRenderer{}
 	case "lfm2":
 		return &LFM2Renderer{IsThinking: false}
 	case "lfm2-thinking":
--- a/server/routes.go
+++ b/server/routes.go
@@ -85,7 +85,6 @@ type Server struct {
 	addr    net.Addr
 	sched   *Scheduler
 	lowVRAM bool
-	usage   *UsageTracker
 }

 func init() {
@@ -274,10 +273,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		c.Header("Content-Type", contentType)

 		fn := func(resp api.GenerateResponse) error {
-			if resp.Done {
-				s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
-			}
-
 			resp.Model = origModel
 			resp.RemoteModel = m.Config.RemoteModel
 			resp.RemoteHost = m.Config.RemoteHost
@@ -584,8 +579,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 					}
 					res.Context = tokens
 				}
-
-				s.usage.Record(req.Model, cr.PromptEvalCount, cr.EvalCount)
 			}

 			if builtinParser != nil {
@@ -1597,8 +1590,6 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
 	r.POST("/api/copy", s.CopyHandler)

-	r.GET("/api/usage", s.UsageHandler)
-
 	// Inference
 	r.GET("/api/ps", s.PsHandler)
 	r.POST("/api/generate", s.GenerateHandler)
@@ -1667,7 +1658,7 @@ func Serve(ln net.Listener) error {
 		}
 	}

-	s := &Server{addr: ln.Addr(), usage: NewUsageTracker()}
+	s := &Server{addr: ln.Addr()}

 	var rc *ollama.Registry
 	if useClient2 {
@@ -1884,10 +1875,6 @@ func (s *Server) SignoutHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, nil)
 }

-func (s *Server) UsageHandler(c *gin.Context) {
-	c.JSON(http.StatusOK, s.usage.Stats())
-}
-
 func (s *Server) PsHandler(c *gin.Context) {
 	models := []api.ProcessModelResponse{}

@@ -2046,10 +2033,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		c.Header("Content-Type", contentType)

 		fn := func(resp api.ChatResponse) error {
-			if resp.Done {
-				s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
-			}
-
 			resp.Model = origModel
 			resp.RemoteModel = m.Config.RemoteModel
 			resp.RemoteHost = m.Config.RemoteHost
@@ -2270,8 +2253,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 					res.DoneReason = r.DoneReason.String()
 					res.TotalDuration = time.Since(checkpointStart)
 					res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
-
-					s.usage.Record(req.Model, r.PromptEvalCount, r.EvalCount)
 				}

 				if builtinParser != nil {
--- a/server/routes_debug_test.go
+++ b/server/routes_debug_test.go
@@ -29,7 +29,6 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -223,7 +222,6 @@ func TestChatDebugRenderOnly(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
--- a/server/routes_generate_renderer_test.go
+++ b/server/routes_generate_renderer_test.go
@@ -34,7 +34,6 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -219,7 +218,6 @@ func TestGenerateWithDebugRenderOnly(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -88,39 +88,19 @@ func TestGenerateChatRemote(t *testing.T) {
 		if r.Method != http.MethodPost {
 			t.Errorf("Expected POST request, got %s", r.Method)
 		}
+		if r.URL.Path != "/api/chat" {
+			t.Errorf("Expected path '/api/chat', got %s", r.URL.Path)
+		}

 		w.WriteHeader(http.StatusOK)
 		w.Header().Set("Content-Type", "application/json")
-
-		switch r.URL.Path {
-		case "/api/chat":
-			resp := api.ChatResponse{
-				Model:      "test",
-				Done:       true,
-				DoneReason: "load",
-				Metrics: api.Metrics{
-					PromptEvalCount: 10,
-					EvalCount:       20,
-				},
-			}
-			if err := json.NewEncoder(w).Encode(&resp); err != nil {
-				t.Fatal(err)
-			}
-		case "/api/generate":
-			resp := api.GenerateResponse{
-				Model:      "test",
-				Done:       true,
-				DoneReason: "stop",
-				Metrics: api.Metrics{
-					PromptEvalCount: 5,
-					EvalCount:       15,
-				},
-			}
-			if err := json.NewEncoder(w).Encode(&resp); err != nil {
-				t.Fatal(err)
-			}
-		default:
-			t.Errorf("unexpected path %s", r.URL.Path)
+		resp := api.ChatResponse{
+			Model:      "test",
+			Done:       true,
+			DoneReason: "load",
+		}
+		if err := json.NewEncoder(w).Encode(&resp); err != nil {
+			t.Fatal(err)
 		}
 	}))
 	defer rs.Close()
@@ -131,7 +111,7 @@ func TestGenerateChatRemote(t *testing.T) {
 	}

 	t.Setenv("OLLAMA_REMOTES", p.Hostname())
-	s := Server{usage: NewUsageTracker()}
+	s := Server{}
 	w := createRequest(t, s.CreateHandler, api.CreateRequest{
 		Model:      "test-cloud",
 		RemoteHost: rs.URL,
@@ -179,61 +159,6 @@ func TestGenerateChatRemote(t *testing.T) {
 			t.Errorf("expected done reason load, got %s", actual.DoneReason)
 		}
 	})
-
-	t.Run("remote chat usage tracking", func(t *testing.T) {
-		stats := s.usage.Stats()
-		found := false
-		for _, m := range stats.Usage {
-			if m.Model == "test-cloud" {
-				found = true
-				if m.Requests != 1 {
-					t.Errorf("expected 1 request, got %d", m.Requests)
-				}
-				if m.PromptTokens != 10 {
-					t.Errorf("expected 10 prompt tokens, got %d", m.PromptTokens)
-				}
-				if m.CompletionTokens != 20 {
-					t.Errorf("expected 20 completion tokens, got %d", m.CompletionTokens)
-				}
-			}
-		}
-		if !found {
-			t.Error("expected usage entry for test-cloud")
-		}
-	})
-
-	t.Run("remote generate usage tracking", func(t *testing.T) {
-		// Reset the tracker for a clean test
-		s.usage = NewUsageTracker()
-
-		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
-			Model:  "test-cloud",
-			Prompt: "hello",
-		})
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status 200, got %d", w.Code)
-		}
-
-		stats := s.usage.Stats()
-		found := false
-		for _, m := range stats.Usage {
-			if m.Model == "test-cloud" {
-				found = true
-				if m.Requests != 1 {
-					t.Errorf("expected 1 request, got %d", m.Requests)
-				}
-				if m.PromptTokens != 5 {
-					t.Errorf("expected 5 prompt tokens, got %d", m.PromptTokens)
-				}
-				if m.CompletionTokens != 15 {
-					t.Errorf("expected 15 completion tokens, got %d", m.CompletionTokens)
-				}
-			}
-		}
-		if !found {
-			t.Error("expected usage entry for test-cloud")
-		}
-	})
 }

 func TestGenerateChat(t *testing.T) {
@@ -251,7 +176,6 @@ func TestGenerateChat(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -968,7 +892,6 @@ func TestGenerate(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -1453,7 +1376,6 @@ func TestGenerateLogprobs(t *testing.T) {
 		}

 		s := &Server{
-			usage: NewUsageTracker(),
 			sched: &Scheduler{
 				pendingReqCh:    make(chan *LlmRequest, 1),
 				finishedReqCh:   make(chan *LlmRequest, 1),
@@ -1634,7 +1556,6 @@ func TestChatLogprobs(t *testing.T) {
 		}

 		s := &Server{
-			usage: NewUsageTracker(),
 			sched: &Scheduler{
 				pendingReqCh:    make(chan *LlmRequest, 1),
 				finishedReqCh:   make(chan *LlmRequest, 1),
@@ -1745,7 +1666,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 		}

 		s := &Server{
-			usage: NewUsageTracker(),
 			sched: &Scheduler{
 				pendingReqCh:    make(chan *LlmRequest, 1),
 				finishedReqCh:   make(chan *LlmRequest, 1),
@@ -2192,7 +2112,6 @@ func TestGenerateUnload(t *testing.T) {
 	var loadFnCalled bool

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -2294,7 +2213,6 @@ func TestGenerateWithImages(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -2452,7 +2370,6 @@ func TestImageGenerateStreamFalse(t *testing.T) {

 	opts := api.DefaultOptions()
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
--- a/server/routes_harmony_streaming_test.go
+++ b/server/routes_harmony_streaming_test.go
@@ -255,7 +255,6 @@ func TestChatHarmonyParserStreamingRealtime(t *testing.T) {
 			}

 			s := Server{
-				usage: NewUsageTracker(),
 				sched: &Scheduler{
 					pendingReqCh:    make(chan *LlmRequest, 1),
 					finishedReqCh:   make(chan *LlmRequest, 1),
@@ -407,7 +406,6 @@ func TestChatHarmonyParserStreamingSimple(t *testing.T) {
 	}

 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:    make(chan *LlmRequest, 1),
 			finishedReqCh:   make(chan *LlmRequest, 1),
@@ -590,7 +588,6 @@ func TestChatHarmonyParserStreaming(t *testing.T) {
 			}

 			s := Server{
-				usage: NewUsageTracker(),
 				sched: &Scheduler{
 					pendingReqCh:    make(chan *LlmRequest, 1),
 					finishedReqCh:   make(chan *LlmRequest, 1),
--- a/server/usage.go
+++ b/server/usage.go
@@ -1,62 +0,0 @@
-package server
-
-import (
-	"sync"
-	"time"
-
-	"github.com/ollama/ollama/api"
-)
-
-type ModelUsage struct {
-	Requests         int64
-	PromptTokens     int64
-	CompletionTokens int64
-}
-
-type UsageTracker struct {
-	mu     sync.Mutex
-	start  time.Time
-	models map[string]*ModelUsage
-}
-
-func NewUsageTracker() *UsageTracker {
-	return &UsageTracker{
-		start:  time.Now().UTC(),
-		models: make(map[string]*ModelUsage),
-	}
-}
-
-func (u *UsageTracker) Record(model string, promptTokens, completionTokens int) {
-	u.mu.Lock()
-	defer u.mu.Unlock()
-
-	m, ok := u.models[model]
-	if !ok {
-		m = &ModelUsage{}
-		u.models[model] = m
-	}
-
-	m.Requests++
-	m.PromptTokens += int64(promptTokens)
-	m.CompletionTokens += int64(completionTokens)
-}
-
-func (u *UsageTracker) Stats() api.UsageResponse {
-	u.mu.Lock()
-	defer u.mu.Unlock()
-
-	byModel := make([]api.ModelUsageData, 0, len(u.models))
-	for model, usage := range u.models {
-		byModel = append(byModel, api.ModelUsageData{
-			Model:            model,
-			Requests:         usage.Requests,
-			PromptTokens:     usage.PromptTokens,
-			CompletionTokens: usage.CompletionTokens,
-		})
-	}
-
-	return api.UsageResponse{
-		Start: u.start,
-		Usage: byModel,
-	}
-}
--- a/server/usage_test.go
+++ b/server/usage_test.go
@@ -1,136 +0,0 @@
-package server
-
-import (
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"sync"
-	"testing"
-
-	"github.com/gin-gonic/gin"
-	"github.com/ollama/ollama/api"
-)
-
-func TestUsageTrackerRecord(t *testing.T) {
-	tracker := NewUsageTracker()
-
-	tracker.Record("model-a", 10, 20)
-	tracker.Record("model-a", 5, 15)
-	tracker.Record("model-b", 100, 200)
-
-	stats := tracker.Stats()
-
-	if len(stats.Usage) != 2 {
-		t.Fatalf("expected 2 models, got %d", len(stats.Usage))
-	}
-
-	lookup := make(map[string]api.ModelUsageData)
-	for _, m := range stats.Usage {
-		lookup[m.Model] = m
-	}
-
-	a := lookup["model-a"]
-	if a.Requests != 2 {
-		t.Errorf("model-a requests: expected 2, got %d", a.Requests)
-	}
-	if a.PromptTokens != 15 {
-		t.Errorf("model-a prompt tokens: expected 15, got %d", a.PromptTokens)
-	}
-	if a.CompletionTokens != 35 {
-		t.Errorf("model-a completion tokens: expected 35, got %d", a.CompletionTokens)
-	}
-
-	b := lookup["model-b"]
-	if b.Requests != 1 {
-		t.Errorf("model-b requests: expected 1, got %d", b.Requests)
-	}
-	if b.PromptTokens != 100 {
-		t.Errorf("model-b prompt tokens: expected 100, got %d", b.PromptTokens)
-	}
-	if b.CompletionTokens != 200 {
-		t.Errorf("model-b completion tokens: expected 200, got %d", b.CompletionTokens)
-	}
-}
-
-func TestUsageTrackerConcurrent(t *testing.T) {
-	tracker := NewUsageTracker()
-
-	var wg sync.WaitGroup
-	for range 100 {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			tracker.Record("model-a", 1, 2)
-		}()
-	}
-	wg.Wait()
-
-	stats := tracker.Stats()
-	if len(stats.Usage) != 1 {
-		t.Fatalf("expected 1 model, got %d", len(stats.Usage))
-	}
-
-	m := stats.Usage[0]
-	if m.Requests != 100 {
-		t.Errorf("requests: expected 100, got %d", m.Requests)
-	}
-	if m.PromptTokens != 100 {
-		t.Errorf("prompt tokens: expected 100, got %d", m.PromptTokens)
-	}
-	if m.CompletionTokens != 200 {
-		t.Errorf("completion tokens: expected 200, got %d", m.CompletionTokens)
-	}
-}
-
-func TestUsageTrackerStart(t *testing.T) {
-	tracker := NewUsageTracker()
-
-	stats := tracker.Stats()
-	if stats.Start.IsZero() {
-		t.Error("expected non-zero start time")
-	}
-}
-
-func TestUsageHandler(t *testing.T) {
-	gin.SetMode(gin.TestMode)
-
-	s := &Server{
-		usage: NewUsageTracker(),
-	}
-
-	s.usage.Record("llama3", 50, 100)
-	s.usage.Record("llama3", 25, 50)
-
-	w := httptest.NewRecorder()
-	c, _ := gin.CreateTestContext(w)
-	c.Request = httptest.NewRequest(http.MethodGet, "/api/usage", nil)
-
-	s.UsageHandler(c)
-
-	if w.Code != http.StatusOK {
-		t.Fatalf("expected status 200, got %d", w.Code)
-	}
-
-	var resp api.UsageResponse
-	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
-		t.Fatalf("failed to unmarshal response: %v", err)
-	}
-
-	if len(resp.Usage) != 1 {
-		t.Fatalf("expected 1 model, got %d", len(resp.Usage))
-	}
-
-	m := resp.Usage[0]
-	if m.Model != "llama3" {
-		t.Errorf("expected model llama3, got %s", m.Model)
-	}
-	if m.Requests != 2 {
-		t.Errorf("expected 2 requests, got %d", m.Requests)
-	}
-	if m.PromptTokens != 75 {
-		t.Errorf("expected 75 prompt tokens, got %d", m.PromptTokens)
-	}
-	if m.CompletionTokens != 150 {
-		t.Errorf("expected 150 completion tokens, got %d", m.CompletionTokens)
-	}
-}
Author	SHA1	Message	Date
jmorganca	f201b7d258	lint	2026-02-02 00:26:43 -08:00
jmorganca	6f5b814b86	lint	2026-02-02 00:07:52 -08:00
jmorganca	79c00a1b16	lint	2026-02-01 22:54:12 -08:00
jmorganca	9eded5fddb	glm working	2026-02-01 22:24:32 -08:00
jmorganca	2626ec7772	glm wip	2026-01-26 14:07:21 -08:00
jmorganca	f408e0ff5e	glm wip	2026-01-26 13:25:56 -08:00