Compare commits


2 Commits

ParthSareen  52f757d8a2  2026-02-04 19:27:34 -08:00
cmd: fix gofmt formatting in pi integration

Fixes formatting issues caught by golangci-lint.

ParthSareen  86aa7cd0a6  2026-02-04 18:51:40 -08:00
cmd: add pi integration to ollama launch

Adds support for launching Pi (Pi Coding Agent) via ollama launch.
Pi is a CLI tool distributed via npm that can use Ollama models.

Features:
- Configures ~/.pi/agent/models.json with Ollama provider settings
- Sets defaultProvider and defaultModel in ~/.pi/agent/settings.json
- Uses a _launch marker to identify ollama-managed models
- Preserves user-managed models and other config fields
- Backs up both files before modification
- Hidden from the interactive selector (use ollama launch pi)

Tested with 20 tests, including safety tests that verify user-managed
config is preserved.

Usage:
  ollama launch pi
  ollama launch pi --model glm-4.7-flash
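
For illustration, a minimal sketch of what the two managed files contain after a run such as "ollama launch pi --model llama3.2" (shape taken from the pi.go diff below; the host and model values here are assumptions):

~/.pi/agent/models.json:
{
  "providers": {
    "ollama": {
      "baseUrl": "http://127.0.0.1:11434/v1",
      "api": "openai-completions",
      "apiKey": "ollama",
      "models": [
        { "id": "llama3.2", "_launch": true }
      ]
    }
  }
}

~/.pi/agent/settings.json (other user fields are preserved):
{
  "defaultProvider": "ollama",
  "defaultModel": "llama3.2"
}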
15 changed files with 845 additions and 482 deletions


@@ -367,25 +367,14 @@ func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
return err
} else if info.RemoteHost != "" {
// Cloud model, no need to load/unload
-isCloud := strings.HasPrefix(info.RemoteHost, "https://ollama.com")
-// Check if user is signed in for ollama.com cloud models
-if isCloud {
-if _, err := client.Whoami(cmd.Context()); err != nil {
-return err
-}
-}
if opts.ShowConnect {
p.StopAndClear()
-if isCloud {
+if strings.HasPrefix(info.RemoteHost, "https://ollama.com") {
fmt.Fprintf(os.Stderr, "Connecting to '%s' on 'ollama.com' ⚡\n", info.RemoteModel)
} else {
fmt.Fprintf(os.Stderr, "Connecting to '%s' on '%s'\n", info.RemoteModel, info.RemoteHost)
}
}
return nil
}


@@ -3,7 +3,6 @@ package cmd
import (
"bytes"
"encoding/json"
-"errors"
"fmt"
"io"
"net/http"
@@ -1660,103 +1659,3 @@ func TestRunOptions_Copy_Independence(t *testing.T) {
t.Error("Copy Think should not be affected by original modification")
}
}
func TestLoadOrUnloadModel_CloudModelAuth(t *testing.T) {
tests := []struct {
name string
remoteHost string
whoamiStatus int
whoamiResp any
expectedError string
}{
{
name: "ollama.com cloud model - user signed in",
remoteHost: "https://ollama.com",
whoamiStatus: http.StatusOK,
whoamiResp: api.UserResponse{Name: "testuser"},
},
{
name: "ollama.com cloud model - user not signed in",
remoteHost: "https://ollama.com",
whoamiStatus: http.StatusUnauthorized,
whoamiResp: map[string]string{
"error": "unauthorized",
"signin_url": "https://ollama.com/signin",
},
expectedError: "unauthorized",
},
{
name: "non-ollama.com remote - no auth check",
remoteHost: "https://other-remote.com",
whoamiStatus: http.StatusUnauthorized, // should not be called
whoamiResp: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
whoamiCalled := false
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/show":
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(api.ShowResponse{
RemoteHost: tt.remoteHost,
RemoteModel: "test-model",
}); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
case "/api/me":
whoamiCalled = true
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(tt.whoamiStatus)
if tt.whoamiResp != nil {
if err := json.NewEncoder(w).Encode(tt.whoamiResp); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
default:
http.NotFound(w, r)
}
}))
defer mockServer.Close()
t.Setenv("OLLAMA_HOST", mockServer.URL)
cmd := &cobra.Command{}
cmd.SetContext(t.Context())
opts := &runOptions{
Model: "test-cloud-model",
ShowConnect: false,
}
err := loadOrUnloadModel(cmd, opts)
if strings.HasPrefix(tt.remoteHost, "https://ollama.com") {
if !whoamiCalled {
t.Error("expected whoami to be called for ollama.com cloud model")
}
} else {
if whoamiCalled {
t.Error("whoami should not be called for non-ollama.com remote")
}
}
if tt.expectedError != "" {
if err == nil {
t.Errorf("expected error containing %q, got nil", tt.expectedError)
} else {
var authErr api.AuthorizationError
if !errors.As(err, &authErr) {
t.Errorf("expected AuthorizationError, got %T: %v", err, err)
}
}
} else {
if err != nil {
t.Errorf("expected no error, got %v", err)
}
}
})
}
}


@@ -48,6 +48,7 @@ var integrations = map[string]Runner{
"droid": &Droid{},
"opencode": &OpenCode{},
"openclaw": &Openclaw{},
"pi": &Pi{},
}
// recommendedModels are shown when the user has no models or as suggestions.
@@ -63,6 +64,7 @@ var recommendedModels = []selectItem{
var integrationAliases = map[string]bool{
"clawdbot": true,
"moltbot": true,
"pi": true,
}
func selectIntegration() (string, error) {
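
A hedged sketch of how this registry is presumably consulted at launch time (the dispatch code is not part of this diff; everything beyond the integrations map and the Run signature is an assumption):

// hypothetical dispatch, based on the integrations map above and the
// Run(model string, args []string) signature that Pi implements below
if runner, ok := integrations[name]; ok {
	return runner.Run(model, args)
}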

cmd/config/pi.go (new file, 196 lines added)

@@ -0,0 +1,196 @@
package config
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"slices"
"github.com/ollama/ollama/envconfig"
)
// Pi implements Runner and Editor for Pi (Pi Coding Agent) integration
type Pi struct{}
func (p *Pi) String() string { return "Pi" }
func (p *Pi) Run(model string, args []string) error {
if _, err := exec.LookPath("pi"); err != nil {
return fmt.Errorf("pi is not installed, install with: npm install -g @mariozechner/pi-coding-agent")
}
// Call Edit() to ensure config is up-to-date before launch
models := []string{model}
if config, err := loadIntegration("pi"); err == nil && len(config.Models) > 0 {
models = config.Models
}
if err := p.Edit(models); err != nil {
return fmt.Errorf("setup failed: %w", err)
}
cmd := exec.Command("pi", args...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
func (p *Pi) Paths() []string {
home, err := os.UserHomeDir()
if err != nil {
return nil
}
var paths []string
modelsPath := filepath.Join(home, ".pi", "agent", "models.json")
if _, err := os.Stat(modelsPath); err == nil {
paths = append(paths, modelsPath)
}
settingsPath := filepath.Join(home, ".pi", "agent", "settings.json")
if _, err := os.Stat(settingsPath); err == nil {
paths = append(paths, settingsPath)
}
return paths
}
func (p *Pi) Edit(models []string) error {
if len(models) == 0 {
return nil
}
home, err := os.UserHomeDir()
if err != nil {
return err
}
configPath := filepath.Join(home, ".pi", "agent", "models.json")
if err := os.MkdirAll(filepath.Dir(configPath), 0o755); err != nil {
return err
}
config := make(map[string]any)
if data, err := os.ReadFile(configPath); err == nil {
_ = json.Unmarshal(data, &config)
}
providers, ok := config["providers"].(map[string]any)
if !ok {
providers = make(map[string]any)
}
ollama, ok := providers["ollama"].(map[string]any)
if !ok {
ollama = map[string]any{
"baseUrl": envconfig.Host().String() + "/v1",
"api": "openai-completions",
"apiKey": "ollama",
}
}
existingModels, ok := ollama["models"].([]any)
if !ok {
existingModels = make([]any, 0)
}
// Build set of selected models to track which need to be added
selectedSet := make(map[string]bool, len(models))
for _, m := range models {
selectedSet[m] = true
}
// Build new models list:
// 1. Keep user-managed models (no _launch marker) - untouched
// 2. Keep ollama-managed models (_launch marker) that are still selected
// 3. Add new ollama-managed models
var newModels []any
for _, m := range existingModels {
if modelObj, ok := m.(map[string]any); ok {
if id, ok := modelObj["id"].(string); ok {
// User-managed model (no _launch marker) - always preserve
if !isPiOllamaModel(modelObj) {
newModels = append(newModels, m)
} else if selectedSet[id] {
// Ollama-managed and still selected - keep it
newModels = append(newModels, m)
selectedSet[id] = false
}
}
}
}
// Add newly selected models that weren't already in the list
for _, model := range models {
if selectedSet[model] {
newModels = append(newModels, map[string]any{
"id": model,
"_launch": true,
})
}
}
ollama["models"] = newModels
providers["ollama"] = ollama
config["providers"] = providers
configData, err := json.MarshalIndent(config, "", " ")
if err != nil {
return err
}
if err := writeWithBackup(configPath, configData); err != nil {
return err
}
// Update settings.json with default provider and model
settingsPath := filepath.Join(home, ".pi", "agent", "settings.json")
settings := make(map[string]any)
if data, err := os.ReadFile(settingsPath); err == nil {
_ = json.Unmarshal(data, &settings)
}
settings["defaultProvider"] = "ollama"
settings["defaultModel"] = models[0]
settingsData, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return err
}
return writeWithBackup(settingsPath, settingsData)
}
func (p *Pi) Models() []string {
home, err := os.UserHomeDir()
if err != nil {
return nil
}
configPath := filepath.Join(home, ".pi", "agent", "models.json")
config, err := readJSONFile(configPath)
if err != nil {
return nil
}
providers, _ := config["providers"].(map[string]any)
ollama, _ := providers["ollama"].(map[string]any)
models, _ := ollama["models"].([]any)
var result []string
for _, m := range models {
if modelObj, ok := m.(map[string]any); ok {
if id, ok := modelObj["id"].(string); ok {
result = append(result, id)
}
}
}
slices.Sort(result)
return result
}
// isPiOllamaModel reports whether a model config entry is managed by ollama launch
func isPiOllamaModel(cfg map[string]any) bool {
if v, ok := cfg["_launch"].(bool); ok && v {
return true
}
return false
}
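
A minimal usage sketch for the new type, assuming it is invoked from within the same package (error handling abbreviated):

// configure Pi for two Ollama-managed models, then list what is configured
pi := &Pi{}
if err := pi.Edit([]string{"llama3.2", "qwen3:8b"}); err != nil {
	fmt.Println("setup failed:", err)
}
fmt.Println(pi.Models()) // sorted model IDs read back from models.json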

cmd/config/pi_test.go (new file, 609 lines added)

@@ -0,0 +1,609 @@
package config
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
func TestPiIntegration(t *testing.T) {
pi := &Pi{}
t.Run("String", func(t *testing.T) {
if got := pi.String(); got != "Pi" {
t.Errorf("String() = %q, want %q", got, "Pi")
}
})
t.Run("implements Runner", func(t *testing.T) {
var _ Runner = pi
})
t.Run("implements Editor", func(t *testing.T) {
var _ Editor = pi
})
}
func TestPiPaths(t *testing.T) {
pi := &Pi{}
t.Run("returns empty when no config exists", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
paths := pi.Paths()
if len(paths) != 0 {
t.Errorf("Paths() = %v, want empty", paths)
}
})
t.Run("returns path when config exists", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".pi", "agent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
t.Fatal(err)
}
configPath := filepath.Join(configDir, "models.json")
if err := os.WriteFile(configPath, []byte("{}"), 0o644); err != nil {
t.Fatal(err)
}
paths := pi.Paths()
if len(paths) != 1 || paths[0] != configPath {
t.Errorf("Paths() = %v, want [%s]", paths, configPath)
}
})
}
func TestPiEdit(t *testing.T) {
pi := &Pi{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".pi", "agent")
configPath := filepath.Join(configDir, "models.json")
cleanup := func() {
os.RemoveAll(configDir)
}
readConfig := func() map[string]any {
data, _ := os.ReadFile(configPath)
var cfg map[string]any
json.Unmarshal(data, &cfg)
return cfg
}
t.Run("returns nil for empty models", func(t *testing.T) {
if err := pi.Edit([]string{}); err != nil {
t.Errorf("Edit([]) error = %v, want nil", err)
}
})
t.Run("creates config with models", func(t *testing.T) {
cleanup()
models := []string{"llama3.2", "qwen3:8b"}
if err := pi.Edit(models); err != nil {
t.Fatalf("Edit() error = %v", err)
}
cfg := readConfig()
providers, ok := cfg["providers"].(map[string]any)
if !ok {
t.Error("Config missing providers")
}
ollama, ok := providers["ollama"].(map[string]any)
if !ok {
t.Error("Providers missing ollama")
}
modelsArray, ok := ollama["models"].([]any)
if !ok || len(modelsArray) != 2 {
t.Errorf("Expected 2 models, got %v", modelsArray)
}
if ollama["baseUrl"] == nil {
t.Error("Missing baseUrl")
}
if ollama["api"] != "openai-completions" {
t.Errorf("Expected api=openai-completions, got %v", ollama["api"])
}
if ollama["apiKey"] != "ollama" {
t.Errorf("Expected apiKey=ollama, got %v", ollama["apiKey"])
}
})
t.Run("updates existing config preserving ollama provider settings", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
existingConfig := `{
"providers": {
"ollama": {
"baseUrl": "http://custom:8080/v1",
"api": "custom-api",
"apiKey": "custom-key",
"models": [
{"id": "old-model", "_launch": true}
]
}
}
}`
if err := os.WriteFile(configPath, []byte(existingConfig), 0o644); err != nil {
t.Fatal(err)
}
models := []string{"new-model"}
if err := pi.Edit(models); err != nil {
t.Fatalf("Edit() error = %v", err)
}
cfg := readConfig()
providers := cfg["providers"].(map[string]any)
ollama := providers["ollama"].(map[string]any)
if ollama["baseUrl"] != "http://custom:8080/v1" {
t.Errorf("Custom baseUrl not preserved, got %v", ollama["baseUrl"])
}
if ollama["api"] != "custom-api" {
t.Errorf("Custom api not preserved, got %v", ollama["api"])
}
if ollama["apiKey"] != "custom-key" {
t.Errorf("Custom apiKey not preserved, got %v", ollama["apiKey"])
}
modelsArray := ollama["models"].([]any)
if len(modelsArray) != 1 {
t.Errorf("Expected 1 model after update, got %d", len(modelsArray))
} else {
modelEntry := modelsArray[0].(map[string]any)
if modelEntry["id"] != "new-model" {
t.Errorf("Expected new-model, got %v", modelEntry["id"])
}
// Verify _launch marker is present
if modelEntry["_launch"] != true {
t.Errorf("Expected _launch marker to be true")
}
}
})
t.Run("replaces old models with new ones", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
// Old models must have _launch marker to be managed by us
existingConfig := `{
"providers": {
"ollama": {
"baseUrl": "http://localhost:11434/v1",
"api": "openai-completions",
"apiKey": "ollama",
"models": [
{"id": "old-model-1", "_launch": true},
{"id": "old-model-2", "_launch": true}
]
}
}
}`
if err := os.WriteFile(configPath, []byte(existingConfig), 0o644); err != nil {
t.Fatal(err)
}
newModels := []string{"new-model-1", "new-model-2"}
if err := pi.Edit(newModels); err != nil {
t.Fatalf("Edit() error = %v", err)
}
cfg := readConfig()
providers := cfg["providers"].(map[string]any)
ollama := providers["ollama"].(map[string]any)
modelsArray := ollama["models"].([]any)
if len(modelsArray) != 2 {
t.Errorf("Expected 2 models, got %d", len(modelsArray))
}
modelIDs := make(map[string]bool)
for _, m := range modelsArray {
modelObj := m.(map[string]any)
id := modelObj["id"].(string)
modelIDs[id] = true
}
if !modelIDs["new-model-1"] || !modelIDs["new-model-2"] {
t.Errorf("Expected new models, got %v", modelIDs)
}
if modelIDs["old-model-1"] || modelIDs["old-model-2"] {
t.Errorf("Old models should have been removed, got %v", modelIDs)
}
})
t.Run("handles partial overlap in model list", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
// Models must have _launch marker to be managed
existingConfig := `{
"providers": {
"ollama": {
"baseUrl": "http://localhost:11434/v1",
"api": "openai-completions",
"apiKey": "ollama",
"models": [
{"id": "keep-model", "_launch": true},
{"id": "remove-model", "_launch": true}
]
}
}
}`
if err := os.WriteFile(configPath, []byte(existingConfig), 0o644); err != nil {
t.Fatal(err)
}
newModels := []string{"keep-model", "add-model"}
if err := pi.Edit(newModels); err != nil {
t.Fatalf("Edit() error = %v", err)
}
cfg := readConfig()
providers := cfg["providers"].(map[string]any)
ollama := providers["ollama"].(map[string]any)
modelsArray := ollama["models"].([]any)
if len(modelsArray) != 2 {
t.Errorf("Expected 2 models, got %d", len(modelsArray))
}
modelIDs := make(map[string]bool)
for _, m := range modelsArray {
modelObj := m.(map[string]any)
id := modelObj["id"].(string)
modelIDs[id] = true
}
if !modelIDs["keep-model"] || !modelIDs["add-model"] {
t.Errorf("Expected keep-model and add-model, got %v", modelIDs)
}
if modelIDs["remove-model"] {
t.Errorf("remove-model should have been removed")
}
})
t.Run("handles corrupt config gracefully", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
if err := os.WriteFile(configPath, []byte("{invalid json}"), 0o644); err != nil {
t.Fatal(err)
}
models := []string{"test-model"}
if err := pi.Edit(models); err != nil {
t.Fatalf("Edit() should not fail with corrupt config, got %v", err)
}
data, err := os.ReadFile(configPath)
if err != nil {
t.Fatalf("Failed to read config: %v", err)
}
var cfg map[string]any
if err := json.Unmarshal(data, &cfg); err != nil {
t.Fatalf("Config should be valid after Edit, got parse error: %v", err)
}
providers := cfg["providers"].(map[string]any)
ollama := providers["ollama"].(map[string]any)
modelsArray := ollama["models"].([]any)
if len(modelsArray) != 1 {
t.Errorf("Expected 1 model, got %d", len(modelsArray))
}
})
// CRITICAL SAFETY TEST: verifies we don't stomp on user configs
t.Run("preserves user-managed models without _launch marker", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
// User has manually configured models in ollama provider (no _launch marker)
existingConfig := `{
"providers": {
"ollama": {
"baseUrl": "http://localhost:11434/v1",
"api": "openai-completions",
"apiKey": "ollama",
"models": [
{"id": "user-model-1"},
{"id": "user-model-2", "customField": "preserved"},
{"id": "ollama-managed", "_launch": true}
]
}
}
}`
if err := os.WriteFile(configPath, []byte(existingConfig), 0o644); err != nil {
t.Fatal(err)
}
// Add a new ollama-managed model
newModels := []string{"new-ollama-model"}
if err := pi.Edit(newModels); err != nil {
t.Fatalf("Edit() error = %v", err)
}
cfg := readConfig()
providers := cfg["providers"].(map[string]any)
ollama := providers["ollama"].(map[string]any)
modelsArray := ollama["models"].([]any)
// Should have: new-ollama-model (managed) + 2 user models (preserved)
if len(modelsArray) != 3 {
t.Errorf("Expected 3 models (1 new managed + 2 preserved user models), got %d", len(modelsArray))
}
modelIDs := make(map[string]map[string]any)
for _, m := range modelsArray {
modelObj := m.(map[string]any)
id := modelObj["id"].(string)
modelIDs[id] = modelObj
}
// Verify new model has _launch marker
if m, ok := modelIDs["new-ollama-model"]; !ok {
t.Errorf("new-ollama-model should be present")
} else if m["_launch"] != true {
t.Errorf("new-ollama-model should have _launch marker")
}
// Verify user models are preserved
if _, ok := modelIDs["user-model-1"]; !ok {
t.Errorf("user-model-1 should be preserved")
}
if _, ok := modelIDs["user-model-2"]; !ok {
t.Errorf("user-model-2 should be preserved")
} else if modelIDs["user-model-2"]["customField"] != "preserved" {
t.Errorf("user-model-2 customField should be preserved")
}
// Verify old ollama-managed model is removed (not in new list)
if _, ok := modelIDs["ollama-managed"]; ok {
t.Errorf("ollama-managed should be removed (old ollama model not in new selection)")
}
})
t.Run("updates settings.json with default provider and model", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
// Create existing settings with other fields
settingsPath := filepath.Join(configDir, "settings.json")
existingSettings := `{
"theme": "dark",
"customSetting": "value",
"defaultProvider": "anthropic",
"defaultModel": "claude-3"
}`
if err := os.WriteFile(settingsPath, []byte(existingSettings), 0o644); err != nil {
t.Fatal(err)
}
models := []string{"llama3.2"}
if err := pi.Edit(models); err != nil {
t.Fatalf("Edit() error = %v", err)
}
data, err := os.ReadFile(settingsPath)
if err != nil {
t.Fatalf("Failed to read settings: %v", err)
}
var settings map[string]any
if err := json.Unmarshal(data, &settings); err != nil {
t.Fatalf("Failed to parse settings: %v", err)
}
// Verify defaultProvider is set to ollama
if settings["defaultProvider"] != "ollama" {
t.Errorf("defaultProvider = %v, want ollama", settings["defaultProvider"])
}
// Verify defaultModel is set to first model
if settings["defaultModel"] != "llama3.2" {
t.Errorf("defaultModel = %v, want llama3.2", settings["defaultModel"])
}
// Verify other fields are preserved
if settings["theme"] != "dark" {
t.Errorf("theme = %v, want dark (preserved)", settings["theme"])
}
if settings["customSetting"] != "value" {
t.Errorf("customSetting = %v, want value (preserved)", settings["customSetting"])
}
})
t.Run("creates settings.json if it does not exist", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
models := []string{"qwen3:8b"}
if err := pi.Edit(models); err != nil {
t.Fatalf("Edit() error = %v", err)
}
settingsPath := filepath.Join(configDir, "settings.json")
data, err := os.ReadFile(settingsPath)
if err != nil {
t.Fatalf("settings.json should be created: %v", err)
}
var settings map[string]any
if err := json.Unmarshal(data, &settings); err != nil {
t.Fatalf("Failed to parse settings: %v", err)
}
if settings["defaultProvider"] != "ollama" {
t.Errorf("defaultProvider = %v, want ollama", settings["defaultProvider"])
}
if settings["defaultModel"] != "qwen3:8b" {
t.Errorf("defaultModel = %v, want qwen3:8b", settings["defaultModel"])
}
})
t.Run("handles corrupt settings.json gracefully", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)
// Create corrupt settings
settingsPath := filepath.Join(configDir, "settings.json")
if err := os.WriteFile(settingsPath, []byte("{invalid"), 0o644); err != nil {
t.Fatal(err)
}
models := []string{"test-model"}
if err := pi.Edit(models); err != nil {
t.Fatalf("Edit() should not fail with corrupt settings, got %v", err)
}
data, err := os.ReadFile(settingsPath)
if err != nil {
t.Fatalf("Failed to read settings: %v", err)
}
var settings map[string]any
if err := json.Unmarshal(data, &settings); err != nil {
t.Fatalf("settings.json should be valid after Edit, got parse error: %v", err)
}
if settings["defaultProvider"] != "ollama" {
t.Errorf("defaultProvider = %v, want ollama", settings["defaultProvider"])
}
if settings["defaultModel"] != "test-model" {
t.Errorf("defaultModel = %v, want test-model", settings["defaultModel"])
}
})
}
func TestPiModels(t *testing.T) {
pi := &Pi{}
t.Run("returns nil when no config exists", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
models := pi.Models()
if models != nil {
t.Errorf("Models() = %v, want nil", models)
}
})
t.Run("returns models from config", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".pi", "agent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
t.Fatal(err)
}
config := `{
"providers": {
"ollama": {
"models": [
{"id": "llama3.2"},
{"id": "qwen3:8b"}
]
}
}
}`
configPath := filepath.Join(configDir, "models.json")
if err := os.WriteFile(configPath, []byte(config), 0o644); err != nil {
t.Fatal(err)
}
models := pi.Models()
if len(models) != 2 {
t.Errorf("Models() returned %d models, want 2", len(models))
}
if models[0] != "llama3.2" || models[1] != "qwen3:8b" {
t.Errorf("Models() = %v, want [llama3.2 qwen3:8b] (sorted)", models)
}
})
t.Run("returns sorted models", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".pi", "agent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
t.Fatal(err)
}
config := `{
"providers": {
"ollama": {
"models": [
{"id": "z-model"},
{"id": "a-model"},
{"id": "m-model"}
]
}
}
}`
configPath := filepath.Join(configDir, "models.json")
if err := os.WriteFile(configPath, []byte(config), 0o644); err != nil {
t.Fatal(err)
}
models := pi.Models()
if models[0] != "a-model" || models[1] != "m-model" || models[2] != "z-model" {
t.Errorf("Models() = %v, want [a-model m-model z-model] (sorted)", models)
}
})
t.Run("returns nil when models array is missing", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".pi", "agent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
t.Fatal(err)
}
config := `{
"providers": {
"ollama": {}
}
}`
configPath := filepath.Join(configDir, "models.json")
if err := os.WriteFile(configPath, []byte(config), 0o644); err != nil {
t.Fatal(err)
}
models := pi.Models()
if models != nil {
t.Errorf("Models() = %v, want nil when models array is missing", models)
}
})
t.Run("handles corrupt config gracefully", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
configDir := filepath.Join(tmpDir, ".pi", "agent")
if err := os.MkdirAll(configDir, 0o755); err != nil {
t.Fatal(err)
}
configPath := filepath.Join(configDir, "models.json")
if err := os.WriteFile(configPath, []byte("{invalid json}"), 0o644); err != nil {
t.Fatal(err)
}
models := pi.Models()
if models != nil {
t.Errorf("Models() = %v, want nil for corrupt config", models)
}
})
}


@@ -188,8 +188,6 @@ func LogLevel() slog.Level {
var (
// FlashAttention enables the experimental flash attention feature.
FlashAttention = BoolWithDefault("OLLAMA_FLASH_ATTENTION")
-// DebugLogRequests logs inference requests to disk for replay/debugging.
-DebugLogRequests = Bool("OLLAMA_DEBUG_LOG_REQUESTS")
// KvCacheType is the quantization type for the K/V cache.
KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
// NoHistory disables readline history.
@@ -275,27 +273,26 @@ type EnvVar struct {
func AsMap() map[string]EnvVar {
ret := map[string]EnvVar{
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-"OLLAMA_DEBUG_LOG_REQUESTS": {"OLLAMA_DEBUG_LOG_REQUESTS", DebugLogRequests(), "Log inference request bodies and replay curl commands to a temp directory"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
// Informational
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},


@@ -144,47 +144,3 @@ func TestUnicodeModelDir(t *testing.T) {
}
ChatTestHelper(ctx, t, req, blueSkyExpected)
}
// TestNumPredict verifies that when num_predict is set, the model generates
// exactly that many tokens. It uses logprobs to count the actual tokens output.
func TestNumPredict(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, "qwen3:0.6b"); err != nil {
t.Fatalf("failed to pull model: %v", err)
}
req := api.GenerateRequest{
Model: "qwen3:0.6b",
Prompt: "Write a long story.",
Stream: &stream,
Logprobs: true,
Options: map[string]any{
"num_predict": 10,
"temperature": 0,
"seed": 123,
},
}
logprobCount := 0
var finalResponse api.GenerateResponse
err := client.Generate(ctx, &req, func(resp api.GenerateResponse) error {
logprobCount += len(resp.Logprobs)
if resp.Done {
finalResponse = resp
}
return nil
})
if err != nil {
t.Fatalf("generate failed: %v", err)
}
if logprobCount != 10 {
t.Errorf("expected 10 tokens (logprobs), got %d (EvalCount=%d, DoneReason=%s)",
logprobCount, finalResponse.EvalCount, finalResponse.DoneReason)
}
}


@@ -175,7 +175,6 @@ type Tensor interface {
SILU(ctx Context, up ...Tensor) Tensor
RELU(ctx Context, up ...Tensor) Tensor
Sigmoid(ctx Context) Tensor
-SigmoidOut(ctx Context) Tensor
// AlphaLimitSILU is a variant of SILU that clamps the input to the range [-limit, limit]
SILUAlphaLimit(ctx Context, up Tensor, alpha, limit float32) Tensor


@@ -1468,13 +1468,6 @@ func (t *Tensor) Sigmoid(ctx ml.Context) ml.Tensor {
}
}
-func (t *Tensor) SigmoidOut(ctx ml.Context) ml.Tensor {
-return &Tensor{
-b: t.b,
-t: C.ggml_sigmoid(ctx.(*Context).ctx, t.t),
-}
-}
func (t *Tensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
switch len(shape) {
case 1:


@@ -135,7 +135,7 @@ func (mlp *sparse) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options
// Apply shared expert gating
if mlp.SharedGateInp != nil {
sharedGateVal := mlp.SharedGateInp.Forward(ctx, hiddenStates2D)
-sharedGateVal = sharedGateVal.SigmoidOut(ctx)
+sharedGateVal = sharedGateVal.Sigmoid(ctx)
// Broadcast gate to match dimensions
sharedGateVal = sharedGateVal.Repeat(ctx, 0, sharedOut.Dim(0))
sharedOut = sharedOut.Mul(ctx, sharedGateVal)
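
For context, the shared-expert gating touched here computes roughly sharedOut = sharedOut * repeat(sigmoid(SharedGateInp(h))), an elementwise multiply broadcast along dim 0; the change only swaps which sigmoid op supplies the gate, since SigmoidOut is removed from the Tensor interface above.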


@@ -514,6 +514,13 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, er
continue
}
+// if past the num predict limit
+if seq.numPredict > 0 && seq.numPredicted >= seq.numPredict {
+s.removeSequence(seqIdx, llm.DoneReasonLength)
+nextBatch.seqs[seqIdx] = nil
+continue
+}
if !s.cache.enabled {
seq.inputs = append(seq.cache.Inputs, seq.inputs...)
seq.cache.Inputs = []*input.Input{}
@@ -702,6 +709,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
continue
}
+seq.numPredicted++
nextToken := &input.Input{Token: 0} // placeholder we'll fill in after Compute/Floats
seq.inputs = []*input.Input{nextToken}
nextBatchTokens[i] = nextToken
@@ -737,9 +745,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
logutil.Trace("computeBatch: sequence replaced, discarding its results", "batchID", activeBatch.id, "seqIdx", i)
continue
}
seq.lastUpdatedAt = t
-seq.numPredicted++
if seq.numPredicted == 1 {
seq.processingDuration = seq.lastUpdatedAt.Sub(seq.startedAt)
seq.startedAt = seq.lastUpdatedAt
@@ -785,13 +791,6 @@ func (s *Server) computeBatch(activeBatch batchState) {
}
seq.pendingResponses = append(seq.pendingResponses, piece)
-// if past the num predict limit
-if seq.numPredict > 0 && seq.numPredicted >= seq.numPredict {
-s.removeSequence(i, llm.DoneReasonLength)
-continue
-}
sequence := strings.Join(seq.pendingResponses, "")
if ok, stop := common.FindStop(sequence, seq.stop); ok {
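
Reading these hunks together: numPredicted is now incremented where the next-token placeholder is queued, and the num_predict cutoff moves from after a token is detokenized to before the next forward pass is scheduled, so a sequence appears to stop with DoneReasonLength once it has produced num_predict tokens. The surrounding code is not shown, so this reading is inferred from the hunk headers.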


@@ -1,144 +0,0 @@
package server
import (
"bytes"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/envconfig"
)
type inferenceRequestLogger struct {
dir string
counter uint64
}
func newInferenceRequestLogger() (*inferenceRequestLogger, error) {
dir, err := os.MkdirTemp("", "ollama-request-logs-*")
if err != nil {
return nil, err
}
return &inferenceRequestLogger{dir: dir}, nil
}
func (s *Server) initRequestLogging() error {
if !envconfig.DebugLogRequests() {
return nil
}
requestLogger, err := newInferenceRequestLogger()
if err != nil {
return fmt.Errorf("enable OLLAMA_DEBUG_LOG_REQUESTS: %w", err)
}
s.requestLogger = requestLogger
slog.Info(fmt.Sprintf("request debug logging enabled; inference request logs will be stored in %s and include request bodies and replay curl commands", requestLogger.dir))
return nil
}
func (s *Server) withInferenceRequestLogging(route string, handlers ...gin.HandlerFunc) []gin.HandlerFunc {
if s.requestLogger == nil {
return handlers
}
return append([]gin.HandlerFunc{s.requestLogger.middleware(route)}, handlers...)
}
func (l *inferenceRequestLogger) middleware(route string) gin.HandlerFunc {
return func(c *gin.Context) {
if c.Request == nil {
c.Next()
return
}
method := c.Request.Method
host := c.Request.Host
scheme := "http"
if c.Request.TLS != nil {
scheme = "https"
}
contentType := c.GetHeader("Content-Type")
var body []byte
if c.Request.Body != nil {
var err error
body, err = io.ReadAll(c.Request.Body)
c.Request.Body = io.NopCloser(bytes.NewReader(body))
if err != nil {
slog.Warn("failed to read request body for debug logging", "route", route, "error", err)
}
}
c.Next()
l.log(route, method, scheme, host, contentType, body)
}
}
func (l *inferenceRequestLogger) log(route, method, scheme, host, contentType string, body []byte) {
if l == nil || l.dir == "" {
return
}
if contentType == "" {
contentType = "application/json"
}
if host == "" || scheme == "" {
base := envconfig.Host()
if host == "" {
host = base.Host
}
if scheme == "" {
scheme = base.Scheme
}
}
routeForFilename := sanitizeRouteForFilename(route)
timestamp := fmt.Sprintf("%s-%06d", time.Now().UTC().Format("20060102T150405.000000000Z"), atomic.AddUint64(&l.counter, 1))
bodyFilename := fmt.Sprintf("%s_%s_body.json", timestamp, routeForFilename)
curlFilename := fmt.Sprintf("%s_%s_request.sh", timestamp, routeForFilename)
bodyPath := filepath.Join(l.dir, bodyFilename)
curlPath := filepath.Join(l.dir, curlFilename)
if err := os.WriteFile(bodyPath, body, 0o600); err != nil {
slog.Warn("failed to write debug request body", "route", route, "error", err)
return
}
url := fmt.Sprintf("%s://%s%s", scheme, host, route)
curl := fmt.Sprintf("#!/bin/sh\nSCRIPT_DIR=\"$(CDPATH= cd -- \"$(dirname -- \"$0\")\" && pwd)\"\ncurl --request %s --url %q --header %q --data-binary @\"${SCRIPT_DIR}/%s\"\n", method, url, "Content-Type: "+contentType, bodyFilename)
if err := os.WriteFile(curlPath, []byte(curl), 0o600); err != nil {
slog.Warn("failed to write debug request replay command", "route", route, "error", err)
return
}
slog.Info(fmt.Sprintf("logged to %s, replay using curl with `sh %s`", bodyPath, curlPath))
}
func sanitizeRouteForFilename(route string) string {
route = strings.TrimPrefix(route, "/")
if route == "" {
return "root"
}
var b strings.Builder
b.Grow(len(route))
for _, r := range route {
if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9') {
b.WriteRune(r)
} else {
b.WriteByte('_')
}
}
return b.String()
}
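
For reference, a replay script written by this (now-removed) logger would render roughly like this for a POST to /api/chat (timestamp, counter, and host here are illustrative):

#!/bin/sh
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)"
curl --request POST --url "http://127.0.0.1:11434/api/chat" --header "Content-Type: application/json" --data-binary @"${SCRIPT_DIR}/20260204T120000.000000000Z-000001_api_chat_body.json"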


@@ -81,7 +81,6 @@ type Server struct {
addr net.Addr
sched *Scheduler
defaultNumCtx int
-requestLogger *inferenceRequestLogger
}
func init() {
@@ -1584,24 +1583,24 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
// Inference
r.GET("/api/ps", s.PsHandler)
-r.POST("/api/generate", s.withInferenceRequestLogging("/api/generate", s.GenerateHandler)...)
-r.POST("/api/chat", s.withInferenceRequestLogging("/api/chat", s.ChatHandler)...)
+r.POST("/api/generate", s.GenerateHandler)
+r.POST("/api/chat", s.ChatHandler)
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
// Inference (OpenAI compatibility)
-r.POST("/v1/chat/completions", s.withInferenceRequestLogging("/v1/chat/completions", middleware.ChatMiddleware(), s.ChatHandler)...)
-r.POST("/v1/completions", s.withInferenceRequestLogging("/v1/completions", middleware.CompletionsMiddleware(), s.GenerateHandler)...)
+r.POST("/v1/chat/completions", middleware.ChatMiddleware(), s.ChatHandler)
+r.POST("/v1/completions", middleware.CompletionsMiddleware(), s.GenerateHandler)
r.POST("/v1/embeddings", middleware.EmbeddingsMiddleware(), s.EmbedHandler)
r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
-r.POST("/v1/responses", s.withInferenceRequestLogging("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)...)
+r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
// OpenAI-compatible image generation endpoints
r.POST("/v1/images/generations", middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
r.POST("/v1/images/edits", middleware.ImageEditsMiddleware(), s.GenerateHandler)
// Inference (Anthropic compatibility)
-r.POST("/v1/messages", s.withInferenceRequestLogging("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)...)
+r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
if rc != nil {
// wrap old with new
@@ -1651,9 +1650,6 @@ func Serve(ln net.Listener) error {
}
s := &Server{addr: ln.Addr()}
-if err := s.initRequestLogging(); err != nil {
-return err
-}
var rc *ollama.Registry
if useClient2 {


@@ -1,128 +0,0 @@
package server
import (
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/gin-gonic/gin"
)
func TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts(t *testing.T) {
gin.SetMode(gin.TestMode)
logDir := t.TempDir()
requestLogger := &inferenceRequestLogger{dir: logDir}
const route = "/v1/chat/completions"
const requestBody = `{"model":"test-model","messages":[{"role":"user","content":"hello"}]}`
var bodySeenByHandler string
r := gin.New()
r.POST(route, requestLogger.middleware(route), func(c *gin.Context) {
body, err := io.ReadAll(c.Request.Body)
if err != nil {
t.Fatalf("failed to read body in handler: %v", err)
}
bodySeenByHandler = string(body)
c.Status(http.StatusOK)
})
req := httptest.NewRequest(http.MethodPost, route, strings.NewReader(requestBody))
req.Host = "127.0.0.1:11434"
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d", w.Code)
}
if bodySeenByHandler != requestBody {
t.Fatalf("handler body mismatch:\nexpected: %s\ngot: %s", requestBody, bodySeenByHandler)
}
bodyFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_body.json"))
if err != nil {
t.Fatalf("failed to glob body logs: %v", err)
}
if len(bodyFiles) != 1 {
t.Fatalf("expected 1 body log, got %d (%v)", len(bodyFiles), bodyFiles)
}
curlFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_request.sh"))
if err != nil {
t.Fatalf("failed to glob curl logs: %v", err)
}
if len(curlFiles) != 1 {
t.Fatalf("expected 1 curl log, got %d (%v)", len(curlFiles), curlFiles)
}
bodyData, err := os.ReadFile(bodyFiles[0])
if err != nil {
t.Fatalf("failed to read body log: %v", err)
}
if string(bodyData) != requestBody {
t.Fatalf("body log mismatch:\nexpected: %s\ngot: %s", requestBody, string(bodyData))
}
curlData, err := os.ReadFile(curlFiles[0])
if err != nil {
t.Fatalf("failed to read curl log: %v", err)
}
curlString := string(curlData)
if !strings.Contains(curlString, "http://127.0.0.1:11434"+route) {
t.Fatalf("curl log does not contain expected route URL: %s", curlString)
}
bodyFileName := filepath.Base(bodyFiles[0])
if !strings.Contains(curlString, "@\"${SCRIPT_DIR}/"+bodyFileName+"\"") {
t.Fatalf("curl log does not reference sibling body file: %s", curlString)
}
}
func TestNewInferenceRequestLoggerCreatesDirectory(t *testing.T) {
requestLogger, err := newInferenceRequestLogger()
if err != nil {
t.Fatalf("expected no error creating request logger: %v", err)
}
t.Cleanup(func() {
_ = os.RemoveAll(requestLogger.dir)
})
if requestLogger == nil || requestLogger.dir == "" {
t.Fatalf("expected request logger directory to be set")
}
info, err := os.Stat(requestLogger.dir)
if err != nil {
t.Fatalf("expected directory to exist: %v", err)
}
if !info.IsDir() {
t.Fatalf("expected %q to be a directory", requestLogger.dir)
}
}
func TestSanitizeRouteForFilename(t *testing.T) {
tests := []struct {
route string
want string
}{
{route: "/api/generate", want: "api_generate"},
{route: "/v1/chat/completions", want: "v1_chat_completions"},
{route: "/v1/messages", want: "v1_messages"},
}
for _, tt := range tests {
if got := sanitizeRouteForFilename(tt.route); got != tt.want {
t.Fatalf("sanitizeRouteForFilename(%q) = %q, want %q", tt.route, got, tt.want)
}
}
}


@@ -417,9 +417,9 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, systemInfo ml.SystemInfo
numParallel = 1
}
// Some architectures are not safe with num_parallel > 1.
// `mllama`, `qwen3vl`, and `qwen3vlmoe` are snowflakes and uses an encoder cache which cannot be used with num_parallel > 1
// ref: https://github.com/ollama/ollama/issues/4165
-if slices.Contains([]string{"mllama", "qwen3vl", "qwen3vlmoe", "qwen3next", "lfm2", "lfm2moe"}, req.model.Config.ModelFamily) && numParallel != 1 {
+if slices.Contains([]string{"mllama", "qwen3vl", "qwen3vlmoe"}, req.model.Config.ModelFamily) && numParallel != 1 {
numParallel = 1
slog.Warn("model architecture does not currently support parallel requests", "architecture", req.model.Config.ModelFamily)
}