cmd: fix opencode config (#13894)

cmd: add fallback for claude (#13892)
glm4moelite: fix attention scale calculation (#13893)
2026-01-25 07:51:01 -05:00 · 2026-01-24 18:42:56 -08:00 · 2026-01-24 18:26:01 -08:00 · 2026-01-24 17:48:09 -08:00 · 2026-01-24 16:33:54 -08:00
6 changed files with 188 additions and 14 deletions
--- a/cmd/config/claude.go
+++ b/cmd/config/claude.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
+	"path/filepath"
+	"runtime"
 )

 // Claude implements Runner for Claude Code integration
@@ -18,12 +20,32 @@ func (c *Claude) args(model string) []string {
 	return nil
 }

+func (c *Claude) findPath() (string, error) { // findPath returns the path of the claude executable: PATH first, then <home>/.claude/local
+	if p, err := exec.LookPath("claude"); err == nil {
+		return p, nil
+	}
+	home, err := os.UserHomeDir() // PATH lookup failed; try the per-user fallback location
+	if err != nil {
+		return "", err
+	}
+	name := "claude"
+	if runtime.GOOS == "windows" {
+		name = "claude.exe" // the fallback file name carries the .exe extension on Windows
+	}
+	fallback := filepath.Join(home, ".claude", "local", name)
+	if _, err := os.Stat(fallback); err != nil {
+		return "", err // the Stat error (e.g. file does not exist) is returned unchanged
+	}
+	return fallback, nil
+}
+
 func (c *Claude) Run(model string) error {
-	if _, err := exec.LookPath("claude"); err != nil {
+	claudePath, err := c.findPath()
+	if err != nil {
 		return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
 	}

-	cmd := exec.Command("claude", c.args(model)...)
+	cmd := exec.Command(claudePath, c.args(model)...)
 	cmd.Stdin = os.Stdin
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
--- a/cmd/config/claude_test.go
+++ b/cmd/config/claude_test.go
@@ -1,6 +1,9 @@
 package config

 import (
+	"os"
+	"path/filepath"
+	"runtime"
 	"slices"
 	"testing"
 )
@@ -19,6 +22,62 @@ func TestClaudeIntegration(t *testing.T) {
 	})
 }

+func TestClaudeFindPath(t *testing.T) { // covers findPath: hit on PATH, fallback under the home directory, and the not-found error
+	c := &Claude{}
+
+	t.Run("finds claude in PATH", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		name := "claude"
+		if runtime.GOOS == "windows" {
+			name = "claude.exe"
+		}
+		fakeBin := filepath.Join(tmpDir, name)
+		os.WriteFile(fakeBin, []byte("#!/bin/sh\n"), 0o755) // NOTE(review): the write error is discarded; a failed write would only show up as a findPath failure below
+		t.Setenv("PATH", tmpDir)
+
+		got, err := c.findPath()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if got != fakeBin {
+			t.Errorf("findPath() = %q, want %q", got, fakeBin)
+		}
+	})
+
+	t.Run("falls back to ~/.claude/local/claude", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		t.Setenv("PATH", t.TempDir()) // fresh empty dir with no claude binary, so the PATH lookup cannot succeed
+
+		name := "claude"
+		if runtime.GOOS == "windows" {
+			name = "claude.exe"
+		}
+		fallback := filepath.Join(tmpDir, ".claude", "local", name)
+		os.MkdirAll(filepath.Dir(fallback), 0o755)
+		os.WriteFile(fallback, []byte("#!/bin/sh\n"), 0o755) // NOTE(review): errors from MkdirAll and WriteFile are discarded here
+
+		got, err := c.findPath()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if got != fallback {
+			t.Errorf("findPath() = %q, want %q", got, fallback)
+		}
+	})
+
+	t.Run("returns error when neither PATH nor fallback exists", func(t *testing.T) {
+		tmpDir := t.TempDir()
+		setTestHome(t, tmpDir)
+		t.Setenv("PATH", t.TempDir()) // fresh empty dir with no claude binary; the fallback file is never created in this case
+
+		_, err := c.findPath()
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+	})
+}
+
 func TestClaudeArgs(t *testing.T) {
 	c := &Claude{}

--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -105,17 +105,26 @@ func (o *OpenCode) Edit(modelList []string) error {

 	for name, cfg := range models {
 		if cfgMap, ok := cfg.(map[string]any); ok {
-			if displayName, ok := cfgMap["name"].(string); ok {
-				if strings.HasSuffix(displayName, "[Ollama]") && !selectedSet[name] {
-					delete(models, name)
-				}
+			if isOllamaModel(cfgMap) && !selectedSet[name] {
+				delete(models, name)
 			}
 		}
 	}

 	for _, model := range modelList {
+		if existing, ok := models[model].(map[string]any); ok {
+			// migrate existing models without _launch marker
+			if isOllamaModel(existing) {
+				existing["_launch"] = true
+				if name, ok := existing["name"].(string); ok {
+					existing["name"] = strings.TrimSuffix(name, " [Ollama]")
+				}
+			}
+			continue
+		}
 		models[model] = map[string]any{
-			"name": fmt.Sprintf("%s [Ollama]", model),
+			"name":    model,
+			"_launch": true,
 		}
 	}

@@ -201,3 +210,15 @@ func (o *OpenCode) Models() []string {
 	slices.Sort(keys)
 	return keys
 }
+
+// isOllamaModel reports whether cfg is an entry managed by us: "_launch" is true, or its "name" ends in the legacy "[Ollama]" suffix
+func isOllamaModel(cfg map[string]any) bool {
+	if v, ok := cfg["_launch"].(bool); ok && v {
+		return true
+	}
+	// entries without the _launch marker are recognized by the [Ollama] name suffix that ollama launch previously added
+	if name, ok := cfg["name"].(string); ok {
+		return strings.HasSuffix(name, "[Ollama]")
+	}
+	return false
+}
--- a/cmd/config/opencode_test.go
+++ b/cmd/config/opencode_test.go
@@ -161,6 +161,76 @@ func TestOpenCodeEdit(t *testing.T) {
 		assertOpenCodeModelNotExists(t, configPath, "mistral")
 	})

+	t.Run("preserve user customizations on managed models", func(t *testing.T) {
+		cleanup()
+		if err := o.Edit([]string{"llama3.2"}); err != nil {
+			t.Fatal(err)
+		}
+
+		// Add custom fields to the model entry (simulating user edits)
+		data, _ := os.ReadFile(configPath)
+		var cfg map[string]any
+		json.Unmarshal(data, &cfg)
+		provider := cfg["provider"].(map[string]any)
+		ollama := provider["ollama"].(map[string]any)
+		models := ollama["models"].(map[string]any)
+		entry := models["llama3.2"].(map[string]any)
+		entry["_myPref"] = "custom-value"
+		entry["_myNum"] = 42
+		configData, _ := json.MarshalIndent(cfg, "", "  ")
+		os.WriteFile(configPath, configData, 0o644)
+
+		// Re-run Edit — should preserve custom fields
+		if err := o.Edit([]string{"llama3.2"}); err != nil {
+			t.Fatal(err)
+		}
+
+		data, _ = os.ReadFile(configPath)
+		json.Unmarshal(data, &cfg)
+		provider = cfg["provider"].(map[string]any)
+		ollama = provider["ollama"].(map[string]any)
+		models = ollama["models"].(map[string]any)
+		entry = models["llama3.2"].(map[string]any)
+
+		if entry["_myPref"] != "custom-value" {
+			t.Errorf("_myPref was lost: got %v", entry["_myPref"])
+		}
+		if entry["_myNum"] != float64(42) {
+			t.Errorf("_myNum was lost: got %v", entry["_myNum"])
+		}
+		if v, ok := entry["_launch"].(bool); !ok || !v {
+			t.Errorf("_launch marker missing or false: got %v", entry["_launch"])
+		}
+	})
+
+	t.Run("migrate legacy [Ollama] suffix entries", func(t *testing.T) {
+		cleanup()
+		// Write a config with a legacy entry (has [Ollama] suffix but no _launch marker)
+		os.MkdirAll(configDir, 0o755)
+		os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"llama3.2":{"name":"llama3.2 [Ollama]"}}}}}`), 0o644)
+
+		if err := o.Edit([]string{"llama3.2"}); err != nil {
+			t.Fatal(err)
+		}
+
+		data, _ := os.ReadFile(configPath)
+		var cfg map[string]any
+		json.Unmarshal(data, &cfg)
+		provider := cfg["provider"].(map[string]any)
+		ollama := provider["ollama"].(map[string]any)
+		models := ollama["models"].(map[string]any)
+		entry := models["llama3.2"].(map[string]any)
+
+		// _launch marker should be added
+		if v, ok := entry["_launch"].(bool); !ok || !v {
+			t.Errorf("_launch marker not added during migration: got %v", entry["_launch"])
+		}
+		// [Ollama] suffix should be stripped
+		if name, ok := entry["name"].(string); !ok || name != "llama3.2" {
+			t.Errorf("name suffix not stripped: got %q", entry["name"])
+		}
+	})
+
 	t.Run("remove model preserves non-ollama models", func(t *testing.T) {
 		cleanup()
 		os.MkdirAll(configDir, 0o755)
--- a/model/models/glm4moelite/model.go
+++ b/model/models/glm4moelite/model.go
@@ -223,12 +223,7 @@ func New(c fs.Config) (model.Model, error) {

 	keyLength := int(c.Uint("attention.key_length"))
 	valueLength := int(c.Uint("attention.value_length"))
-	kvLoraRank := int(c.Uint("attention.kv_lora_rank"))
-	qkRopeHeadDim := int(c.Uint("rope.dimension_count"))
-
-	// For MLA absorption, the effective key dimension is kvLoraRank + qkRopeHeadDim
-	mlaKeyLength := kvLoraRank + qkRopeHeadDim
-	kqScale := 1.0 / math.Sqrt(float64(mlaKeyLength))
+	kqScale := 1.0 / math.Sqrt(float64(keyLength))

 	var pre []string
 	switch c.String("tokenizer.ggml.pre") {
@@ -246,7 +241,7 @@ func New(c fs.Config) (model.Model, error) {
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Ints("tokenizer.ggml.token_type"),
 				Merges: c.Strings("tokenizer.ggml.merges"),
-				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
 				BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
 				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
 				EOS: append(
--- a/server/quantization.go
+++ b/server/quantization.go
@@ -95,6 +95,13 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
 			// for the 8-expert model, bumping this to Q8_0 trades just ~128MB
 			newType = fsggml.TensorTypeQ8_0
 		}
+	} else if strings.Contains(name, "attn_k_b.weight") ||
+		strings.Contains(name, "attn_v_b.weight") ||
+		strings.Contains(name, "attn_kv_a_mqa.weight") ||
+		strings.Contains(name, "attn_q_a.weight") ||
+		strings.Contains(name, "attn_q_b.weight") {
+		// MLA tensors need higher precision to avoid quality degradation
+		newType = fsggml.TensorTypeQ8_0
 	} else if strings.Contains(name, "ffn_down") {
 		iLayer := qs.iFfnDown
 		n_layer := qs.nFfnDown
Author	SHA1	Message	Date
Parth Sareen	465d124183	cmd: fix opencode config (#13894)	2026-01-24 18:42:56 -08:00
Parth Sareen	d310e56fa3	cmd: add fallback for claude (#13892)	2026-01-24 18:26:01 -08:00
Jeffrey Morgan	a1ca428c90	glm4moelite: fix attention scale calculation (#13893) Use the original key dimension (qkNopeHeadDim + qkRopeHeadDim = 256) for the attention scale instead of the MLA absorbed dimension (kvLoraRank + qkRopeHeadDim = 576). MLA absorption is a mathematically equivalent reorganization of the attention computation - it should not change the effective attention scale. The scale should match training, which uses 1/sqrt(256). This improves tool calling and model looping issues.	2026-01-24 17:48:09 -08:00
Jeffrey Morgan	16750865d1	glm4moelite: quantize more tensors to q8_0 and avoid double BOS token (#13891)	2026-01-24 16:33:54 -08:00