Compare commits

...

4 Commits

Author SHA1 Message Date
Parth Sareen
465d124183 cmd: fix opencode config (#13894) 2026-01-24 18:42:56 -08:00
Parth Sareen
d310e56fa3 cmd: add fallback for claude (#13892) 2026-01-24 18:26:01 -08:00
Jeffrey Morgan
a1ca428c90 glm4moelite: fix attention scale calculation (#13893)
Use the original key dimension (qkNopeHeadDim + qkRopeHeadDim = 256) for
the attention scale instead of the MLA absorbed dimension (kvLoraRank +
qkRopeHeadDim = 576).

MLA absorption is a mathematically equivalent reorganization of the
attention computation - it should not change the effective attention
scale. The scale should match training, which uses 1/sqrt(256).

This improves tool calling and model looping issues.
2026-01-24 17:48:09 -08:00
Jeffrey Morgan
16750865d1 glm4moelite: quantize more tensors to q8_0 and avoid double BOS token (#13891) 2026-01-24 16:33:54 -08:00
6 changed files with 188 additions and 14 deletions

View File

@@ -4,6 +4,8 @@ import (
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// Claude implements Runner for Claude Code integration
@@ -18,12 +20,32 @@ func (c *Claude) args(model string) []string {
return nil
}
func (c *Claude) findPath() (string, error) {
if p, err := exec.LookPath("claude"); err == nil {
return p, nil
}
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
name := "claude"
if runtime.GOOS == "windows" {
name = "claude.exe"
}
fallback := filepath.Join(home, ".claude", "local", name)
if _, err := os.Stat(fallback); err != nil {
return "", err
}
return fallback, nil
}
func (c *Claude) Run(model string) error {
if _, err := exec.LookPath("claude"); err != nil {
claudePath, err := c.findPath()
if err != nil {
return fmt.Errorf("claude is not installed, install from https://code.claude.com/docs/en/quickstart")
}
cmd := exec.Command("claude", c.args(model)...)
cmd := exec.Command(claudePath, c.args(model)...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr

View File

@@ -1,6 +1,9 @@
package config
import (
"os"
"path/filepath"
"runtime"
"slices"
"testing"
)
@@ -19,6 +22,62 @@ func TestClaudeIntegration(t *testing.T) {
})
}
func TestClaudeFindPath(t *testing.T) {
c := &Claude{}
t.Run("finds claude in PATH", func(t *testing.T) {
tmpDir := t.TempDir()
name := "claude"
if runtime.GOOS == "windows" {
name = "claude.exe"
}
fakeBin := filepath.Join(tmpDir, name)
os.WriteFile(fakeBin, []byte("#!/bin/sh\n"), 0o755)
t.Setenv("PATH", tmpDir)
got, err := c.findPath()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != fakeBin {
t.Errorf("findPath() = %q, want %q", got, fakeBin)
}
})
t.Run("falls back to ~/.claude/local/claude", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
name := "claude"
if runtime.GOOS == "windows" {
name = "claude.exe"
}
fallback := filepath.Join(tmpDir, ".claude", "local", name)
os.MkdirAll(filepath.Dir(fallback), 0o755)
os.WriteFile(fallback, []byte("#!/bin/sh\n"), 0o755)
got, err := c.findPath()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != fallback {
t.Errorf("findPath() = %q, want %q", got, fallback)
}
})
t.Run("returns error when neither PATH nor fallback exists", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", t.TempDir()) // empty dir, no claude binary
_, err := c.findPath()
if err == nil {
t.Fatal("expected error, got nil")
}
})
}
func TestClaudeArgs(t *testing.T) {
c := &Claude{}

View File

@@ -105,17 +105,26 @@ func (o *OpenCode) Edit(modelList []string) error {
for name, cfg := range models {
if cfgMap, ok := cfg.(map[string]any); ok {
if displayName, ok := cfgMap["name"].(string); ok {
if strings.HasSuffix(displayName, "[Ollama]") && !selectedSet[name] {
delete(models, name)
}
if isOllamaModel(cfgMap) && !selectedSet[name] {
delete(models, name)
}
}
}
for _, model := range modelList {
if existing, ok := models[model].(map[string]any); ok {
// migrate existing models without _launch marker
if isOllamaModel(existing) {
existing["_launch"] = true
if name, ok := existing["name"].(string); ok {
existing["name"] = strings.TrimSuffix(name, " [Ollama]")
}
}
continue
}
models[model] = map[string]any{
"name": fmt.Sprintf("%s [Ollama]", model),
"name": model,
"_launch": true,
}
}
@@ -201,3 +210,15 @@ func (o *OpenCode) Models() []string {
slices.Sort(keys)
return keys
}
// isOllamaModel reports whether a model config entry is managed by us
func isOllamaModel(cfg map[string]any) bool {
if v, ok := cfg["_launch"].(bool); ok && v {
return true
}
// previously used [Ollama] as a suffix for the model managed by ollama launch
if name, ok := cfg["name"].(string); ok {
return strings.HasSuffix(name, "[Ollama]")
}
return false
}

View File

@@ -161,6 +161,76 @@ func TestOpenCodeEdit(t *testing.T) {
assertOpenCodeModelNotExists(t, configPath, "mistral")
})
t.Run("preserve user customizations on managed models", func(t *testing.T) {
cleanup()
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
// Add custom fields to the model entry (simulating user edits)
data, _ := os.ReadFile(configPath)
var cfg map[string]any
json.Unmarshal(data, &cfg)
provider := cfg["provider"].(map[string]any)
ollama := provider["ollama"].(map[string]any)
models := ollama["models"].(map[string]any)
entry := models["llama3.2"].(map[string]any)
entry["_myPref"] = "custom-value"
entry["_myNum"] = 42
configData, _ := json.MarshalIndent(cfg, "", " ")
os.WriteFile(configPath, configData, 0o644)
// Re-run Edit — should preserve custom fields
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
data, _ = os.ReadFile(configPath)
json.Unmarshal(data, &cfg)
provider = cfg["provider"].(map[string]any)
ollama = provider["ollama"].(map[string]any)
models = ollama["models"].(map[string]any)
entry = models["llama3.2"].(map[string]any)
if entry["_myPref"] != "custom-value" {
t.Errorf("_myPref was lost: got %v", entry["_myPref"])
}
if entry["_myNum"] != float64(42) {
t.Errorf("_myNum was lost: got %v", entry["_myNum"])
}
if v, ok := entry["_launch"].(bool); !ok || !v {
t.Errorf("_launch marker missing or false: got %v", entry["_launch"])
}
})
t.Run("migrate legacy [Ollama] suffix entries", func(t *testing.T) {
cleanup()
// Write a config with a legacy entry (has [Ollama] suffix but no _launch marker)
os.MkdirAll(configDir, 0o755)
os.WriteFile(configPath, []byte(`{"provider":{"ollama":{"models":{"llama3.2":{"name":"llama3.2 [Ollama]"}}}}}`), 0o644)
if err := o.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
data, _ := os.ReadFile(configPath)
var cfg map[string]any
json.Unmarshal(data, &cfg)
provider := cfg["provider"].(map[string]any)
ollama := provider["ollama"].(map[string]any)
models := ollama["models"].(map[string]any)
entry := models["llama3.2"].(map[string]any)
// _launch marker should be added
if v, ok := entry["_launch"].(bool); !ok || !v {
t.Errorf("_launch marker not added during migration: got %v", entry["_launch"])
}
// [Ollama] suffix should be stripped
if name, ok := entry["name"].(string); !ok || name != "llama3.2" {
t.Errorf("name suffix not stripped: got %q", entry["name"])
}
})
t.Run("remove model preserves non-ollama models", func(t *testing.T) {
cleanup()
os.MkdirAll(configDir, 0o755)

View File

@@ -223,12 +223,7 @@ func New(c fs.Config) (model.Model, error) {
keyLength := int(c.Uint("attention.key_length"))
valueLength := int(c.Uint("attention.value_length"))
kvLoraRank := int(c.Uint("attention.kv_lora_rank"))
qkRopeHeadDim := int(c.Uint("rope.dimension_count"))
// For MLA absorption, the effective key dimension is kvLoraRank + qkRopeHeadDim
mlaKeyLength := kvLoraRank + qkRopeHeadDim
kqScale := 1.0 / math.Sqrt(float64(mlaKeyLength))
kqScale := 1.0 / math.Sqrt(float64(keyLength))
var pre []string
switch c.String("tokenizer.ggml.pre") {
@@ -246,7 +241,7 @@ func New(c fs.Config) (model.Model, error) {
Values: c.Strings("tokenizer.ggml.tokens"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(

View File

@@ -95,6 +95,13 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
// for the 8-expert model, bumping this to Q8_0 trades just ~128MB
newType = fsggml.TensorTypeQ8_0
}
} else if strings.Contains(name, "attn_k_b.weight") ||
strings.Contains(name, "attn_v_b.weight") ||
strings.Contains(name, "attn_kv_a_mqa.weight") ||
strings.Contains(name, "attn_q_a.weight") ||
strings.Contains(name, "attn_q_b.weight") {
// MLA tensors need higher precision to avoid quality degradation
newType = fsggml.TensorTypeQ8_0
} else if strings.Contains(name, "ffn_down") {
iLayer := qs.iFfnDown
n_layer := qs.nFfnDown