Compare commits

...

17 Commits

Author SHA1 Message Date
Bruce MacDonald
3cf62838ce server: do partial gguf kv read for capability check 2025-06-06 16:14:19 -07:00
Bruce MacDonald
22aed78048 Update gguf_test.go 2025-06-06 15:20:43 -07:00
Bruce MacDonald
d3cbbbfd85 more type conversions 2025-06-06 15:17:22 -07:00
Bruce MacDonald
e8d1933b99 fix lint unneeded conversions 2025-06-06 15:05:09 -07:00
Bruce MacDonald
735e80787b gguf: update test to not rely on gguf on disc 2025-06-06 14:40:23 -07:00
Michael Yang
8e3998b9dd wip: incremental gguf parser 2025-06-06 14:40:23 -07:00
Daniel Hiltgen
a8ed68bd93 launch app hidden (#10962)
When starting the app in the background, start it hidden.
2025-06-06 14:06:29 -07:00
Daniel Hiltgen
2ae65ae471 win: handle more than 2048 processes (#10997)
Fix an array out of bounds crash
2025-06-06 14:06:09 -07:00
Devon Rifkin
a3b6886b7d move thinking logic into its own package (#10990)
move thinking logic into its own package
2025-06-06 12:02:20 -07:00
Hunter Wittenborn
c6a6d7294d docs: fix typo in development.md (#10998) 2025-06-06 12:07:29 -04:00
Devon Rifkin
2cf007c9d1 Merge pull request #10987 from ollama/drifkin/export-thinking-parser
export ThinkingParser
2025-06-05 12:19:14 -07:00
Devon Rifkin
0683efa637 export ThinkingParser 2025-06-05 10:22:32 -07:00
JasonHonKL
0943001193 server: add model capabilities to the list endpoint (#10174) 2025-06-04 11:39:48 -07:00
HardCodeDev
5c42800fca readme: add SimpleOllamaUnity to community integrations (#10817) 2025-05-30 19:50:16 -07:00
Parth Sareen
65f10c2823 tools: resiliency upgrade to name and arg extraction from template (#10917) 2025-05-30 15:18:09 -07:00
Jesse Gross
aaa7818000 ggml: Export GPU UUIDs
This enables matching up devices and information reported by the backend
with system management libraries such as nvml to get accurate free
memory reporting.
2025-05-29 14:01:26 -07:00
Jesse Gross
f15ffc4320 llm: Make "POST predict" error message more informative
"POST predict" basically means that the runner has crashed, which
can have many reasons. However, many people think this is a specific
error and either report only this message or group together unrelated
bugs. This replaces it with a more friendly and helpful message.
2025-05-29 09:41:19 -07:00
29 changed files with 2249 additions and 488 deletions

View File

@@ -587,6 +587,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai)
- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primary for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration e.t.c)
- [mcp-llm](https://github.com/sammcj/mcp-llm) (MCP Server to allow LLMs to call other LLMs)
- [SimpleOllamaUnity](https://github.com/HardCodeDev777/SimpleOllamaUnity) (Unity Engine extension for communicating with Ollama in a few lines of code. Also works at runtime)
- [UnityCodeLama](https://github.com/HardCodeDev777/UnityCodeLama) (Unity Edtior tool to analyze scripts via Ollama)
### Supported backends

View File

@@ -457,12 +457,13 @@ type ProcessResponse struct {
// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
ModifiedAt time.Time `json:"modified_at"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
Name string `json:"name"`
Model string `json:"model"`
ModifiedAt time.Time `json:"modified_at"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Capabilities []model.Capability `json:"capabilities,omitempty"`
Details ModelDetails `json:"details,omitempty"`
}
// ProcessModelResponse is a single model description in [ProcessResponse].

View File

@@ -23,7 +23,7 @@ func startApp(ctx context.Context, client *api.Client) error {
return errors.New("could not find ollama app")
}
path := strings.Split(link, "Ollama.app")
if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
if err := exec.Command("/usr/bin/open", "-j", "-a", path[0]+"Ollama.app").Run(); err != nil {
return err
}
return waitForServer(ctx, client)

View File

@@ -45,14 +45,11 @@ func startApp(ctx context.Context, client *api.Client) error {
}
}
}
// log.Printf("XXX attempting to start app %s", appExe)
cmd_path := "c:\\Windows\\system32\\cmd.exe"
cmd := exec.Command(cmd_path, "/c", appExe)
// TODO - these hide flags aren't working - still pops up a command window for some reason
cmd := exec.Command(cmd_path, "/c", appExe, "hidden")
cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: 0x08000000, HideWindow: true}
// TODO this didn't help either...
cmd.Stdin = strings.NewReader("")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
@@ -74,7 +71,16 @@ func isProcRunning(procName string) []uint32 {
slog.Debug("failed to check for running installers", "error", err)
return nil
}
pids = pids[:ret]
if ret > uint32(len(pids)) {
pids = make([]uint32, ret+10)
if err := windows.EnumProcesses(pids, &ret); err != nil || ret == 0 {
slog.Debug("failed to check for running installers", "error", err)
return nil
}
}
if ret < uint32(len(pids)) {
pids = pids[:ret]
}
var matches []uint32
for _, pid := range pids {
if pid == 0 {

View File

@@ -1157,11 +1157,15 @@ A single JSON object will be returned.
{
"models": [
{
"name": "deepseek-r1:latest",
"model": "deepseek-r1:latest",
"modified_at": "2025-05-10T08:06:48.639712648-07:00",
"size": 4683075271,
"digest": "0a8c266910232fd3291e71e5ba1e058cc5af9d411192cf88b6d30e92b6e73163",
"model": "codellama:13b",
"modified_at": "2023-11-04T14:56:49.277302595-07:00",
"size": 7365960935,
"digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
"capabilities": [
"completion"
],
"details": {
"parent_model": "",
"format": "gguf",
@@ -1174,11 +1178,16 @@ A single JSON object will be returned.
}
},
{
"name": "llama3.2:latest",
"model": "llama3.2:latest",
"modified_at": "2025-05-04T17:37:44.706015396-07:00",
"size": 2019393189,
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"model": "llama4:latest",
"modified_at": "2023-12-07T09:32:18.757212583-08:00",
"size": 3825819519,
"digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
"capabilities": [
"completion",
"vision"
],
"details": {
"parent_model": "",
"format": "gguf",

View File

@@ -118,7 +118,7 @@ To run tests, use `go test`:
go test ./...
```
> NOTE: In rare cirumstances, you may nedd to change a package using the new
> NOTE: In rare cirumstances, you may need to change a package using the new
> "synctest" package in go1.24.
>
> If you do not have the "synctest" package enabled, you will not see build or

350
fs/gguf/gguf.go Normal file
View File

@@ -0,0 +1,350 @@
package gguf
import (
"bytes"
"cmp"
"encoding/binary"
"errors"
"fmt"
"io"
"iter"
"os"
"slices"
"strings"
)
const (
typeUint8 uint32 = iota
typeInt8
typeUint16
typeInt16
typeUint32
typeInt32
typeFloat32
typeBool
typeString
typeArray
typeUint64
typeInt64
typeFloat64
)
var ErrUnsupported = errors.New("unsupported")
type File struct {
Magic [4]byte
Version uint32
keyValues *lazy[KeyValue]
tensors *lazy[TensorInfo]
offset int64
file *os.File
reader *readSeeker
bts []byte
}
func Open(path string) (f *File, err error) {
f = &File{bts: make([]byte, 4096)}
f.file, err = os.Open(path)
if err != nil {
return nil, err
}
f.reader = newReadSeeker(f.file, 32<<10)
if err := binary.Read(f.reader, binary.LittleEndian, &f.Magic); err != nil {
return nil, err
}
if bytes.Equal(f.Magic[:], []byte("gguf")) {
return nil, fmt.Errorf("%w file type %v", ErrUnsupported, f.Magic)
}
if err := binary.Read(f.reader, binary.LittleEndian, &f.Version); err != nil {
return nil, err
}
if f.Version != 3 {
return nil, fmt.Errorf("%w version %v", ErrUnsupported, f.Version)
}
f.tensors, err = newLazy(f, f.readTensor)
if err != nil {
return nil, err
}
f.tensors.doneFunc = func() error {
offset, err := f.reader.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
alignment := cmp.Or(f.KeyValue("general.alignment").Int(), 32)
f.offset = offset + (alignment-offset%alignment)%alignment
return nil
}
f.keyValues, err = newLazy(f, f.readKeyValue)
if err != nil {
return nil, err
}
return f, nil
}
func (f *File) readTensor() (TensorInfo, error) {
name, err := readString(f)
if err != nil {
return TensorInfo{}, err
}
dims, err := read[uint32](f)
if err != nil {
return TensorInfo{}, err
}
shape := make([]uint64, dims)
for i := range dims {
shape[i], err = read[uint64](f)
if err != nil {
return TensorInfo{}, err
}
}
type_, err := read[uint32](f)
if err != nil {
return TensorInfo{}, err
}
offset, err := read[uint64](f)
if err != nil {
return TensorInfo{}, err
}
return TensorInfo{
Name: name,
Offset: offset,
Shape: shape,
Type: TensorType(type_),
}, nil
}
func (f *File) readKeyValue() (KeyValue, error) {
key, err := readString(f)
if err != nil {
return KeyValue{}, err
}
t, err := read[uint32](f)
if err != nil {
return KeyValue{}, err
}
value, err := func() (any, error) {
switch t {
case typeUint8:
return read[uint8](f)
case typeInt8:
return read[int8](f)
case typeUint16:
return read[uint16](f)
case typeInt16:
return read[int16](f)
case typeUint32:
return read[uint32](f)
case typeInt32:
return read[int32](f)
case typeUint64:
return read[uint64](f)
case typeInt64:
return read[int64](f)
case typeFloat32:
return read[float32](f)
case typeFloat64:
return read[float64](f)
case typeBool:
return read[bool](f)
case typeString:
return readString(f)
case typeArray:
return readArray(f)
default:
return nil, fmt.Errorf("%w type %d", ErrUnsupported, t)
}
}()
if err != nil {
return KeyValue{}, err
}
return KeyValue{
Key: key,
Value: Value{value},
}, nil
}
func read[T any](f *File) (t T, err error) {
err = binary.Read(f.reader, binary.LittleEndian, &t)
return t, err
}
func readString(f *File) (string, error) {
n, err := read[uint64](f)
if err != nil {
return "", err
}
if int(n) > len(f.bts) {
f.bts = make([]byte, n)
}
bts := f.bts[:n]
if _, err := io.ReadFull(f.reader, bts); err != nil {
return "", err
}
defer clear(bts)
return string(bts), nil
}
func readArray(f *File) (any, error) {
t, err := read[uint32](f)
if err != nil {
return nil, err
}
n, err := read[uint64](f)
if err != nil {
return nil, err
}
switch t {
case typeUint8:
return readArrayData[uint8](f, n)
case typeInt8:
return readArrayData[int8](f, n)
case typeUint16:
return readArrayData[uint16](f, n)
case typeInt16:
return readArrayData[int16](f, n)
case typeUint32:
return readArrayData[uint32](f, n)
case typeInt32:
return readArrayData[int32](f, n)
case typeUint64:
return readArrayData[uint64](f, n)
case typeInt64:
return readArrayData[int64](f, n)
case typeFloat32:
return readArrayData[float32](f, n)
case typeFloat64:
return readArrayData[float64](f, n)
case typeBool:
return readArrayData[bool](f, n)
case typeString:
return readArrayString(f, n)
default:
return nil, fmt.Errorf("%w type %d", ErrUnsupported, t)
}
}
func readArrayData[T any](f *File, n uint64) (s []T, err error) {
s = make([]T, n)
for i := range n {
e, err := read[T](f)
if err != nil {
return nil, err
}
s[i] = e
}
return s, nil
}
func readArrayString(f *File, n uint64) (s []string, err error) {
s = make([]string, n)
for i := range n {
e, err := readString(f)
if err != nil {
return nil, err
}
s[i] = e
}
return s, nil
}
func (f *File) Close() error {
f.keyValues.stop()
f.tensors.stop()
return f.file.Close()
}
func (f *File) KeyValue(key string) KeyValue {
if !strings.HasPrefix(key, "general.") && !strings.HasPrefix(key, "tokenizer.") {
key = f.KeyValue("general.architecture").String() + "." + key
}
if index := slices.IndexFunc(f.keyValues.values, func(kv KeyValue) bool {
return kv.Key == key
}); index >= 0 {
return f.keyValues.values[index]
}
for keyValue, ok := f.keyValues.next(); ok; keyValue, ok = f.keyValues.next() {
if keyValue.Key == key {
return keyValue
}
}
return KeyValue{}
}
func (f *File) NumKeyValues() int {
return int(f.keyValues.count)
}
func (f *File) KeyValues() iter.Seq2[int, KeyValue] {
return f.keyValues.All()
}
func (f *File) TensorInfo(name string) TensorInfo {
if index := slices.IndexFunc(f.tensors.values, func(t TensorInfo) bool {
return t.Name == name
}); index >= 0 {
return f.tensors.values[index]
}
// fast-forward through key values if we haven't already
_ = f.keyValues.rest()
for tensor, ok := f.tensors.next(); ok; tensor, ok = f.tensors.next() {
if tensor.Name == name {
return tensor
}
}
return TensorInfo{}
}
func (f *File) NumTensors() int {
return int(f.tensors.count)
}
func (f *File) TensorInfos() iter.Seq2[int, TensorInfo] {
// fast forward through key values if we haven't already
f.keyValues.rest()
return f.tensors.All()
}
func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) {
t := f.TensorInfo(name)
if t.NumBytes() == 0 {
return TensorInfo{}, nil, fmt.Errorf("tensor %s not found", name)
}
// fast forward through tensor info if we haven't already
_ = f.tensors.rest()
return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), t.NumBytes()), nil
}

320
fs/gguf/gguf_test.go Normal file
View File

@@ -0,0 +1,320 @@
package gguf
import (
"encoding/binary"
"fmt"
"os"
"path/filepath"
"slices"
"testing"
)
func TestRead(t *testing.T) {
// Setup
tempDir := t.TempDir()
tempFile := filepath.Join(tempDir, "test.gguf")
if err := createTestGGUFFile(tempFile, map[string]any{
"general.architecture": "llama",
"general.alignment": int64(32),
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
{Name: "output.weight", Shape: []uint64{512, 1000}, Type: 1}, // F16
}); err != nil {
t.Fatal(err)
}
f, err := Open(tempFile)
if err != nil {
t.Fatal(err)
}
defer f.Close()
// Test
if got := f.NumKeyValues(); got != 2 {
t.Errorf("NumKeyValues() = %d, want %d", got, 2)
}
if got := f.NumTensors(); got != 2 {
t.Errorf("NumTensors() = %d, want %d", got, 2)
}
archKV := f.KeyValue("general.architecture")
if archKV.Key == "" {
t.Error("KeyValue(\"general.architecture\") not found")
}
if got := archKV.String(); got != "llama" {
t.Errorf("KeyValue(\"general.architecture\").String() = %q, want %q", got, "llama")
}
alignKV := f.KeyValue("general.alignment")
if alignKV.Key == "" {
t.Error("KeyValue(\"general.alignment\") not found")
}
if got := alignKV.Int(); got != 32 {
t.Errorf("KeyValue(\"general.alignment\").Int() = %d, want %d", got, 32)
}
expectedTensorNames := []string{"token_embd.weight", "output.weight"}
var gotTensorNames []string
for _, tensor := range f.TensorInfos() {
gotTensorNames = append(gotTensorNames, tensor.Name)
}
if !slices.Equal(gotTensorNames, expectedTensorNames) {
t.Errorf("tensor names = %v, want %v", gotTensorNames, expectedTensorNames)
}
tokenTensor := f.TensorInfo("token_embd.weight")
if tokenTensor.Name != "token_embd.weight" {
t.Error("TensorInfo(\"token_embd.weight\") not found")
}
if len(tokenTensor.Shape) == 0 {
t.Error("TensorInfo(\"token_embd.weight\") has empty shape")
}
outputTensor := f.TensorInfo("output.weight")
if outputTensor.Name != "output.weight" {
t.Error("TensorInfo(\"output.weight\") not found")
}
if len(outputTensor.Shape) == 0 {
t.Error("TensorInfo(\"output.weight\") has empty shape")
}
var gotKeyCount int
for _, kv := range f.KeyValues() {
gotKeyCount++
if kv.Key == "" {
t.Error("found key value with empty key")
}
}
if gotKeyCount != 2 {
t.Errorf("iterated key count = %d, want %d", gotKeyCount, 2)
}
tensorInfo, reader, err := f.TensorReader("token_embd.weight")
if err != nil {
t.Errorf("TensorReader(\"token_embd.weight\") error: %v", err)
}
if tensorInfo.Name != "token_embd.weight" {
t.Errorf("TensorReader returned wrong tensor: %q", tensorInfo.Name)
}
if reader == nil {
t.Error("TensorReader returned nil reader")
}
}
func BenchmarkRead(b *testing.B) {
// Create benchmark test file
tempDir := b.TempDir()
tempFile := filepath.Join(tempDir, "benchmark.gguf")
if err := createTestGGUFFile(tempFile, map[string]any{
"general.architecture": "llama",
"general.alignment": int64(32),
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
{Name: "output.weight", Shape: []uint64{512, 1000}, Type: 1}, // F16
}); err != nil {
b.Fatal(err)
}
// Get file info for reporting
info, err := os.Stat(tempFile)
if err != nil {
b.Fatal(err)
}
b.Logf("Benchmark file size: %d bytes", info.Size())
b.ReportAllocs()
for b.Loop() {
f, err := Open(tempFile)
if err != nil {
b.Fatal(err)
}
// Access some data to ensure it's actually being read
_ = f.KeyValue("general.architecture").String()
_ = f.KeyValue("general.alignment").Int()
_ = f.NumTensors()
_ = f.NumKeyValues()
// Iterate through some tensors
count := 0
for _, tensor := range f.TensorInfos() {
_ = tensor.Name
count++
if count >= 2 {
break
}
}
f.Close()
}
}
// Helper function to create test GGUF files
func createTestGGUFFile(path string, keyValues map[string]any, tensors []testTensorInfo) error {
file, err := os.Create(path)
if err != nil {
return err
}
defer file.Close()
// Write GGUF magic
if _, err := file.Write([]byte("GGUF")); err != nil {
return err
}
// Write version
if err := binary.Write(file, binary.LittleEndian, uint32(3)); err != nil {
return err
}
// Write tensor count
if err := binary.Write(file, binary.LittleEndian, uint64(len(tensors))); err != nil {
return err
}
// Write metadata count
if err := binary.Write(file, binary.LittleEndian, uint64(len(keyValues))); err != nil {
return err
}
// Write metadata
for key, value := range keyValues {
if err := writeKeyValue(file, key, value); err != nil {
return err
}
}
// Write tensor info
for _, tensor := range tensors {
if err := writeTensorInfo(file, tensor); err != nil {
return err
}
}
// Write some dummy tensor data
dummyData := make([]byte, 1024)
file.Write(dummyData)
return nil
}
type testTensorInfo struct {
Name string
Shape []uint64
Type uint32
}
func writeKeyValue(file *os.File, key string, value any) error {
// Write key length and key
if err := binary.Write(file, binary.LittleEndian, uint64(len(key))); err != nil {
return err
}
if _, err := file.Write([]byte(key)); err != nil {
return err
}
// Write value based on type
switch v := value.(type) {
case string:
if err := binary.Write(file, binary.LittleEndian, typeString); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
_, err := file.Write([]byte(v))
return err
case int64:
if err := binary.Write(file, binary.LittleEndian, typeInt64); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case bool:
if err := binary.Write(file, binary.LittleEndian, typeBool); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case float64:
if err := binary.Write(file, binary.LittleEndian, typeFloat64); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case []string:
if err := binary.Write(file, binary.LittleEndian, typeArray); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, typeString); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, s := range v {
if err := binary.Write(file, binary.LittleEndian, uint64(len(s))); err != nil {
return err
}
if _, err := file.Write([]byte(s)); err != nil {
return err
}
}
return nil
case []int64:
if err := binary.Write(file, binary.LittleEndian, typeArray); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, typeInt64); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, i := range v {
if err := binary.Write(file, binary.LittleEndian, i); err != nil {
return err
}
}
return nil
case []float64:
if err := binary.Write(file, binary.LittleEndian, typeArray); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, typeFloat64); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, f := range v {
if err := binary.Write(file, binary.LittleEndian, f); err != nil {
return err
}
}
return nil
default:
return fmt.Errorf("unsupported value type: %T", value)
}
}
func writeTensorInfo(file *os.File, tensor testTensorInfo) error {
// Write tensor name
if err := binary.Write(file, binary.LittleEndian, uint64(len(tensor.Name))); err != nil {
return err
}
if _, err := file.Write([]byte(tensor.Name)); err != nil {
return err
}
// Write dimensions
if err := binary.Write(file, binary.LittleEndian, uint32(len(tensor.Shape))); err != nil {
return err
}
for _, dim := range tensor.Shape {
if err := binary.Write(file, binary.LittleEndian, dim); err != nil {
return err
}
}
// Write type
if err := binary.Write(file, binary.LittleEndian, tensor.Type); err != nil {
return err
}
// Write offset (dummy value)
return binary.Write(file, binary.LittleEndian, uint64(0))
}

102
fs/gguf/keyvalue.go Normal file
View File

@@ -0,0 +1,102 @@
package gguf
import (
"reflect"
"slices"
)
type KeyValue struct {
Key string
Value
}
type Value struct {
value any
}
func value[T any](v Value, kinds ...reflect.Kind) (t T) {
vv := reflect.ValueOf(v.value)
if slices.Contains(kinds, vv.Kind()) {
t = vv.Convert(reflect.TypeOf(t)).Interface().(T)
}
return
}
func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
switch vv := reflect.ValueOf(v.value); vv.Kind() {
case reflect.Slice:
if slices.Contains(kinds, vv.Type().Elem().Kind()) {
ts = make([]T, vv.Len())
for i := range vv.Len() {
ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T)
}
}
}
return
}
// Int returns Value as a signed integer. If it is not a signed integer, it returns 0.
func (v Value) Int() int64 {
return value[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
}
// Ints returns Value as a signed integer slice. If it is not a signed integer slice, it returns nil.
func (v Value) Ints() (i64s []int64) {
return values[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
}
// Uint converts an unsigned integer value to uint64. If the value is not a unsigned integer, it returns 0.
func (v Value) Uint() uint64 {
return value[uint64](v, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64)
}
// Uints returns Value as a unsigned integer slice. If it is not a unsigned integer slice, it returns nil.
func (v Value) Uints() (u64s []uint64) {
return values[uint64](v, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64)
}
// Float returns Value as a float. If it is not a float, it returns 0.
func (v Value) Float() float64 {
return value[float64](v, reflect.Float32, reflect.Float64)
}
// Floats returns Value as a float slice. If it is not a float slice, it returns nil.
func (v Value) Floats() (f64s []float64) {
return values[float64](v, reflect.Float32, reflect.Float64)
}
// Bool returns Value as a boolean. If it is not a boolean, it returns false.
func (v Value) Bool() bool {
return value[bool](v, reflect.Bool)
}
// Bools returns Value as a boolean slice. If it is not a boolean slice, it returns nil.
func (v Value) Bools() (bools []bool) {
return values[bool](v, reflect.Bool)
}
// String returns Value as a string. If it is not a string, it returns an empty string.
func (v Value) String() string {
return value[string](v, reflect.String)
}
// Strings returns Value as a string slice. If it is not a string slice, it returns nil.
func (v Value) Strings() (strings []string) {
return values[string](v, reflect.String)
}
// IsNil checks if the Value is nil. It returns true if the value is nil or if it is a nil pointer, interface, slice, map, channel, or function.
func (v Value) IsNil() bool {
if v.value == nil {
return true
}
// Check for nil pointers, interfaces, slices, maps, channels, and functions
rv := reflect.ValueOf(v.value)
switch rv.Kind() {
case reflect.Ptr, reflect.Interface, reflect.Slice, reflect.Map, reflect.Chan, reflect.Func:
return rv.IsNil()
}
return false
}

208
fs/gguf/keyvalue_test.go Normal file
View File

@@ -0,0 +1,208 @@
package gguf
import (
"testing"
"github.com/google/go-cmp/cmp"
)
func split(name string, values map[string][]any) (matched []any, unmatched []any) {
for key, value := range values {
if key == name {
matched = value
} else {
unmatched = append(unmatched, value...)
}
}
return
}
func TestValue(t *testing.T) {
values := map[string][]any{
"int64": {int(42), int8(42), int16(42), int32(42), int64(42)},
"uint64": {uint(42), uint8(42), uint16(42), uint32(42), uint64(42)},
"float64": {float32(42), float64(42)},
"string": {"42", "hello"},
"bool": {true, false},
}
t.Run("int64", func(t *testing.T) {
matched, unmatched := split("int64", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if i64 := kv.Int(); i64 != 42 {
t.Errorf("expected 42, got %d", i64)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if i64 := kv.Int(); i64 != 0 {
t.Errorf("expected 42, got %d", i64)
}
}
})
t.Run("uint64", func(t *testing.T) {
matched, unmatched := split("uint64", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if u64 := kv.Uint(); u64 != 42 {
t.Errorf("expected 42, got %d", u64)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if u64 := kv.Uint(); u64 != 0 {
t.Errorf("expected 42, got %d", u64)
}
}
})
t.Run("float64", func(t *testing.T) {
matched, unmatched := split("float64", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if f64 := kv.Float(); f64 != 42 {
t.Errorf("expected 42, got %f", f64)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if f64 := kv.Float(); f64 != 0 {
t.Errorf("expected 42, got %f", f64)
}
}
})
t.Run("string", func(t *testing.T) {
matched, unmatched := split("string", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if s := kv.String(); s != v {
t.Errorf("expected 42, got %s", s)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if s := kv.String(); s != "" {
t.Errorf("expected 42, got %s", s)
}
}
})
t.Run("bool", func(t *testing.T) {
matched, unmatched := split("bool", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if b := kv.Bool(); b != v {
t.Errorf("expected true, got %v", b)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if b := kv.Bool(); b != false {
t.Errorf("expected false, got %v", b)
}
}
})
}
func TestValues(t *testing.T) {
values := map[string][]any{
"int64s": {[]int{42}, []int8{42}, []int16{42}, []int32{42}, []int64{42}},
"uint64s": {[]uint{42}, []uint8{42}, []uint16{42}, []uint32{42}, []uint64{42}},
"float64s": {[]float32{42}, []float64{42}},
"strings": {[]string{"42"}, []string{"hello"}},
"bools": {[]bool{true}, []bool{false}},
}
t.Run("int64s", func(t *testing.T) {
matched, unmatched := split("int64s", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if diff := cmp.Diff(kv.Ints(), []int64{42}); diff != "" {
t.Errorf("diff: %s", diff)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if i64s := kv.Ints(); i64s != nil {
t.Errorf("expected nil, got %v", i64s)
}
}
})
t.Run("uint64s", func(t *testing.T) {
matched, unmatched := split("uint64s", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if diff := cmp.Diff(kv.Uints(), []uint64{42}); diff != "" {
t.Errorf("diff: %s", diff)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if u64s := kv.Uints(); u64s != nil {
t.Errorf("expected nil, got %v", u64s)
}
}
})
t.Run("float64s", func(t *testing.T) {
matched, unmatched := split("float64s", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if diff := cmp.Diff(kv.Floats(), []float64{42}); diff != "" {
t.Errorf("diff: %s", diff)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if f64s := kv.Floats(); f64s != nil {
t.Errorf("expected nil, got %v", f64s)
}
}
})
t.Run("strings", func(t *testing.T) {
matched, unmatched := split("strings", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if diff := cmp.Diff(kv.Strings(), v); diff != "" {
t.Errorf("diff: %s", diff)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if s := kv.Strings(); s != nil {
t.Errorf("expected nil, got %v", s)
}
}
})
t.Run("bools", func(t *testing.T) {
matched, unmatched := split("bools", values)
for _, v := range matched {
kv := KeyValue{"key", Value{v}}
if diff := cmp.Diff(kv.Bools(), v); diff != "" {
t.Errorf("diff: %s", diff)
}
}
for _, v := range unmatched {
kv := KeyValue{"key", Value{v}}
if b := kv.Bools(); b != nil {
t.Errorf("expected nil, got %v", b)
}
}
})
}

88
fs/gguf/lazy.go Normal file
View File

@@ -0,0 +1,88 @@
package gguf
import (
"encoding/binary"
"iter"
"log/slog"
)
type lazy[T any] struct {
count uint64
next func() (T, bool)
stop func()
values []T
doneFunc func() error
}
func newLazy[T any](f *File, fn func() (T, error)) (*lazy[T], error) {
it := lazy[T]{}
if err := binary.Read(f.reader, binary.LittleEndian, &it.count); err != nil {
return nil, err
}
it.values = make([]T, 0)
it.next, it.stop = iter.Pull(func(yield func(T) bool) {
for i := range it.count {
t, err := fn()
if err != nil {
slog.Error("error reading tensor", "index", i, "error", err)
return
}
it.values = append(it.values, t)
if !yield(t) {
break
}
}
if it.doneFunc != nil {
it.doneFunc()
}
})
return &it, nil
}
func (g *lazy[T]) Values() iter.Seq[T] {
return func(yield func(T) bool) {
for _, v := range g.All() {
if !yield(v) {
break
}
}
}
}
func (g *lazy[T]) All() iter.Seq2[int, T] {
return func(yield func(int, T) bool) {
for i := range int(g.count) {
if i < len(g.values) {
if !yield(i, g.values[i]) {
break
}
} else {
t, ok := g.next()
if !ok {
break
}
if !yield(i, t) {
break
}
}
}
}
}
func (g *lazy[T]) rest() (collected bool) {
for {
_, ok := g.next()
collected = collected || ok
if !ok {
break
}
}
return collected
}

34
fs/gguf/reader.go Normal file
View File

@@ -0,0 +1,34 @@
package gguf
import (
"bufio"
"io"
)
type readSeeker struct {
rs io.ReadSeeker
br *bufio.Reader
}
func newReadSeeker(rs io.ReadSeeker, size int) *readSeeker {
return &readSeeker{
rs: rs,
br: bufio.NewReaderSize(rs, size),
}
}
func (b *readSeeker) Read(p []byte) (int, error) {
return b.br.Read(p)
}
func (b *readSeeker) Seek(offset int64, whence int) (int64, error) {
if whence == io.SeekCurrent {
offset -= int64(b.br.Buffered())
}
n, err := b.rs.Seek(offset, whence)
if err != nil {
return 0, err
}
b.br.Reset(b.rs)
return n, nil
}

284
fs/gguf/tensor.go Normal file
View File

@@ -0,0 +1,284 @@
package gguf
import (
"log/slog"
"strings"
)
type TensorInfo struct {
Name string
Offset uint64
Shape []uint64
Type TensorType
}
func (t TensorInfo) NumValues() int64 {
var numItems int64 = 1
for _, dim := range t.Shape {
numItems *= int64(dim)
}
return numItems
}
// NumBytes returns the number of bytes in the tensor.
func (t TensorInfo) NumBytes() int64 {
return int64(float64(t.NumValues()) * t.Type.NumBytes())
}
func (t TensorInfo) LogValue() slog.Value {
return slog.GroupValue(
slog.String("name", t.Name),
slog.Int64("offset", int64(t.Offset)),
slog.Any("shape", t.Shape),
slog.Int64("num_values", t.NumValues()),
slog.Int64("num_bytes", t.NumBytes()),
slog.Any("type", t.Type),
)
}
type TensorType uint32
const (
TensorTypeF32 TensorType = iota
TensorTypeF16
TensorTypeQ4_0
TensorTypeQ4_1
// unexported // unused in gguf
tensorTypeQ4_2
tensorTypeQ4_3
TensorTypeQ5_0
TensorTypeQ5_1
TensorTypeQ8_0
TensorTypeQ8_1
TensorTypeQ2_K
TensorTypeQ3_K
TensorTypeQ4_K
TensorTypeQ5_K
TensorTypeQ6_K
TensorTypeQ8_K
// unexported // unquantizable by ollama
tensorTypeIQ2_XXS
tensorTypeIQ2_XS
tensorTypeIQ3_XXS
tensorTypeIQ1_S
tensorTypeIQ4_NL
tensorTypeIQ3_S
tensorTypeIQ2_S
tensorTypeIQ4_XS
TensorTypeI8
TensorTypeI16
TensorTypeI32
TensorTypeI64
TensorTypeF64
// unexported // unquantizable by ollama
tensorTypeIQ1_M
TensorTypeBF16
// unexported // unused in gguf
tensorTypeQ4_0_4_4
tensorTypeQ4_0_4_8
tensorTypeQ4_0_8_8
// unexported // unquantizable by ollama
tensorTypeTQ1_0
tensorTypeTQ2_0
// unexported // unused in gguf
tensorTypeIQ4_NL_4_4
tensorTypeIQ4_NL_4_8
tensorTypeIQ4_NL_8_8
)
func (t TensorType) NumBytes() float64 {
return float64(t.typeSize()) / float64(t.blockSize())
}
func (t TensorType) typeSize() int64 {
switch t {
case TensorTypeF32:
return 4
case TensorTypeF16:
return 2
case TensorTypeQ4_0:
return 2 + t.blockSize()/2
case TensorTypeQ4_1:
return 2 + 2 + t.blockSize()/2
case TensorTypeQ5_0:
return 2 + 4 + t.blockSize()/2
case TensorTypeQ5_1:
return 2 + 2 + 4 + t.blockSize()/2
case TensorTypeQ8_0:
return 2 + t.blockSize()
case TensorTypeQ8_1:
return 2 + 2 + t.blockSize()
case TensorTypeQ2_K:
return t.blockSize()/16 + t.blockSize()/4 + 2 + 2
case TensorTypeQ3_K:
return t.blockSize()/8 + t.blockSize()/4 + 12 + 2
case TensorTypeQ4_K:
return 2 + 2 + 12 + t.blockSize()/2
case TensorTypeQ5_K:
return 2 + 2 + 12 + t.blockSize()/8 + t.blockSize()/2
case TensorTypeQ6_K:
return t.blockSize()/2 + t.blockSize()/4 + t.blockSize()/16 + 2
case TensorTypeQ8_K:
return 4 + t.blockSize() + 2*t.blockSize()/16
case tensorTypeIQ2_XXS:
return 2 + 2*t.blockSize()/8
case tensorTypeIQ2_XS:
return 2 + 2*t.blockSize()/8 + t.blockSize()/32
case tensorTypeIQ3_XXS:
return 2 + t.blockSize()/4 + t.blockSize()/8
case tensorTypeIQ1_S:
return 2 + t.blockSize()/8 + t.blockSize()/16
case tensorTypeIQ4_NL:
return 2 + t.blockSize()/2
case tensorTypeIQ3_S:
return 2 + t.blockSize()/4 + t.blockSize()/8 + t.blockSize()/32 + 4
case tensorTypeIQ2_S:
return 2 + t.blockSize()/4 + t.blockSize()/16
case tensorTypeIQ4_XS:
return 2 + 2 + t.blockSize()/2 + t.blockSize()/64
case TensorTypeI8:
return 1
case TensorTypeI16:
return 2
case TensorTypeI32:
return 4
case TensorTypeI64:
return 8
case TensorTypeF64:
return 8
case tensorTypeIQ1_M:
return t.blockSize()/8 + t.blockSize()/16 + t.blockSize()/32
case TensorTypeBF16:
return 2
default:
return 0
}
}
func (t TensorType) blockSize() int64 {
switch t {
case TensorTypeF32,
TensorTypeF16,
TensorTypeI8,
TensorTypeI16,
TensorTypeI32,
TensorTypeI64,
TensorTypeF64,
TensorTypeBF16:
return 1
case TensorTypeQ4_0,
TensorTypeQ4_1,
TensorTypeQ5_0,
TensorTypeQ5_1,
TensorTypeQ8_0,
TensorTypeQ8_1,
tensorTypeIQ4_NL:
return 32
default:
return 256
}
}
func (t TensorType) String() string {
switch t {
case TensorTypeF32:
return "f32"
case TensorTypeF16:
return "f16"
case TensorTypeQ4_0:
return "q4_0"
case TensorTypeQ4_1:
return "q4_1"
case tensorTypeQ4_2:
return "q4_2"
case tensorTypeQ4_3:
return "q4_3"
case TensorTypeQ5_0:
return "q5_0"
case TensorTypeQ5_1:
return "q5_1"
case TensorTypeQ8_0:
return "q8_0"
case TensorTypeQ8_1:
return "q8_1"
case TensorTypeQ2_K:
return "q2_k"
case TensorTypeQ3_K:
return "q3_k"
case TensorTypeQ4_K:
return "q4_k"
case TensorTypeQ5_K:
return "q5_k"
case TensorTypeQ6_K:
return "q6_k"
case TensorTypeQ8_K:
return "q8_k"
case tensorTypeIQ2_XXS:
return "iq2_xxs"
case tensorTypeIQ2_XS:
return "iq2_xs"
case tensorTypeIQ3_XXS:
return "iq3_xxs"
case tensorTypeIQ1_S:
return "iq1_s"
case tensorTypeIQ4_NL:
return "iq4_nl"
case tensorTypeIQ3_S:
return "iq3_s"
case tensorTypeIQ2_S:
return "iq2_s"
case tensorTypeIQ4_XS:
return "iq4_xs"
case TensorTypeI8:
return "i8"
case TensorTypeI16:
return "i16"
case TensorTypeI32:
return "i32"
case TensorTypeI64:
return "i64"
case TensorTypeF64:
return "f64"
case tensorTypeIQ1_M:
return "iq1_m"
case TensorTypeBF16:
return "bf16"
case tensorTypeQ4_0_4_4:
return "q4_0_4_4"
case tensorTypeQ4_0_4_8:
return "q4_0_4_8"
case tensorTypeQ4_0_8_8:
return "q4_0_8_8"
case tensorTypeTQ1_0:
return "tq1_0"
case tensorTypeTQ2_0:
return "tq2_0"
case tensorTypeIQ4_NL_4_4:
return "iq4_nl_4_4"
case tensorTypeIQ4_NL_4_8:
return "iq4_nl_4_8"
case tensorTypeIQ4_NL_8_8:
return "iq4_nl_8_8"
default:
return "unknown"
}
}
func (t TensorType) LogValue() slog.Value {
return slog.GroupValue(
slog.Uint64("value", uint64(t)),
slog.String("name", strings.ToUpper(t.String())),
slog.Int64("size", t.typeSize()),
slog.Int64("block_size", t.blockSize()),
slog.Float64("num_bytes", t.NumBytes()),
)
}

View File

@@ -0,0 +1,102 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Thu, 24 Apr 2025 14:48:51 -0700
Subject: [PATCH] ggml: Export GPU UUIDs
This enables matching up devices and information reported by the backend
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
---
ggml/include/ggml-backend.h | 1 +
ggml/src/ggml-cuda/ggml-cuda.cu | 33 ++++++++++++++++++++++++++++++++
ggml/src/ggml-metal/ggml-metal.m | 1 +
3 files changed, 35 insertions(+)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 74e46716..a880df33 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
struct ggml_backend_dev_props {
const char * name;
const char * description;
+ const char * uuid;
size_t memory_free;
size_t memory_total;
enum ggml_backend_dev_type type;
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index cb0d8528..4c829153 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
int device;
std::string name;
std::string description;
+ std::string uuid;
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
return ctx->description.c_str();
}
+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+ ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
+ return ctx->uuid.c_str();
+}
+
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
props->name = ggml_backend_cuda_device_get_name(dev);
props->description = ggml_backend_cuda_device_get_description(dev);
+ props->uuid = ggml_backend_cuda_device_get_uuid(dev);
props->type = ggml_backend_cuda_device_get_type(dev);
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
dev_ctx->description = prop.name;
+ #if !defined(GGML_USE_HIP)
+ char uuid[64];
+ snprintf(uuid, sizeof(uuid),
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ (unsigned char)prop.uuid.bytes[0],
+ (unsigned char)prop.uuid.bytes[1],
+ (unsigned char)prop.uuid.bytes[2],
+ (unsigned char)prop.uuid.bytes[3],
+ (unsigned char)prop.uuid.bytes[4],
+ (unsigned char)prop.uuid.bytes[5],
+ (unsigned char)prop.uuid.bytes[6],
+ (unsigned char)prop.uuid.bytes[7],
+ (unsigned char)prop.uuid.bytes[8],
+ (unsigned char)prop.uuid.bytes[9],
+ (unsigned char)prop.uuid.bytes[10],
+ (unsigned char)prop.uuid.bytes[11],
+ (unsigned char)prop.uuid.bytes[12],
+ (unsigned char)prop.uuid.bytes[13],
+ (unsigned char)prop.uuid.bytes[14],
+ (unsigned char)prop.uuid.bytes[15]
+ );
+ dev_ctx->uuid = uuid;
+ #else
+ dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+ #endif
+
ggml_backend_dev_t dev = new ggml_backend_device {
/* .iface = */ ggml_backend_cuda_device_interface,
/* .reg = */ &reg,
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index 1b56f858..ee4f2dcb 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
props->name = ggml_backend_metal_device_get_name(dev);
props->description = ggml_backend_metal_device_get_description(dev);
+ props->uuid = "0";
props->type = ggml_backend_metal_device_get_type(dev);
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = (struct ggml_backend_dev_caps) {

View File

@@ -797,7 +797,8 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
res, err := http.DefaultClient.Do(serverReq)
if err != nil {
return fmt.Errorf("POST predict: %v", err)
slog.Error("post predict", "error", err)
return errors.New("model runner has unexpectedly stopped, this may be due to resource limitations or an internal error, check ollama server logs for details")
}
defer res.Body.Close()

View File

@@ -124,6 +124,10 @@ type DeviceMemory struct {
// may not be persistent across instances of the runner.
Name string
// UUID is a unique persistent identifier for the device for matching
// with system management libraries
UUID string
// Weights is the per-layer memory needed for the model weights.
Weights []Memory
@@ -152,6 +156,10 @@ func (m DeviceMemory) LogValue() slog.Value {
attrs = append(attrs, slog.Any("Graph", m.Graph))
}
if len(attrs) > 0 && m.UUID != "" {
attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
}
return slog.GroupValue(attrs...)
}

View File

@@ -136,6 +136,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
}
requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
var props C.struct_ggml_backend_dev_props
C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
requiredMemory.CPU.UUID = C.GoString(props.uuid)
requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)
@@ -150,6 +153,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
})
btDeviceMemory[bt] = &requiredMemory.GPUs[i]
requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
var props C.struct_ggml_backend_dev_props
C.ggml_backend_dev_get_props(d, &props)
requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
}

View File

@@ -152,6 +152,7 @@ extern "C" {
struct ggml_backend_dev_props {
const char * name;
const char * description;
const char * uuid;
size_t memory_free;
size_t memory_total;
enum ggml_backend_dev_type type;

View File

@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
int device;
std::string name;
std::string description;
std::string uuid;
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
return ctx->description.c_str();
}
static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
return ctx->uuid.c_str();
}
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
props->name = ggml_backend_cuda_device_get_name(dev);
props->description = ggml_backend_cuda_device_get_description(dev);
props->uuid = ggml_backend_cuda_device_get_uuid(dev);
props->type = ggml_backend_cuda_device_get_type(dev);
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
dev_ctx->description = prop.name;
#if !defined(GGML_USE_HIP)
char uuid[64];
snprintf(uuid, sizeof(uuid),
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
(unsigned char)prop.uuid.bytes[0],
(unsigned char)prop.uuid.bytes[1],
(unsigned char)prop.uuid.bytes[2],
(unsigned char)prop.uuid.bytes[3],
(unsigned char)prop.uuid.bytes[4],
(unsigned char)prop.uuid.bytes[5],
(unsigned char)prop.uuid.bytes[6],
(unsigned char)prop.uuid.bytes[7],
(unsigned char)prop.uuid.bytes[8],
(unsigned char)prop.uuid.bytes[9],
(unsigned char)prop.uuid.bytes[10],
(unsigned char)prop.uuid.bytes[11],
(unsigned char)prop.uuid.bytes[12],
(unsigned char)prop.uuid.bytes[13],
(unsigned char)prop.uuid.bytes[14],
(unsigned char)prop.uuid.bytes[15]
);
dev_ctx->uuid = uuid;
#else
dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
#endif
ggml_backend_dev_t dev = new ggml_backend_device {
/* .iface = */ ggml_backend_cuda_device_interface,
/* .reg = */ &reg,

View File

@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
props->name = ggml_backend_metal_device_get_name(dev);
props->description = ggml_backend_metal_device_get_description(dev);
props->uuid = "0";
props->type = ggml_backend_metal_device_get_type(dev);
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = (struct ggml_backend_dev_caps) {

View File

@@ -23,9 +23,10 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/fs/gguf"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/thinking"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
)
@@ -72,22 +73,20 @@ func (m *Model) Capabilities() []model.Capability {
capabilities := []model.Capability{}
// Check for completion capability
r, err := os.Open(m.ModelPath)
f, err := gguf.Open(m.ModelPath)
if err == nil {
defer r.Close()
defer f.Close()
f, err := ggml.Decode(r, 1024)
if err == nil {
if _, ok := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]; ok {
capabilities = append(capabilities, model.CapabilityEmbedding)
} else {
capabilities = append(capabilities, model.CapabilityCompletion)
}
if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok {
capabilities = append(capabilities, model.CapabilityVision)
}
embedding := f.KeyValue("pooling_type")
if !embedding.Value.IsNil() {
capabilities = append(capabilities, model.CapabilityEmbedding)
} else {
slog.Error("couldn't decode ggml", "error", err)
// If no embedding is specified, we assume the model supports completion
capabilities = append(capabilities, model.CapabilityCompletion)
}
vision := f.KeyValue("vision.block_count")
if !vision.Value.IsNil() {
capabilities = append(capabilities, model.CapabilityVision)
}
} else {
slog.Error("couldn't open model file", "error", err)
@@ -113,7 +112,7 @@ func (m *Model) Capabilities() []model.Capability {
}
// Check for thinking capability
openingTag, closingTag := inferThinkingTags(m.Template.Template)
openingTag, closingTag := thinking.InferTags(m.Template.Template)
if openingTag != "" && closingTag != "" {
capabilities = append(capabilities, model.CapabilityThinking)
}

View File

@@ -1,9 +1,8 @@
package server
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
@@ -13,81 +12,200 @@ import (
"github.com/ollama/ollama/types/model"
)
// Constants for GGUF magic bytes and version
var (
ggufMagic = []byte{0x47, 0x47, 0x55, 0x46} // "GGUF"
ggufVer = uint32(3) // Version 3
// GGUF type constants (matching gguf package)
const (
typeUint8 = uint32(0)
typeInt8 = uint32(1)
typeUint16 = uint32(2)
typeInt16 = uint32(3)
typeUint32 = uint32(4)
typeInt32 = uint32(5)
typeFloat32 = uint32(6)
typeBool = uint32(7)
typeString = uint32(8)
typeArray = uint32(9)
typeUint64 = uint32(10)
typeInt64 = uint32(11)
typeFloat64 = uint32(12)
)
// Helper function to create mock GGUF data
func createMockGGUFData(architecture string, vision bool) []byte {
var buf bytes.Buffer
type testTensorInfo struct {
Name string
Shape []uint64
Type uint32
}
// Write GGUF header
buf.Write(ggufMagic)
binary.Write(&buf, binary.LittleEndian, ggufVer)
// Write tensor count (0 for our test)
var numTensors uint64 = 0
binary.Write(&buf, binary.LittleEndian, numTensors)
// Calculate number of metadata entries
numMetaEntries := uint64(1) // architecture entry
if vision {
numMetaEntries++
// Helper function to create test GGUF files (matching gguf package approach)
func createTestGGUFFile(path string, keyValues map[string]any, tensors []testTensorInfo) error {
file, err := os.Create(path)
if err != nil {
return err
}
// Add embedding entry if architecture is "bert"
if architecture == "bert" {
numMetaEntries++
}
binary.Write(&buf, binary.LittleEndian, numMetaEntries)
defer file.Close()
// Write architecture metadata
archKey := "general.architecture"
keyLen := uint64(len(archKey))
binary.Write(&buf, binary.LittleEndian, keyLen)
buf.WriteString(archKey)
// String type (8)
var strType uint32 = 8
binary.Write(&buf, binary.LittleEndian, strType)
// String length
strLen := uint64(len(architecture))
binary.Write(&buf, binary.LittleEndian, strLen)
buf.WriteString(architecture)
if vision {
visionKey := architecture + ".vision.block_count"
keyLen = uint64(len(visionKey))
binary.Write(&buf, binary.LittleEndian, keyLen)
buf.WriteString(visionKey)
// uint32 type (4)
var uint32Type uint32 = 4
binary.Write(&buf, binary.LittleEndian, uint32Type)
// uint32 value (1)
var countVal uint32 = 1
binary.Write(&buf, binary.LittleEndian, countVal)
}
// Write embedding metadata if architecture is "bert"
if architecture == "bert" {
poolKey := architecture + ".pooling_type"
keyLen = uint64(len(poolKey))
binary.Write(&buf, binary.LittleEndian, keyLen)
buf.WriteString(poolKey)
// uint32 type (4)
var uint32Type uint32 = 4
binary.Write(&buf, binary.LittleEndian, uint32Type)
// uint32 value (1)
var poolingVal uint32 = 1
binary.Write(&buf, binary.LittleEndian, poolingVal)
// Write GGUF magic
if _, err := file.Write([]byte("GGUF")); err != nil {
return err
}
return buf.Bytes()
// Write version
if err := binary.Write(file, binary.LittleEndian, uint32(3)); err != nil {
return err
}
// Write tensor count
if err := binary.Write(file, binary.LittleEndian, uint64(len(tensors))); err != nil {
return err
}
// Write metadata count
if err := binary.Write(file, binary.LittleEndian, uint64(len(keyValues))); err != nil {
return err
}
// Write metadata
for key, value := range keyValues {
if err := writeKeyValue(file, key, value); err != nil {
return err
}
}
// Write tensor info
for _, tensor := range tensors {
if err := writeTensorInfo(file, tensor); err != nil {
return err
}
}
// Write some dummy tensor data
dummyData := make([]byte, 1024)
file.Write(dummyData)
return nil
}
func writeKeyValue(file *os.File, key string, value any) error {
// Write key length and key
if err := binary.Write(file, binary.LittleEndian, uint64(len(key))); err != nil {
return err
}
if _, err := file.Write([]byte(key)); err != nil {
return err
}
// Write value based on type
switch v := value.(type) {
case string:
if err := binary.Write(file, binary.LittleEndian, uint32(typeString)); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
_, err := file.Write([]byte(v))
return err
case int64:
if err := binary.Write(file, binary.LittleEndian, typeInt64); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case uint32:
if err := binary.Write(file, binary.LittleEndian, typeUint32); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case bool:
if err := binary.Write(file, binary.LittleEndian, typeBool); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case float64:
if err := binary.Write(file, binary.LittleEndian, uint32(typeFloat64)); err != nil {
return err
}
return binary.Write(file, binary.LittleEndian, v)
case []string:
if err := binary.Write(file, binary.LittleEndian, uint32(typeArray)); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, typeString); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, s := range v {
if err := binary.Write(file, binary.LittleEndian, uint64(len(s))); err != nil {
return err
}
if _, err := file.Write([]byte(s)); err != nil {
return err
}
}
return nil
case []int64:
if err := binary.Write(file, binary.LittleEndian, uint32(typeArray)); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, typeInt64); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, i := range v {
if err := binary.Write(file, binary.LittleEndian, i); err != nil {
return err
}
}
return nil
case []float64:
if err := binary.Write(file, binary.LittleEndian, typeArray); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, typeFloat64); err != nil {
return err
}
if err := binary.Write(file, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, f := range v {
if err := binary.Write(file, binary.LittleEndian, f); err != nil {
return err
}
}
return nil
default:
return fmt.Errorf("unsupported value type: %T", value)
}
}
func writeTensorInfo(file *os.File, tensor testTensorInfo) error {
// Write tensor name
if err := binary.Write(file, binary.LittleEndian, uint64(len(tensor.Name))); err != nil {
return err
}
if _, err := file.Write([]byte(tensor.Name)); err != nil {
return err
}
// Write dimensions
if err := binary.Write(file, binary.LittleEndian, uint32(len(tensor.Shape))); err != nil {
return err
}
for _, dim := range tensor.Shape {
if err := binary.Write(file, binary.LittleEndian, dim); err != nil {
return err
}
}
// Write type
if err := binary.Write(file, binary.LittleEndian, tensor.Type); err != nil {
return err
}
// Write offset (dummy value)
return binary.Write(file, binary.LittleEndian, uint64(0))
}
func TestModelCapabilities(t *testing.T) {
@@ -101,13 +219,38 @@ func TestModelCapabilities(t *testing.T) {
// Create a simple model file for tests that don't depend on GGUF content
simpleModelPath := filepath.Join(tempDir, "simple_model.bin")
if err := errors.Join(
os.WriteFile(completionModelPath, createMockGGUFData("llama", false), 0o644),
os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
); err != nil {
t.Fatalf("Failed to create model files: %v", err)
// Create completion model (llama architecture without vision)
if err := createTestGGUFFile(completionModelPath, map[string]any{
"general.architecture": "llama",
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
}); err != nil {
t.Fatalf("Failed to create completion model file: %v", err)
}
// Create vision model (llama architecture with vision block count)
if err := createTestGGUFFile(visionModelPath, map[string]any{
"general.architecture": "llama",
"llama.vision.block_count": uint32(1),
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
}); err != nil {
t.Fatalf("Failed to create vision model file: %v", err)
}
// Create embedding model (bert architecture with pooling type)
if err := createTestGGUFFile(embeddingModelPath, map[string]any{
"general.architecture": "bert",
"bert.pooling_type": uint32(1),
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
}); err != nil {
t.Fatalf("Failed to create embedding model file: %v", err)
}
// Create simple model file for tests that don't depend on GGUF content
if err := os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644); err != nil {
t.Fatalf("Failed to create simple model file: %v", err)
}
toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
@@ -231,12 +374,29 @@ func TestModelCheckCapabilities(t *testing.T) {
simpleModelPath := filepath.Join(tempDir, "model.bin")
embeddingModelPath := filepath.Join(tempDir, "embedding_model.bin")
if err := errors.Join(
os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
); err != nil {
t.Fatalf("Failed to create model files: %v", err)
// Create vision model (llama architecture with vision block count)
if err := createTestGGUFFile(visionModelPath, map[string]any{
"general.architecture": "llama",
"llama.vision.block_count": uint32(1),
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
}); err != nil {
t.Fatalf("Failed to create vision model file: %v", err)
}
// Create embedding model (bert architecture with pooling type)
if err := createTestGGUFFile(embeddingModelPath, map[string]any{
"general.architecture": "bert",
"bert.pooling_type": uint32(1),
}, []testTensorInfo{
{Name: "token_embd.weight", Shape: []uint64{1000, 512}, Type: 1}, // F16
}); err != nil {
t.Fatalf("Failed to create embedding model file: %v", err)
}
// Create simple model file for tests that don't depend on GGUF content
if err := os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644); err != nil {
t.Fatalf("Failed to create simple model file: %v", err)
}
toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")

View File

@@ -37,6 +37,7 @@ import (
"github.com/ollama/ollama/server/internal/client/ollama"
"github.com/ollama/ollama/server/internal/registry"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/thinking"
"github.com/ollama/ollama/tools"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
@@ -282,12 +283,12 @@ func (s *Server) GenerateHandler(c *gin.Context) {
prompt = b.String()
}
var thinkingState *thinkingParser
openingTag, closingTag := inferThinkingTags(m.Template.Template)
var thinkingState *thinking.Parser
openingTag, closingTag := thinking.InferTags(m.Template.Template)
if req.Think != nil && *req.Think && openingTag != "" && closingTag != "" {
thinkingState = &thinkingParser{
openingTag: openingTag,
closingTag: closingTag,
thinkingState = &thinking.Parser{
OpeningTag: openingTag,
ClosingTag: closingTag,
}
}
@@ -316,7 +317,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
}
if thinkingState != nil {
thinking, content := thinkingState.addContent(cr.Content)
thinking, content := thinkingState.AddContent(cr.Content)
res.Thinking = thinking
res.Response = content
}
@@ -928,8 +929,7 @@ func (s *Server) ListHandler(c *gin.Context) {
}
}
// tag should never be masked
models = append(models, api.ListModelResponse{
r := api.ListModelResponse{
Model: n.DisplayShortest(),
Name: n.DisplayShortest(),
Size: m.Size(),
@@ -942,7 +942,16 @@ func (s *Server) ListHandler(c *gin.Context) {
ParameterSize: cf.ModelType,
QuantizationLevel: cf.FileType,
},
})
}
model, err := GetModel(n.String())
if err != nil {
slog.Warn("bad model details", "name", n, "error", err)
} else {
r.Capabilities = model.Capabilities()
}
models = append(models, r)
}
slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
@@ -1514,12 +1523,12 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
var thinkingState *thinkingParser
openingTag, closingTag := inferThinkingTags(m.Template.Template)
var thinkingState *thinking.Parser
openingTag, closingTag := thinking.InferTags(m.Template.Template)
if req.Think != nil && *req.Think && openingTag != "" && closingTag != "" {
thinkingState = &thinkingParser{
openingTag: openingTag,
closingTag: closingTag,
thinkingState = &thinking.Parser{
OpeningTag: openingTag,
ClosingTag: closingTag,
}
}
@@ -1557,7 +1566,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
}
if thinkingState != nil {
thinkingContent, remainingContent := thinkingState.addContent(res.Message.Content)
thinkingContent, remainingContent := thinkingState.AddContent(res.Message.Content)
if thinkingContent == "" && remainingContent == "" && !r.Done {
// need to accumulate more to decide what to send
return
@@ -1668,11 +1677,11 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
// change the user output), we should probably perform this filtering
// for all thinking models (not just qwen3 & deepseek-r1) since it tends
// to save tokens and improve quality.
thinkingState := &thinkingParser{
openingTag: "<think>",
closingTag: "</think>",
thinkingState := &thinking.Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
_, content := thinkingState.addContent(msg.Content)
_, content := thinkingState.AddContent(msg.Content)
msgs[i].Content = content
}
}

View File

@@ -1,9 +1,7 @@
package server
package thinking
import (
"strings"
"text/template"
"text/template/parse"
"unicode"
)
@@ -46,17 +44,17 @@ func (s thinkingState) String() string {
}
}
type thinkingParser struct {
type Parser struct {
state thinkingState
openingTag string
closingTag string
OpeningTag string
ClosingTag string
acc strings.Builder
}
// addContent returns the thinking content and the non-thinking content that
// AddContent returns the thinking content and the non-thinking content that
// should be immediately sent to the user. It will internally buffer if it needs
// to see more raw content to disambiguate
func (s *thinkingParser) addContent(content string) (string, string) {
func (s *Parser) AddContent(content string) (string, string) {
s.acc.WriteString(content)
var thinkingSb, remainingSb strings.Builder
@@ -76,12 +74,12 @@ func (s *thinkingParser) addContent(content string) (string, string) {
}
// the additional bool return is true iff we should continue eating
func eat(s *thinkingParser) (string, string, bool) {
func eat(s *Parser) (string, string, bool) {
switch s.state {
case thinkingState_LookingForOpening:
trimmed := strings.TrimLeftFunc(s.acc.String(), unicode.IsSpace)
if strings.HasPrefix(trimmed, s.openingTag) {
after := strings.Join(strings.Split(trimmed, s.openingTag)[1:], s.openingTag)
if strings.HasPrefix(trimmed, s.OpeningTag) {
after := strings.Join(strings.Split(trimmed, s.OpeningTag)[1:], s.OpeningTag)
after = strings.TrimLeftFunc(after, unicode.IsSpace)
// after might contain more than just thinking tokens, so we continue
// parsing instead of returning it as thinking tokens here
@@ -93,7 +91,7 @@ func eat(s *thinkingParser) (string, string, bool) {
s.state = thinkingState_Thinking
}
return "", "", true
} else if strings.HasPrefix(s.openingTag, trimmed) {
} else if strings.HasPrefix(s.OpeningTag, trimmed) {
// partial opening seen, so let's keep accumulating
return "", "", false
} else if trimmed == "" {
@@ -119,10 +117,10 @@ func eat(s *thinkingParser) (string, string, bool) {
}
case thinkingState_Thinking:
acc := s.acc.String()
if strings.Contains(acc, s.closingTag) {
split := strings.Split(acc, s.closingTag)
if strings.Contains(acc, s.ClosingTag) {
split := strings.Split(acc, s.ClosingTag)
thinking := split[0]
remaining := strings.Join(split[1:], s.closingTag)
remaining := strings.Join(split[1:], s.ClosingTag)
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
s.acc.Reset()
if remaining == "" {
@@ -131,7 +129,7 @@ func eat(s *thinkingParser) (string, string, bool) {
s.state = thinkingState_ThinkingDone
}
return thinking, remaining, false
} else if overlapLen := overlap(acc, s.closingTag); overlapLen > 0 {
} else if overlapLen := overlap(acc, s.ClosingTag); overlapLen > 0 {
thinking := acc[:len(acc)-overlapLen]
remaining := acc[len(acc)-overlapLen:]
s.acc.Reset()
@@ -171,130 +169,3 @@ func overlap(s, delim string) int {
}
return 0
}
func templateVisit(n parse.Node, enterFn func(parse.Node) bool, exitFn func(parse.Node)) {
if n == nil {
return
}
shouldContinue := enterFn(n)
if !shouldContinue {
return
}
switch x := n.(type) {
case *parse.ListNode:
for _, c := range x.Nodes {
templateVisit(c, enterFn, exitFn)
}
case *parse.BranchNode:
if x.Pipe != nil {
templateVisit(x.Pipe, enterFn, exitFn)
}
if x.List != nil {
templateVisit(x.List, enterFn, exitFn)
}
if x.ElseList != nil {
templateVisit(x.ElseList, enterFn, exitFn)
}
case *parse.ActionNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.WithNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.RangeNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.IfNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.TemplateNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.PipeNode:
for _, c := range x.Cmds {
templateVisit(c, enterFn, exitFn)
}
case *parse.CommandNode:
for _, a := range x.Args {
templateVisit(a, enterFn, exitFn)
}
// text, field, number, etc. are leaves nothing to recurse into
}
if exitFn != nil {
exitFn(n)
}
}
// We use a heuristic to infer the tags that surround thinking traces:
// We look for a range node that iterates over "Messages" and then look for a
// reference to "Thinking" like `{{.Thinking}}`. We then go up to the nearest
// ListNode and take the first and last TextNodes as the opening and closing
// tags.
func inferThinkingTags(t *template.Template) (string, string) {
ancestors := []parse.Node{}
openingTag := ""
closingTag := ""
enterFn := func(n parse.Node) bool {
ancestors = append(ancestors, n)
switch x := n.(type) {
case *parse.FieldNode:
if len(x.Ident) > 0 && x.Ident[0] == "Thinking" {
var mostRecentRange *parse.RangeNode
for i := len(ancestors) - 1; i >= 0; i-- {
if r, ok := ancestors[i].(*parse.RangeNode); ok {
mostRecentRange = r
break
}
}
if mostRecentRange == nil || !rangeUsesField(mostRecentRange, "Messages") {
return true
}
// TODO(drifkin): to be more robust, check that it's in the action
// part, not the `if`'s pipeline part. We do match on the nearest list
// that starts and ends with text nodes, which makes this not strictly
// necessary for our heuristic
// go up to the nearest ancestor that is a *parse.ListNode
for i := len(ancestors) - 1; i >= 0; i-- {
if l, ok := ancestors[i].(*parse.ListNode); ok {
firstNode := l.Nodes[0]
if t, ok := firstNode.(*parse.TextNode); ok {
openingTag = strings.TrimSpace(t.String())
}
lastNode := l.Nodes[len(l.Nodes)-1]
if t, ok := lastNode.(*parse.TextNode); ok {
closingTag = strings.TrimSpace(t.String())
}
break
}
}
}
}
return true
}
exitFn := func(n parse.Node) {
ancestors = ancestors[:len(ancestors)-1]
}
templateVisit(t.Root, enterFn, exitFn)
return openingTag, closingTag
}
// checks to see if the given field name is present in the pipeline of the given range node
func rangeUsesField(rangeNode *parse.RangeNode, field string) bool {
found := false
enterFn := func(n parse.Node) bool {
switch x := n.(type) {
case *parse.FieldNode:
if x.Ident[0] == field {
found = true
}
}
return true
}
templateVisit(rangeNode.BranchNode.Pipe, enterFn, nil)
return found
}

View File

@@ -1,8 +1,7 @@
package server
package thinking
import (
"testing"
"text/template"
)
func TestExtractThinking(t *testing.T) {
@@ -26,11 +25,11 @@ func TestExtractThinking(t *testing.T) {
},
}
for i, tt := range tests {
parser := thinkingParser{
openingTag: "<think>",
closingTag: "</think>",
parser := Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
gotThinking, gotContent := parser.addContent(tt.in)
gotThinking, gotContent := parser.AddContent(tt.in)
if gotContent != tt.wantContent || gotThinking != tt.wantThink {
t.Errorf("case %d: got (%q,%q), want (%q,%q)", i, gotThinking, gotContent, tt.wantThink, tt.wantContent)
}
@@ -259,15 +258,15 @@ func TestThinkingStreaming(t *testing.T) {
}
for _, c := range cases {
parser := thinkingParser{
openingTag: "<think>",
closingTag: "</think>",
parser := Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
if c.skip {
continue
}
for i, step := range c.steps {
thinking, content := parser.addContent(step.input)
thinking, content := parser.AddContent(step.input)
if content != step.wantContent || thinking != step.wantThinking {
t.Errorf("case %q (step %d): got (%q,%q), want (%q,%q)", c.desc, i, content, thinking, step.wantContent, step.wantThinking)
}
@@ -277,127 +276,3 @@ func TestThinkingStreaming(t *testing.T) {
}
}
}
func TestInferThinkingTags(t *testing.T) {
cases := []struct {
desc string
tmplString string
wantOpeningTag string
wantClosingTag string
}{
{
desc: "basic",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
{
desc: "doubly nested range",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- range $j, $_ := .NotMessages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
{{ end }}
`,
wantOpeningTag: "",
wantClosingTag: "",
},
{
desc: "whitespace is trimmed",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
Some text before {{ .Thinking }} Some text after
{{ end }}
{{ end }}
`,
wantOpeningTag: "Some text before",
wantClosingTag: "Some text after",
},
{
desc: "qwen3",
tmplString: `
{{- if or .System .Tools .Thinking }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}
{{- if .Thinking }}
/think
{{- else }}
/no_think
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
}
for _, c := range cases {
tmpl := template.Must(template.New("test").Parse(c.tmplString))
openingTag, closingTag := inferThinkingTags(tmpl)
if openingTag != c.wantOpeningTag || closingTag != c.wantClosingTag {
t.Errorf("case %q: got (%q,%q), want (%q,%q)", c.desc, openingTag, closingTag, c.wantOpeningTag, c.wantClosingTag)
}
}
}

134
thinking/template.go Normal file
View File

@@ -0,0 +1,134 @@
package thinking
import (
"strings"
"text/template"
"text/template/parse"
)
func templateVisit(n parse.Node, enterFn func(parse.Node) bool, exitFn func(parse.Node)) {
if n == nil {
return
}
shouldContinue := enterFn(n)
if !shouldContinue {
return
}
switch x := n.(type) {
case *parse.ListNode:
for _, c := range x.Nodes {
templateVisit(c, enterFn, exitFn)
}
case *parse.BranchNode:
if x.Pipe != nil {
templateVisit(x.Pipe, enterFn, exitFn)
}
if x.List != nil {
templateVisit(x.List, enterFn, exitFn)
}
if x.ElseList != nil {
templateVisit(x.ElseList, enterFn, exitFn)
}
case *parse.ActionNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.WithNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.RangeNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.IfNode:
templateVisit(&x.BranchNode, enterFn, exitFn)
case *parse.TemplateNode:
templateVisit(x.Pipe, enterFn, exitFn)
case *parse.PipeNode:
for _, c := range x.Cmds {
templateVisit(c, enterFn, exitFn)
}
case *parse.CommandNode:
for _, a := range x.Args {
templateVisit(a, enterFn, exitFn)
}
// text, field, number, etc. are leaves nothing to recurse into
}
if exitFn != nil {
exitFn(n)
}
}
// InferTags uses a heuristic to infer the tags that surround thinking traces:
// We look for a range node that iterates over "Messages" and then look for a
// reference to "Thinking" like `{{.Thinking}}`. We then go up to the nearest
// ListNode and take the first and last TextNodes as the opening and closing
// tags.
func InferTags(t *template.Template) (string, string) {
ancestors := []parse.Node{}
openingTag := ""
closingTag := ""
enterFn := func(n parse.Node) bool {
ancestors = append(ancestors, n)
switch x := n.(type) {
case *parse.FieldNode:
if len(x.Ident) > 0 && x.Ident[0] == "Thinking" {
var mostRecentRange *parse.RangeNode
for i := len(ancestors) - 1; i >= 0; i-- {
if r, ok := ancestors[i].(*parse.RangeNode); ok {
mostRecentRange = r
break
}
}
if mostRecentRange == nil || !rangeUsesField(mostRecentRange, "Messages") {
return true
}
// TODO(drifkin): to be more robust, check that it's in the action
// part, not the `if`'s pipeline part. We do match on the nearest list
// that starts and ends with text nodes, which makes this not strictly
// necessary for our heuristic
// go up to the nearest ancestor that is a *parse.ListNode
for i := len(ancestors) - 1; i >= 0; i-- {
if l, ok := ancestors[i].(*parse.ListNode); ok {
firstNode := l.Nodes[0]
if t, ok := firstNode.(*parse.TextNode); ok {
openingTag = strings.TrimSpace(t.String())
}
lastNode := l.Nodes[len(l.Nodes)-1]
if t, ok := lastNode.(*parse.TextNode); ok {
closingTag = strings.TrimSpace(t.String())
}
break
}
}
}
}
return true
}
exitFn := func(n parse.Node) {
ancestors = ancestors[:len(ancestors)-1]
}
templateVisit(t.Root, enterFn, exitFn)
return openingTag, closingTag
}
// checks to see if the given field name is present in the pipeline of the given range node
func rangeUsesField(rangeNode *parse.RangeNode, field string) bool {
found := false
enterFn := func(n parse.Node) bool {
switch x := n.(type) {
case *parse.FieldNode:
if x.Ident[0] == field {
found = true
}
}
return true
}
templateVisit(rangeNode.BranchNode.Pipe, enterFn, nil)
return found
}

130
thinking/template_test.go Normal file
View File

@@ -0,0 +1,130 @@
package thinking
import (
"testing"
"text/template"
)
func TestInferThinkingTags(t *testing.T) {
cases := []struct {
desc string
tmplString string
wantOpeningTag string
wantClosingTag string
}{
{
desc: "basic",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
{
desc: "doubly nested range",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- range $j, $_ := .NotMessages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ end }}
{{ end }}
`,
wantOpeningTag: "",
wantClosingTag: "",
},
{
desc: "whitespace is trimmed",
tmplString: `
{{ if .Thinking}}
/think
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{ if and $last .Thinking }}
Some text before {{ .Thinking }} Some text after
{{ end }}
{{ end }}
`,
wantOpeningTag: "Some text before",
wantClosingTag: "Some text after",
},
{
desc: "qwen3",
tmplString: `
{{- if or .System .Tools .Thinking }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}
{{- if .Thinking }}
/think
{{- else }}
/no_think
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if and $last .Thinking }}
<think>{{ .Thinking }}</think>
{{ end }}
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
`,
wantOpeningTag: "<think>",
wantClosingTag: "</think>",
},
}
for _, c := range cases {
tmpl := template.Must(template.New("test").Parse(c.tmplString))
openingTag, closingTag := InferTags(tmpl)
if openingTag != c.wantOpeningTag || closingTag != c.wantClosingTag {
t.Errorf("case %q: got (%q,%q), want (%q,%q)", c.desc, openingTag, closingTag, c.wantOpeningTag, c.wantClosingTag)
}
}
}

View File

@@ -166,31 +166,26 @@ func extractToolArgs(tmpl *gotmpl.Template) (name, arguments string, err error)
return "", "", err
}
var obj any
err = json.Unmarshal(b.Bytes(), &obj)
if err != nil {
// Extract JSON object between curly braces
// JSON arrays are also valid as they will not be repeated in the template
output := b.String()
start := strings.Index(output, "{")
end := strings.LastIndex(output, "}")
if start == -1 || end == -1 || start > end {
return "", "", errors.New("no valid JSON object found in template output")
}
jsonStr := output[start : end+1]
var obj map[string]any
if err := json.Unmarshal([]byte(jsonStr), &obj); err != nil {
return "", "", err
}
var objs []map[string]any
switch v := obj.(type) {
case map[string]any:
objs = []map[string]any{v}
case []map[string]any:
objs = v
case []any:
objs = collect(v)
}
if len(objs) == 0 {
return "", "", errors.New("no template objects found")
}
// find the keys that correspond to the name and arguments fields
for k, v := range objs[0] {
switch v.(type) {
case string:
// Find name and arguments fields
for k, v := range obj {
if str, ok := v.(string); ok && str == "@@name@@" {
name = k
case map[string]any:
} else if _, ok := v.(map[string]any); ok {
arguments = k
}
}

View File

@@ -271,74 +271,99 @@ func TestExtractToolArgs(t *testing.T) {
cases := []struct {
name string
template string
want string
ok bool
wantName string
wantArgs string
wantErr bool
}{
{
name: "basic tool call with text after",
template: `{{if .ToolCalls}}tool response{{end}}`,
want: "tool response",
ok: true,
name: "basic tool call",
template: `{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}`,
wantName: "name",
wantArgs: "parameters",
wantErr: false,
},
{
name: "tool call with mixed content after",
template: `{{if .ToolCalls}}<tool_call>{{.Something}}{{end}}`,
want: "<tool_call>",
ok: true,
name: "tool call with whitespace",
template: `{{range .ToolCalls}}
{"name": "{{.Function.Name}}", "parameters": {{.Function.Arguments}}}
{{end}}`,
wantName: "name",
wantArgs: "parameters",
wantErr: false,
},
{
name: "tool call with no text after",
template: `{{if .ToolCalls}}{{.Something}}{{end}}`,
want: "",
ok: true,
},
{
name: "nested tool call",
template: `{{if .Something}}{{if .ToolCalls}}[TOOL_CALL]{{end}}{{end}}`,
want: "[TOOL_CALL]",
ok: true,
name: "tool call with extra content",
template: `Before {{range .ToolCalls}}
{"name": "{{.Function.Name}}", "arguments": {{.Function.Arguments}}}{{end}} After`,
wantName: "name",
wantArgs: "arguments",
wantErr: false,
},
{
name: "no tool calls",
template: `{{if .Something}}no tools here{{end}}`,
want: "",
ok: false,
wantName: "",
wantArgs: "",
wantErr: true,
},
{
name: "empty template",
template: ``,
want: "",
ok: false,
wantName: "",
wantArgs: "",
wantErr: true,
},
{
name: "multiple tool calls sections",
template: `{{if .ToolCalls}}first{{end}}{{if .ToolCalls}}second{{end}}`,
want: "first",
ok: true,
name: "prefix within tool call",
template: `{{- if .ToolCalls }}
{{ range .ToolCalls }}
<tool_call>
{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
</tool_call>{{ end }}{{- end }}`,
wantName: "name",
wantArgs: "arguments",
wantErr: false,
},
{
name: "range over tool calls",
template: `{{if .ToolCalls}}{{range .ToolCalls}}tool{{end}}{{end}}`,
want: "",
ok: true,
name: "JSON array",
template: `{{ range .ToolCalls }}
[{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}]{{ end }}`,
wantName: "name",
wantArgs: "arguments",
wantErr: false,
},
{
name: "tool calls with pipe delimiters",
template: `{{if .ToolCalls}}<|tool|>{{end}}`,
want: "<|tool|>",
ok: true,
name: "invalid JSON",
template: `{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}, invalid}{{ end }}`,
wantName: "",
wantArgs: "",
wantErr: true,
},
{
name: "tool calls with nested template",
template: `{{if .ToolCalls}}{{template "tool" .}}{{end}}`,
want: "",
ok: true,
name: "missing name field",
template: `{{ range .ToolCalls }}
{"parameters": {{ .Function.Arguments }}}{{ end }}`,
wantName: "",
wantArgs: "",
wantErr: true,
},
{
name: "tool calls with whitespace variations",
template: `{{if .ToolCalls}} tool {{end}}`,
want: " tool ",
ok: true,
name: "missing arguments field",
template: `{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}"}{{ end }}`,
wantName: "",
wantArgs: "",
wantErr: true,
},
{
name: "malformed JSON",
template: `{{ range .ToolCalls }}
{"name": {{ .Function.Name }}, "arguments": {{ .Function.Arguments }}{{ end }}`,
wantName: "",
wantArgs: "",
wantErr: true,
},
}
@@ -349,12 +374,20 @@ func TestExtractToolArgs(t *testing.T) {
t.Fatalf("failed to parse template: %v", err)
}
got, ok := extractToolCallsFormat(tmpl)
if got != tt.want {
t.Errorf("TextAfterToolCalls() got = %q, want %q", got, tt.want)
gotName, gotArgs, err := extractToolArgs(tmpl)
if (err != nil) != tt.wantErr {
t.Errorf("extractToolArgs() error = %v, wantErr %v", err, tt.wantErr)
return
}
if ok != tt.ok {
t.Errorf("TextAfterToolCalls() ok = %v, want %v", ok, tt.ok)
if err != nil {
return
}
if gotName != tt.wantName {
t.Errorf("extractToolArgs() gotName = %q, want %q", gotName, tt.wantName)
}
if gotArgs != tt.wantArgs {
t.Errorf("extractToolArgs() gotArgs = %q, want %q", gotArgs, tt.wantArgs)
}
})
}