Compare commits

...

3 Commits

Author SHA1 Message Date
Michael Yang
8d97d4b0ea use fs.gguf.File to show models 2025-08-28 17:30:42 -07:00
Michael Yang
d9d980c760 lazy gguf arrays 2025-08-28 17:03:00 -07:00
Michael Yang
12e13573a8 benchmark tests 2025-08-28 17:02:59 -07:00
7 changed files with 314 additions and 134 deletions

View File

@@ -4,6 +4,8 @@ import (
"bytes" "bytes"
"math/rand/v2" "math/rand/v2"
"os" "os"
"slices"
"strconv"
"strings" "strings"
"testing" "testing"
@@ -81,3 +83,47 @@ func TestWriteGGUF(t *testing.T) {
}) })
} }
} }
// BenchmarkReadArray measures decoding a GGUF file that contains a single
// large array key, across element types and maxArraySize settings
// (-1 = unlimited, 0 = skip arrays, 1024 = truncate).
func BenchmarkReadArray(b *testing.B) {
	b.ReportAllocs()

	// create writes kv to a temporary GGUF file and returns its path.
	// It takes testing.TB so failures and temp dirs attribute to the
	// (sub-)benchmark that invoked it, not the captured outer b.
	create := func(tb testing.TB, kv KV) string {
		tb.Helper()
		f, err := os.CreateTemp(tb.TempDir(), "")
		if err != nil {
			tb.Fatal(err)
		}
		defer f.Close()

		if err := WriteGGUF(f, kv, nil); err != nil {
			tb.Fatal(err)
		}
		return f.Name()
	}

	cases := map[string]any{
		"int32":   slices.Repeat([]int32{42}, 1_000_000),
		"uint32":  slices.Repeat([]uint32{42}, 1_000_000),
		"float32": slices.Repeat([]float32{42.}, 1_000_000),
		"string":  slices.Repeat([]string{"42"}, 1_000_000),
	}

	for name, bb := range cases {
		for _, maxArraySize := range []int{-1, 0, 1024} {
			b.Run(name+"-maxArraySize="+strconv.Itoa(maxArraySize), func(b *testing.B) {
				// Setup outside b.Loop is excluded from the timed region.
				p := create(b, KV{"array": bb})
				for b.Loop() {
					f, err := os.Open(p)
					if err != nil {
						b.Fatal(err)
					}

					if _, err := Decode(f, maxArraySize); err != nil {
						b.Fatal(err)
					}

					f.Close()
				}
			})
		}
	}
}

View File

@@ -35,9 +35,10 @@ type File struct {
Magic [4]byte Magic [4]byte
Version uint32 Version uint32
keyValues *lazy[KeyValue] keyValues *lazy[KeyValue]
tensors *lazy[TensorInfo] tensorInfos *lazy[TensorInfo]
offset int64 offset int64
n uint64
file *os.File file *os.File
reader *bufferedReader reader *bufferedReader
@@ -69,12 +70,12 @@ func Open(path string) (f *File, err error) {
return nil, fmt.Errorf("%w version %v", ErrUnsupported, f.Version) return nil, fmt.Errorf("%w version %v", ErrUnsupported, f.Version)
} }
f.tensors, err = newLazy(f, f.readTensor) f.tensorInfos, err = newLazy(f, f.readTensor)
if err != nil { if err != nil {
return nil, err return nil, err
} }
f.tensors.successFunc = func() error { f.tensorInfos.successFunc = func() error {
offset := f.reader.offset offset := f.reader.offset
alignment := cmp.Or(f.KeyValue("general.alignment").Int(), 32) alignment := cmp.Or(f.KeyValue("general.alignment").Int(), 32)
@@ -119,12 +120,15 @@ func (f *File) readTensor() (TensorInfo, error) {
return TensorInfo{}, err return TensorInfo{}, err
} }
return TensorInfo{ tensorInfo := TensorInfo{
Name: name, Name: name,
Offset: offset, Offset: offset,
Shape: shape, Shape: shape,
Type: TensorType(type_), Type: TensorType(type_),
}, nil }
f.n += tensorInfo.NumValues()
return tensorInfo, nil
} }
func (f *File) readKeyValue() (KeyValue, error) { func (f *File) readKeyValue() (KeyValue, error) {
@@ -186,20 +190,20 @@ func read[T any](f *File) (t T, err error) {
} }
func readString(f *File) (string, error) { func readString(f *File) (string, error) {
n, err := read[uint64](f) bts := f.bts[:8]
if err != nil { if _, err := io.ReadFull(f.reader, bts); err != nil {
return "", err return "", err
} }
n := binary.LittleEndian.Uint64(bts)
if int(n) > len(f.bts) { if int(n) > len(f.bts) {
f.bts = make([]byte, n) f.bts = make([]byte, n)
} }
bts := f.bts[:n] bts = f.bts[:n]
if _, err := io.ReadFull(f.reader, bts); err != nil { if _, err := io.ReadFull(f.reader, bts); err != nil {
return "", err return "", err
} }
defer clear(bts)
return string(bts), nil return string(bts), nil
} }
@@ -245,37 +249,70 @@ func readArray(f *File) (any, error) {
} }
} }
func readArrayData[T any](f *File, n uint64) (s []T, err error) { func readArrayData[T any](f *File, n uint64) (*lazy[T], error) {
s = make([]T, n) offset := f.reader.offset
for i := range n {
e, err := read[T](f)
if err != nil {
return nil, err
}
s[i] = e var t T
if _, err := f.reader.Discard(int(n) * binary.Size(t)); err != nil {
return nil, err
} }
return s, nil sr := io.NewSectionReader(f.file, offset, int64(int(n)*binary.Size(t)))
next, stop := iter.Pull(func(yield func(T) bool) {
s := make([]T, n)
if err := binary.Read(sr, binary.LittleEndian, &s); err != nil {
return
}
for _, e := range s {
if !yield(e) {
return
}
}
})
return &lazy[T]{count: n, next: next, stop: stop}, nil
} }
func readArrayString(f *File, n uint64) (s []string, err error) { func readArrayString(f *File, n uint64) (*lazy[string], error) {
s = make([]string, n) offset := f.reader.offset
for i := range n {
e, err := readString(f) var size int64
if err != nil { for range n {
bts := f.bts[:8]
if _, err := io.ReadFull(f.reader, bts); err != nil {
return nil, err return nil, err
} }
s[i] = e n := int(binary.LittleEndian.Uint64(bts))
if _, err := f.reader.Discard(n); err != nil {
return nil, err
}
size += 8 + int64(n)
} }
return s, nil sr := io.NewSectionReader(f.file, offset, size)
next, stop := iter.Pull(func(yield func(string) bool) {
f := File{reader: newBufferedReader(sr, 16<<10), bts: make([]byte, 4096)}
for range n {
s, err := readString(&f)
if err != nil {
return
}
if !yield(s) {
return
}
}
})
return &lazy[string]{count: n, next: next, stop: stop}, nil
} }
func (f *File) Close() error { func (f *File) Close() error {
f.keyValues.stop() f.keyValues.stop()
f.tensors.stop() f.tensorInfos.stop()
return f.file.Close() return f.file.Close()
} }
@@ -308,15 +345,15 @@ func (f *File) KeyValues() iter.Seq2[int, KeyValue] {
} }
func (f *File) TensorInfo(name string) TensorInfo { func (f *File) TensorInfo(name string) TensorInfo {
if index := slices.IndexFunc(f.tensors.values, func(t TensorInfo) bool { if index := slices.IndexFunc(f.tensorInfos.values, func(t TensorInfo) bool {
return t.Name == name return t.Name == name
}); index >= 0 { }); index >= 0 {
return f.tensors.values[index] return f.tensorInfos.values[index]
} }
// fast-forward through key values if we haven't already // fast-forward through key values if we haven't already
_ = f.keyValues.rest() _ = f.keyValues.rest()
for tensor, ok := f.tensors.next(); ok; tensor, ok = f.tensors.next() { for tensor, ok := f.tensorInfos.next(); ok; tensor, ok = f.tensorInfos.next() {
if tensor.Name == name { if tensor.Name == name {
return tensor return tensor
} }
@@ -326,13 +363,13 @@ func (f *File) TensorInfo(name string) TensorInfo {
} }
func (f *File) NumTensors() int { func (f *File) NumTensors() int {
return int(f.tensors.count) return int(f.tensorInfos.count)
} }
func (f *File) TensorInfos() iter.Seq2[int, TensorInfo] { func (f *File) TensorInfos() iter.Seq2[int, TensorInfo] {
// fast forward through key values if we haven't already // fast forward through key values if we haven't already
f.keyValues.rest() _ = f.keyValues.rest()
return f.tensors.All() return f.tensorInfos.All()
} }
func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) { func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) {
@@ -342,6 +379,11 @@ func (f *File) TensorReader(name string) (TensorInfo, io.Reader, error) {
} }
// fast forward through tensor info if we haven't already // fast forward through tensor info if we haven't already
_ = f.tensors.rest() _ = f.tensorInfos.rest()
return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), t.NumBytes()), nil return t, io.NewSectionReader(f.file, f.offset+int64(t.Offset), int64(t.NumBytes())), nil
}
func (f *File) NumValues() uint64 {
_ = f.tensorInfos.rest()
return f.n
} }

View File

@@ -3,6 +3,7 @@ package gguf_test
import ( import (
"bytes" "bytes"
"os" "os"
"slices"
"strconv" "strconv"
"strings" "strings"
"testing" "testing"
@@ -247,3 +248,43 @@ func BenchmarkRead(b *testing.B) {
f.Close() f.Close()
} }
} }
// BenchmarkReadArray measures opening a GGUF file containing a single large
// array key and looking that key up, across element types.
func BenchmarkReadArray(b *testing.B) {
	b.ReportAllocs()

	// create writes kv to a temporary GGUF file and returns its path.
	// It takes testing.TB so failures and temp dirs attribute to the
	// (sub-)benchmark that invoked it, not the captured outer b.
	create := func(tb testing.TB, kv ggml.KV) string {
		tb.Helper()
		f, err := os.CreateTemp(tb.TempDir(), "")
		if err != nil {
			tb.Fatal(err)
		}
		defer f.Close()

		if err := ggml.WriteGGUF(f, kv, nil); err != nil {
			tb.Fatal(err)
		}
		return f.Name()
	}

	cases := map[string]any{
		"int32":   slices.Repeat([]int32{42}, 1_000_000),
		"uint32":  slices.Repeat([]uint32{42}, 1_000_000),
		"float32": slices.Repeat([]float32{42.}, 1_000_000),
		"string":  slices.Repeat([]string{"42"}, 1_000_000),
	}

	for name, bb := range cases {
		b.Run(name, func(b *testing.B) {
			// Setup outside b.Loop is excluded from the timed region.
			p := create(b, ggml.KV{"array": bb})
			for b.Loop() {
				f, err := gguf.Open(p)
				if err != nil {
					b.Fatal(err)
				}

				_ = f.KeyValue("array")

				f.Close()
			}
		})
	}
}

View File

@@ -1,6 +1,9 @@
package gguf package gguf
import ( import (
"encoding/json"
"iter"
"log/slog"
"reflect" "reflect"
"slices" "slices"
) )
@@ -11,32 +14,15 @@ type KeyValue struct {
} }
func (kv KeyValue) Valid() bool { func (kv KeyValue) Valid() bool {
return kv.Key != "" && kv.Value.value != nil return kv.Key != "" && kv.value != nil
} }
type Value struct { type Value struct {
value any value any
} }
func value[T any](v Value, kinds ...reflect.Kind) (t T) { func (v Value) MarshalJSON() ([]byte, error) {
vv := reflect.ValueOf(v.value) return json.Marshal(v.value)
if slices.Contains(kinds, vv.Kind()) {
t = vv.Convert(reflect.TypeOf(t)).Interface().(T)
}
return
}
func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
switch vv := reflect.ValueOf(v.value); vv.Kind() {
case reflect.Slice:
if slices.Contains(kinds, vv.Type().Elem().Kind()) {
ts = make([]T, vv.Len())
for i := range vv.Len() {
ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T)
}
}
}
return
} }
// Int returns Value as a signed integer. If it is not a signed integer, it returns 0. // Int returns Value as a signed integer. If it is not a signed integer, it returns 0.
@@ -88,3 +74,44 @@ func (v Value) String() string {
func (v Value) Strings() (strings []string) { func (v Value) Strings() (strings []string) {
return values[string](v, reflect.String) return values[string](v, reflect.String)
} }
// value converts v's underlying value to T, provided its reflect.Kind is one
// of kinds; otherwise it returns the zero value of T.
func value[T any](v Value, kinds ...reflect.Kind) (t T) {
	rv := reflect.ValueOf(v.value)
	if !slices.Contains(kinds, rv.Kind()) {
		return t
	}
	return rv.Convert(reflect.TypeOf(t)).Interface().(T)
}
// values converts v's underlying value to a []T. It handles two cases: a
// pointer (presumably a *lazy[...] produced by the reader — its exported
// Values method and unexported count field are accessed via reflection), and
// a plain slice whose element kind is one of kinds. Anything else yields nil.
func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
	switch vv := reflect.ValueOf(v.value); vv.Kind() {
	case reflect.Ptr:
		// NOTE(review): kinds is not checked on this branch — element-kind
		// filtering appears to be skipped for lazily-read arrays; confirm
		// that is intentional.
		out := vv.MethodByName("Values").Call(nil)
		if len(out) > 0 && out[0].IsValid() {
			// Pull elements one at a time from the iterator returned by
			// Values; stop releases the iterator when we return.
			next, stop := iter.Pull(out[0].Seq())
			defer stop()
			// Pre-size from the lazy value's "count" field.
			ts = make([]T, vv.Elem().FieldByName("count").Uint())
			for i := range ts {
				t, ok := next()
				if !ok {
					// Iterator ended early (e.g. a read error swallowed by
					// the producer): log and return nil rather than a
					// partially-filled slice.
					slog.Error("error reading value", "index", i)
					return nil
				}
				ts[i] = t.Convert(reflect.TypeOf(ts[i])).Interface().(T)
			}
			return ts
		}
	case reflect.Slice:
		// Eagerly-materialized array: convert element-by-element when the
		// element kind matches one of the requested kinds.
		if slices.Contains(kinds, vv.Type().Elem().Kind()) {
			ts = make([]T, vv.Len())
			for i := range vv.Len() {
				ts[i] = vv.Index(i).Convert(reflect.TypeOf(ts[i])).Interface().(T)
			}
		}
	}
	return
}

View File

@@ -21,3 +21,9 @@ func (rs *bufferedReader) Read(p []byte) (n int, err error) {
rs.offset += int64(n) rs.offset += int64(n)
return n, err return n, err
} }
// Discard skips the next n bytes, keeping the tracked offset in sync with
// however many bytes were actually discarded.
func (rs *bufferedReader) Discard(n int) (discarded int, err error) {
	d, err := rs.Reader.Discard(n)
	rs.offset += int64(d)
	return d, err
}

View File

@@ -16,17 +16,17 @@ func (ti TensorInfo) Valid() bool {
return ti.Name != "" && ti.NumBytes() > 0 return ti.Name != "" && ti.NumBytes() > 0
} }
func (ti TensorInfo) NumValues() int64 { func (ti TensorInfo) NumValues() uint64 {
var numItems int64 = 1 var numItems uint64 = 1
for _, dim := range ti.Shape { for _, dim := range ti.Shape {
numItems *= int64(dim) numItems *= dim
} }
return numItems return numItems
} }
// NumBytes returns the number of bytes in the tensor. // NumBytes returns the number of bytes in the tensor.
func (ti TensorInfo) NumBytes() int64 { func (ti TensorInfo) NumBytes() uint64 {
return int64(float64(ti.NumValues()) * ti.Type.NumBytes()) return uint64(float64(ti.NumValues()) * ti.Type.NumBytes())
} }
func (ti TensorInfo) LogValue() slog.Value { func (ti TensorInfo) LogValue() slog.Value {
@@ -34,8 +34,8 @@ func (ti TensorInfo) LogValue() slog.Value {
slog.String("name", ti.Name), slog.String("name", ti.Name),
slog.Int64("offset", int64(ti.Offset)), slog.Int64("offset", int64(ti.Offset)),
slog.Any("shape", ti.Shape), slog.Any("shape", ti.Shape),
slog.Int64("num_values", ti.NumValues()), slog.Uint64("num_values", ti.NumValues()),
slog.Int64("num_bytes", ti.NumBytes()), slog.Uint64("num_bytes", ti.NumBytes()),
slog.Any("type", ti.Type), slog.Any("type", ti.Type),
) )
} }
@@ -97,6 +97,8 @@ const (
tensorTypeIQ4_NL_4_4 tensorTypeIQ4_NL_4_4
tensorTypeIQ4_NL_4_8 tensorTypeIQ4_NL_4_8
tensorTypeIQ4_NL_8_8 tensorTypeIQ4_NL_8_8
TensorTypeMXFP4
) )
func (tt TensorType) NumBytes() float64 { func (tt TensorType) NumBytes() float64 {
@@ -163,6 +165,8 @@ func (tt TensorType) typeSize() int64 {
return tt.blockSize()/8 + tt.blockSize()/16 + tt.blockSize()/32 return tt.blockSize()/8 + tt.blockSize()/16 + tt.blockSize()/32
case TensorTypeBF16: case TensorTypeBF16:
return 2 return 2
case 4, TensorTypeMXFP4:
return 1 + tt.blockSize() / 2
default: default:
return 0 return 0
} }
@@ -185,7 +189,8 @@ func (tt TensorType) blockSize() int64 {
TensorTypeQ5_1, TensorTypeQ5_1,
TensorTypeQ8_0, TensorTypeQ8_0,
TensorTypeQ8_1, TensorTypeQ8_1,
tensorTypeIQ4_NL: tensorTypeIQ4_NL,
4, TensorTypeMXFP4:
return 32 return 32
default: default:
return 256 return 256
@@ -195,83 +200,85 @@ func (tt TensorType) blockSize() int64 {
func (tt TensorType) String() string { func (tt TensorType) String() string {
switch tt { switch tt {
case TensorTypeF32: case TensorTypeF32:
return "f32" return "F32"
case TensorTypeF16: case TensorTypeF16:
return "f16" return "F16"
case TensorTypeQ4_0: case TensorTypeQ4_0:
return "q4_0" return "Q4_0"
case TensorTypeQ4_1: case TensorTypeQ4_1:
return "q4_1" return "Q4_1"
case tensorTypeQ4_2: // case tensorTypeQ4_2:
return "q4_2" // return "Q4_2"
case tensorTypeQ4_3: case tensorTypeQ4_3:
return "q4_3" return "Q4_3"
case TensorTypeQ5_0: case TensorTypeQ5_0:
return "q5_0" return "Q5_0"
case TensorTypeQ5_1: case TensorTypeQ5_1:
return "q5_1" return "Q5_1"
case TensorTypeQ8_0: case TensorTypeQ8_0:
return "q8_0" return "Q8_0"
case TensorTypeQ8_1: case TensorTypeQ8_1:
return "q8_1" return "Q8_1"
case TensorTypeQ2_K: case TensorTypeQ2_K:
return "q2_k" return "Q2_K"
case TensorTypeQ3_K: case TensorTypeQ3_K:
return "q3_k" return "Q3_K"
case TensorTypeQ4_K: case TensorTypeQ4_K:
return "q4_k" return "Q4_K"
case TensorTypeQ5_K: case TensorTypeQ5_K:
return "q5_k" return "Q5_K"
case TensorTypeQ6_K: case TensorTypeQ6_K:
return "q6_k" return "Q6_K"
case TensorTypeQ8_K: case TensorTypeQ8_K:
return "q8_k" return "Q8_K"
case tensorTypeIQ2_XXS: case tensorTypeIQ2_XXS:
return "iq2_xxs" return "IQ2_XXS"
case tensorTypeIQ2_XS: case tensorTypeIQ2_XS:
return "iq2_xs" return "IQ2_XS"
case tensorTypeIQ3_XXS: case tensorTypeIQ3_XXS:
return "iq3_xxs" return "IQ3_XXS"
case tensorTypeIQ1_S: case tensorTypeIQ1_S:
return "iq1_s" return "IQ1_S"
case tensorTypeIQ4_NL: case tensorTypeIQ4_NL:
return "iq4_nl" return "IQ4_NL"
case tensorTypeIQ3_S: case tensorTypeIQ3_S:
return "iq3_s" return "IQ3_S"
case tensorTypeIQ2_S: case tensorTypeIQ2_S:
return "iq2_s" return "IQ2_S"
case tensorTypeIQ4_XS: case tensorTypeIQ4_XS:
return "iq4_xs" return "IQ4_XS"
case TensorTypeI8: case TensorTypeI8:
return "i8" return "I8"
case TensorTypeI16: case TensorTypeI16:
return "i16" return "I16"
case TensorTypeI32: case TensorTypeI32:
return "i32" return "I32"
case TensorTypeI64: case TensorTypeI64:
return "i64" return "I64"
case TensorTypeF64: case TensorTypeF64:
return "f64" return "F64"
case tensorTypeIQ1_M: case tensorTypeIQ1_M:
return "iq1_m" return "IQ1_M"
case TensorTypeBF16: case TensorTypeBF16:
return "bf16" return "BF16"
case tensorTypeQ4_0_4_4: case tensorTypeQ4_0_4_4:
return "q4_0_4_4" return "Q4_0_4_4"
case tensorTypeQ4_0_4_8: case tensorTypeQ4_0_4_8:
return "q4_0_4_8" return "Q4_0_4_8"
case tensorTypeQ4_0_8_8: case tensorTypeQ4_0_8_8:
return "q4_0_8_8" return "Q4_0_8_8"
case tensorTypeTQ1_0: case tensorTypeTQ1_0:
return "tq1_0" return "TQ1_0"
case tensorTypeTQ2_0: case tensorTypeTQ2_0:
return "tq2_0" return "TQ2_0"
case tensorTypeIQ4_NL_4_4: case tensorTypeIQ4_NL_4_4:
return "iq4_nl_4_4" return "IQ4_NL_4_4"
case tensorTypeIQ4_NL_4_8: case tensorTypeIQ4_NL_4_8:
return "iq4_nl_4_8" return "IQ4_NL_4_8"
case tensorTypeIQ4_NL_8_8: case tensorTypeIQ4_NL_8_8:
return "iq4_nl_8_8" return "IQ4_NL_8_8"
case 4, TensorTypeMXFP4:
return "MXFP4"
default: default:
return "unknown" return "unknown"
} }

View File

@@ -31,7 +31,7 @@ import (
"github.com/ollama/ollama/discover" "github.com/ollama/ollama/discover"
"github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/fs/ggml" "github.com/ollama/ollama/fs/gguf"
"github.com/ollama/ollama/harmony" "github.com/ollama/ollama/harmony"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
"github.com/ollama/ollama/logutil" "github.com/ollama/ollama/logutil"
@@ -534,11 +534,12 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return return
} }
kvData, _, err := getModelData(m.ModelPath, false) f, err := gguf.Open(m.ModelPath)
if err != nil { if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
defer f.Close()
var count int var count int
for i, s := range input { for i, s := range input {
@@ -548,7 +549,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return return
} }
ctxLen := min(opts.NumCtx, int(kvData.ContextLength())) ctxLen := min(opts.NumCtx, int(f.KeyValue("context_length").Int()))
if len(tokens) > ctxLen { if len(tokens) > ctxLen {
if !truncate { if !truncate {
c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"}) c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
@@ -951,53 +952,63 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
fmt.Fprint(&sb, m.String()) fmt.Fprint(&sb, m.String())
resp.Modelfile = sb.String() resp.Modelfile = sb.String()
kvData, tensors, err := getModelData(m.ModelPath, req.Verbose) f, err := gguf.Open(m.ModelPath)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer f.Close()
delete(kvData, "general.name") resp.ModelInfo = make(map[string]any, f.NumKeyValues())
delete(kvData, "tokenizer.chat_template") for _, keyValue := range f.KeyValues() {
resp.ModelInfo = kvData if !slices.Contains([]string{"general.name", "tokenizer.chat_template"}, keyValue.Key) {
resp.ModelInfo[keyValue.Key] = keyValue.Value
tensorData := make([]api.Tensor, len(tensors.Items())) }
for cnt, t := range tensors.Items() {
tensorData[cnt] = api.Tensor{Name: t.Name, Type: t.Type(), Shape: t.Shape}
} }
resp.Tensors = tensorData
resp.Tensors = make([]api.Tensor, f.NumTensors())
for i, tensorInfo := range f.TensorInfos() {
resp.Tensors[i] = api.Tensor{
Name: tensorInfo.Name,
Type: tensorInfo.Type.String(),
Shape: tensorInfo.Shape,
}
}
resp.ModelInfo["general.parameter_count"] = f.NumValues()
if len(m.ProjectorPaths) > 0 { if len(m.ProjectorPaths) > 0 {
projectorData, _, err := getModelData(m.ProjectorPaths[0], req.Verbose) f, err := gguf.Open(m.ProjectorPaths[0])
if err != nil { if err != nil {
return nil, err return nil, err
} }
resp.ProjectorInfo = projectorData defer f.Close()
resp.ProjectorInfo = make(map[string]any, f.NumKeyValues())
for _, keyValue := range f.KeyValues() {
resp.ProjectorInfo[keyValue.Key] = keyValue.Value
}
} }
return resp, nil return resp, nil
} }
func getModelData(digest string, verbose bool) (ggml.KV, ggml.Tensors, error) { func getModelData(digest string, verbose bool) ([]gguf.KeyValue, []gguf.TensorInfo, error) {
maxArraySize := 0 f, err := gguf.Open(digest)
if verbose {
maxArraySize = -1
}
data, err := llm.LoadModel(digest, maxArraySize)
if err != nil { if err != nil {
return nil, ggml.Tensors{}, err return nil, nil, err
}
defer f.Close()
keyValues := make([]gguf.KeyValue, f.NumKeyValues())
for i, keyValue := range f.KeyValues() {
keyValues[i] = keyValue
} }
kv := data.KV() tensorInfos := make([]gguf.TensorInfo, f.NumTensors())
for i, info := range f.TensorInfos() {
if !verbose { tensorInfos[i] = info
for k := range kv {
if t, ok := kv[k].([]any); len(t) > 5 && ok {
kv[k] = []any{}
}
}
} }
return kv, data.Tensors(), nil return keyValues, tensorInfos, nil
} }
func (s *Server) ListHandler(c *gin.Context) { func (s *Server) ListHandler(c *gin.Context) {