mirror of
https://github.com/ollama/ollama.git
synced 2026-01-22 22:40:07 -05:00
Compare commits
17 Commits
royh-param
...
v0.1.46
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb42e607c5 | ||
|
|
2aa91a937b | ||
|
|
ccef9431c8 | ||
|
|
9a9e7d83c4 | ||
|
|
189a43caa2 | ||
|
|
e835ef1836 | ||
|
|
7e7749224c | ||
|
|
c7c2f3bc22 | ||
|
|
54a79d6a8a | ||
|
|
5bf5aeec01 | ||
|
|
e01e535cbb | ||
|
|
0195d6a2f8 | ||
|
|
8e0641a9bf | ||
|
|
662568d453 | ||
|
|
4ebb66c662 | ||
|
|
23e899f32d | ||
|
|
1a1c99e334 |
@@ -182,6 +182,12 @@ $ ollama run llama3 "Summarize this file: $(cat README.md)"
|
||||
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
|
||||
```
|
||||
|
||||
### Show model information
|
||||
|
||||
```
|
||||
ollama show llama3
|
||||
```
|
||||
|
||||
### List models on your computer
|
||||
|
||||
```
|
||||
|
||||
13
api/types.go
13
api/types.go
@@ -608,6 +608,19 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) {
|
||||
} else {
|
||||
field := valueOpts.FieldByName(opt.Name)
|
||||
if field.IsValid() && field.CanSet() {
|
||||
if reflect.PointerTo(field.Type()) == reflect.TypeOf((*TriState)(nil)) {
|
||||
boolVal, err := strconv.ParseBool(vals[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid bool value %s", vals)
|
||||
}
|
||||
if boolVal {
|
||||
out[key] = TriStateTrue
|
||||
} else {
|
||||
out[key] = TriStateFalse
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
switch field.Kind() {
|
||||
case reflect.Float32:
|
||||
floatVal, err := strconv.ParseFloat(vals[0], 32)
|
||||
|
||||
@@ -2,6 +2,7 @@ package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -141,3 +142,65 @@ func TestUseMmapParsingFromJSON(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUseMmapFormatParams(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
req map[string][]string
|
||||
exp TriState
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "True",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"true"},
|
||||
},
|
||||
exp: TriStateTrue,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "False",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"false"},
|
||||
},
|
||||
exp: TriStateFalse,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "Numeric True",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"1"},
|
||||
},
|
||||
exp: TriStateTrue,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "Numeric False",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"0"},
|
||||
},
|
||||
exp: TriStateFalse,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "invalid string",
|
||||
req: map[string][]string{
|
||||
"use_mmap": []string{"foo"},
|
||||
},
|
||||
exp: TriStateUndefined,
|
||||
err: fmt.Errorf("invalid bool value [foo]"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
resp, err := FormatParams(test.req)
|
||||
require.Equal(t, err, test.err)
|
||||
respVal, ok := resp["use_mmap"]
|
||||
if test.exp != TriStateUndefined {
|
||||
assert.True(t, ok, "resp: %v", resp)
|
||||
assert.Equal(t, test.exp, respVal)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
65
cmd/cmd.go
65
cmd/cmd.go
@@ -287,38 +287,12 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
|
||||
}
|
||||
|
||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
name := args[0]
|
||||
|
||||
// check if the model exists on the server
|
||||
show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
var statusError api.StatusError
|
||||
switch {
|
||||
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
interactive := true
|
||||
|
||||
opts := runOptions{
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
MultiModal: slices.Contains(show.Details.Families, "clip"),
|
||||
ParentModel: show.Details.ParentModel,
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
}
|
||||
|
||||
format, err := cmd.Flags().GetString("format")
|
||||
@@ -362,11 +336,38 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
opts.WordWrap = !nowrap
|
||||
|
||||
if !interactive {
|
||||
return generate(cmd, opts)
|
||||
// Fill out the rest of the options based on information about the
|
||||
// model.
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return generateInteractive(cmd, opts)
|
||||
name := args[0]
|
||||
info, err := func() (*api.ShowResponse, error) {
|
||||
showReq := &api.ShowRequest{Name: name}
|
||||
info, err := client.Show(cmd.Context(), showReq)
|
||||
var se api.StatusError
|
||||
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
}
|
||||
return info, err
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
|
||||
opts.ParentModel = info.Details.ParentModel
|
||||
opts.Messages = append(opts.Messages, info.Messages...)
|
||||
|
||||
if interactive {
|
||||
return generateInteractive(cmd, opts)
|
||||
}
|
||||
return generate(cmd, opts)
|
||||
}
|
||||
|
||||
func errFromUnknownKey(unknownKeyErr error) error {
|
||||
|
||||
@@ -31,65 +31,40 @@ const (
|
||||
)
|
||||
|
||||
func loadModel(cmd *cobra.Command, opts *runOptions) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p := progress.NewProgress(os.Stderr)
|
||||
defer p.StopAndClear()
|
||||
|
||||
spinner := progress.NewSpinner("")
|
||||
p.Add("", spinner)
|
||||
|
||||
showReq := api.ShowRequest{Name: opts.Model}
|
||||
showResp, err := client.Show(cmd.Context(), &showReq)
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
|
||||
opts.ParentModel = showResp.Details.ParentModel
|
||||
|
||||
if len(showResp.Messages) > 0 {
|
||||
opts.Messages = append(opts.Messages, showResp.Messages...)
|
||||
}
|
||||
|
||||
chatReq := &api.ChatRequest{
|
||||
Model: opts.Model,
|
||||
Messages: []api.Message{},
|
||||
Model: opts.Model,
|
||||
KeepAlive: opts.KeepAlive,
|
||||
}
|
||||
|
||||
if opts.KeepAlive != nil {
|
||||
chatReq.KeepAlive = opts.KeepAlive
|
||||
}
|
||||
|
||||
err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
||||
return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
||||
p.StopAndClear()
|
||||
if len(opts.Messages) > 0 {
|
||||
for _, msg := range opts.Messages {
|
||||
switch msg.Role {
|
||||
case "user":
|
||||
fmt.Printf(">>> %s\n", msg.Content)
|
||||
case "assistant":
|
||||
state := &displayResponseState{}
|
||||
displayResponse(msg.Content, opts.WordWrap, state)
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
for _, msg := range opts.Messages {
|
||||
switch msg.Role {
|
||||
case "user":
|
||||
fmt.Printf(">>> %s\n", msg.Content)
|
||||
case "assistant":
|
||||
state := &displayResponseState{}
|
||||
displayResponse(msg.Content, opts.WordWrap, state)
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
opts.Messages = make([]api.Message, 0)
|
||||
|
||||
err := loadModel(cmd, &opts)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -77,20 +77,27 @@ func cleanupTmpDirs() {
|
||||
continue
|
||||
}
|
||||
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid"))
|
||||
if err == nil {
|
||||
pid, err := strconv.Atoi(string(raw))
|
||||
if err == nil {
|
||||
if proc, err := os.FindProcess(pid); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
||||
// Another running ollama, ignore this tmpdir
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog.Debug("failed to open ollama.pid", "path", d, "error", err)
|
||||
}
|
||||
err = os.RemoveAll(d)
|
||||
if err != nil {
|
||||
slog.Debug("unable to cleanup stale tmpdir", "path", d, "error", err)
|
||||
slog.Warn("failed to read ollama.pid", "path", d, "error", err)
|
||||
// No pid, ignore this tmpdir
|
||||
continue
|
||||
}
|
||||
|
||||
pid, err := strconv.Atoi(string(raw))
|
||||
if err != nil {
|
||||
slog.Warn("failed to parse pid", "path", d, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
|
||||
slog.Warn("found running ollama", "pid", pid, "path", d)
|
||||
// Another running ollama, ignore this tmpdir
|
||||
continue
|
||||
}
|
||||
|
||||
if err := os.Remove(d); err != nil {
|
||||
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
13
llm/ggla.go
13
llm/ggla.go
@@ -53,7 +53,7 @@ func (llm *ggla) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
|
||||
var r uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
|
||||
return err
|
||||
@@ -69,9 +69,18 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
for {
|
||||
var dims uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if errors.Is(retErr, io.EOF) {
|
||||
retErr = io.ErrUnexpectedEOF
|
||||
}
|
||||
}()
|
||||
|
||||
var namesize uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
|
||||
return err
|
||||
@@ -108,7 +117,7 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := rs.Seek((offset+31)&-32, io.SeekStart); err != nil {
|
||||
if _, err := rs.Seek((offset+31)&-32-offset, io.SeekCurrent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
65
llm/ggml.go
65
llm/ggml.go
@@ -6,6 +6,8 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/util/bufioutil"
|
||||
)
|
||||
|
||||
type GGML struct {
|
||||
@@ -69,6 +71,30 @@ func (kv KV) HeadCountKV() uint64 {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCount() uint64 {
|
||||
if heads := kv.HeadCount(); heads > 0 {
|
||||
return kv.EmbeddingLength() / kv.HeadCount()
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountK() uint64 {
|
||||
if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
|
||||
return k
|
||||
}
|
||||
|
||||
return kv.EmbeddingHeadCount()
|
||||
}
|
||||
|
||||
func (kv KV) EmbeddingHeadCountV() uint64 {
|
||||
if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
|
||||
return v
|
||||
}
|
||||
|
||||
return kv.EmbeddingHeadCount()
|
||||
}
|
||||
|
||||
func (kv KV) GQA() uint64 {
|
||||
return kv.HeadCount() / kv.HeadCountKV()
|
||||
}
|
||||
@@ -254,7 +280,18 @@ func DetectGGMLType(b []byte) string {
|
||||
}
|
||||
}
|
||||
|
||||
func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
|
||||
// DecodeGGML decodes a GGML model from the given reader.
|
||||
//
|
||||
// It collects array values for arrays with a size less than or equal to
|
||||
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
|
||||
// the maxArraySize is negative, all arrays are collected.
|
||||
func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
|
||||
if maxArraySize == 0 {
|
||||
maxArraySize = 1024
|
||||
}
|
||||
|
||||
rs = bufioutil.NewBufferedSeeker(rs, 32<<10)
|
||||
|
||||
var magic uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
|
||||
return nil, 0, err
|
||||
@@ -267,17 +304,15 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, int64, error) {
|
||||
case FILE_MAGIC_GGLA:
|
||||
c = &containerGGLA{}
|
||||
case FILE_MAGIC_GGUF_LE:
|
||||
c = &containerGGUF{ByteOrder: binary.LittleEndian}
|
||||
c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
|
||||
case FILE_MAGIC_GGUF_BE:
|
||||
c = &containerGGUF{ByteOrder: binary.BigEndian}
|
||||
c = &containerGGUF{ByteOrder: binary.BigEndian, maxArraySize: maxArraySize}
|
||||
default:
|
||||
return nil, 0, errors.New("invalid file magic")
|
||||
}
|
||||
|
||||
model, err := c.Decode(rs)
|
||||
if errors.Is(err, io.EOF) {
|
||||
// noop
|
||||
} else if err != nil {
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
@@ -297,7 +332,10 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
embedding := llm.KV().EmbeddingLength()
|
||||
heads := llm.KV().HeadCount()
|
||||
headsKV := llm.KV().HeadCountKV()
|
||||
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
|
||||
vocab := uint64(llm.KV()["tokenizer.ggml.tokens"].(*array).size)
|
||||
|
||||
embeddingHeads := llm.KV().EmbeddingHeadCount()
|
||||
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
|
||||
|
||||
layers := llm.Tensors().Layers()
|
||||
|
||||
@@ -308,7 +346,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
partialOffload = 4 * batch * embedding
|
||||
partialOffload += max(
|
||||
// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
|
||||
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embedding/heads*headsKV),
|
||||
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV),
|
||||
4*batch*(embedding+vocab)+embedding*vocab*105/128,
|
||||
)
|
||||
|
||||
@@ -316,15 +354,15 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
// mixtral 8x22b
|
||||
ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
|
||||
partialOffload = max(
|
||||
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embedding/heads*headsKV),
|
||||
4*(context*batch*heads+context*embedding/heads*headsKV+batch*1024+embedding/heads*headsKV*batch),
|
||||
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embeddingHeads*headsKV),
|
||||
4*(context*batch*heads+context*embeddingHeads*headsKV+batch*1024+embeddingHeads*headsKV*batch),
|
||||
)
|
||||
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
|
||||
// mixtral 8x7b
|
||||
ffnGateWeight1 := ffnGateWeight.Shape[1]
|
||||
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
|
||||
partialOffload = max(
|
||||
4*batch*(3+embedding/heads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
|
||||
4*batch*(3+embeddingHeads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
|
||||
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
|
||||
)
|
||||
}
|
||||
@@ -368,15 +406,14 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
||||
fullOffload,
|
||||
)
|
||||
case "deepseek2":
|
||||
keys := uint64(llm.KV()["deepseek2.attention.key_length"].(uint32))
|
||||
fullOffload = max(
|
||||
4*batch*(3*embedding+vocab),
|
||||
4*batch*(3*embedding+2+context*(1+headsKV)+2*keys*headsKV),
|
||||
4*batch*(3*embedding+2+context*(1+headsKV)+2*embeddingHeadsK*headsKV),
|
||||
)
|
||||
|
||||
partialOffload = max(
|
||||
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
|
||||
4*batch*(2*embedding+1+2*keys*headsKV+context+context*headsKV)+4*keys*context*headsKV+embedding*keys*headsKV*9/16,
|
||||
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
1
llm/ggml_test.go
Normal file
1
llm/ggml_test.go
Normal file
@@ -0,0 +1 @@
|
||||
package llm
|
||||
130
llm/gguf.go
130
llm/gguf.go
@@ -3,11 +3,10 @@ package llm
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"log/slog"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
@@ -29,6 +28,12 @@ type containerGGUF struct {
|
||||
NumTensor uint64
|
||||
NumKV uint64
|
||||
}
|
||||
|
||||
maxArraySize int
|
||||
}
|
||||
|
||||
func (c *containerGGUF) canCollectArray(size int) bool {
|
||||
return c.maxArraySize < 0 || size <= c.maxArraySize
|
||||
}
|
||||
|
||||
func (c *containerGGUF) Name() string {
|
||||
@@ -54,7 +59,6 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
|
||||
}
|
||||
|
||||
model := newGGUF(c)
|
||||
slog.Debug(fmt.Sprintf("model = %#v", model))
|
||||
if err := model.Decode(rs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -85,6 +89,8 @@ type gguf struct {
|
||||
tensors []*Tensor
|
||||
|
||||
parameters uint64
|
||||
|
||||
scratch [16 << 10]byte
|
||||
}
|
||||
|
||||
func newGGUF(container *containerGGUF) *gguf {
|
||||
@@ -181,34 +187,34 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
}
|
||||
|
||||
// decode tensors
|
||||
for i := 0; uint64(i) < llm.numTensor(); i++ {
|
||||
for range llm.numTensor() {
|
||||
name, err := readGGUFString(llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to read tensor name: %w", err)
|
||||
}
|
||||
|
||||
// dims is the number of dimensions in the tensor
|
||||
dims, err := readGGUF[uint32](llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to read tensor dimensions: %w", err)
|
||||
}
|
||||
|
||||
shape := [4]uint64{1, 1, 1, 1}
|
||||
for i := 0; uint32(i) < dims; i++ {
|
||||
shape[i], err = readGGUF[uint64](llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to read tensor shape: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
kind, err := readGGUF[uint32](llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to read tensor kind: %w", err)
|
||||
}
|
||||
|
||||
offset, err := readGGUF[uint64](llm, rs)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to read tensor offset: %w", err)
|
||||
}
|
||||
|
||||
tensor := Tensor{
|
||||
@@ -230,24 +236,19 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
alignment = 32
|
||||
}
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
padding := llm.padding(offset, int64(alignment))
|
||||
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
|
||||
return err
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get current offset: %w", err)
|
||||
}
|
||||
|
||||
padding := llm.padding(int64(tensor.Size()), int64(alignment))
|
||||
padding := llm.padding(offset, int64(alignment))
|
||||
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to seek to init padding: %w", err)
|
||||
}
|
||||
|
||||
if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
|
||||
return fmt.Errorf("failed to seek to tensor: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,22 +286,48 @@ func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
func discardGGUFString(llm *gguf, r io.Reader) error {
|
||||
buf := llm.scratch[:8]
|
||||
_, err := io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
size := int(llm.ByteOrder.Uint64(buf))
|
||||
for size > 0 {
|
||||
n, err := r.Read(llm.scratch[:min(size, cap(llm.scratch))])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
size -= n
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readGGUFString(llm *gguf, r io.Reader) (string, error) {
|
||||
if llm.Version == 1 {
|
||||
return readGGUFV1String(llm, r)
|
||||
}
|
||||
|
||||
var length uint64
|
||||
if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
|
||||
buf := llm.scratch[:8]
|
||||
_, err := io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := io.CopyN(&b, r, int64(length)); err != nil {
|
||||
length := int(llm.ByteOrder.Uint64(buf))
|
||||
if length > len(llm.scratch) {
|
||||
buf = make([]byte, length)
|
||||
} else {
|
||||
buf = llm.scratch[:length]
|
||||
}
|
||||
clear(buf)
|
||||
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return b.String(), nil
|
||||
return string(buf), nil
|
||||
}
|
||||
|
||||
func writeGGUFString(llm *gguf, w io.Writer, s string) error {
|
||||
@@ -316,7 +343,16 @@ func writeGGUFString(llm *gguf, w io.Writer, s string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
type array struct {
|
||||
size int
|
||||
values []any
|
||||
}
|
||||
|
||||
func (a *array) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(a.values)
|
||||
}
|
||||
|
||||
func readGGUFV1Array(llm *gguf, r io.Reader) (*array, error) {
|
||||
t, err := readGGUF[uint32](llm, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -327,7 +363,12 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i := 0; uint32(i) < n; i++ {
|
||||
a := &array{size: int(n)}
|
||||
if llm.canCollectArray(int(n)) {
|
||||
a.values = make([]any, 0, int(n))
|
||||
}
|
||||
|
||||
for i := range n {
|
||||
var e any
|
||||
switch t {
|
||||
case ggufTypeUint8:
|
||||
@@ -361,13 +402,15 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a = append(a, e)
|
||||
if a.values != nil {
|
||||
a.values[i] = e
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
||||
if llm.Version == 1 {
|
||||
return readGGUFV1Array(llm, r)
|
||||
}
|
||||
@@ -382,7 +425,12 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i := 0; uint64(i) < n; i++ {
|
||||
a := &array{size: int(n)}
|
||||
if llm.canCollectArray(int(n)) {
|
||||
a.values = make([]any, int(n))
|
||||
}
|
||||
|
||||
for i := range n {
|
||||
var e any
|
||||
switch t {
|
||||
case ggufTypeUint8:
|
||||
@@ -408,7 +456,11 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
case ggufTypeBool:
|
||||
e, err = readGGUF[bool](llm, r)
|
||||
case ggufTypeString:
|
||||
e, err = readGGUFString(llm, r)
|
||||
if a.values != nil {
|
||||
e, err = readGGUFString(llm, r)
|
||||
} else {
|
||||
err = discardGGUFString(llm, r)
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid array type: %d", t)
|
||||
}
|
||||
@@ -416,10 +468,12 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
a = append(a, e)
|
||||
if a.values != nil {
|
||||
a.values[i] = e
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
|
||||
|
||||
@@ -115,8 +115,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||
slog.Warn("model missing blk.0 layer size")
|
||||
}
|
||||
|
||||
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
|
||||
var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()
|
||||
// fp16 k,v = sizeof(float16) * n_ctx * n_layer * (n_embd_head_k + n_embd_head_v) * n_head_kv
|
||||
var kv uint64 = 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * (ggml.KV().EmbeddingHeadCountK() + ggml.KV().EmbeddingHeadCountV()) * ggml.KV().HeadCountKV()
|
||||
|
||||
// KV is proportional to the number of layers
|
||||
layerSize += kv / ggml.KV().BlockCount()
|
||||
|
||||
@@ -22,13 +22,14 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||
defer f.Close()
|
||||
gguf := NewGGUFV3(binary.LittleEndian)
|
||||
inputLayerCount := 5
|
||||
|
||||
tensors := []Tensor{
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "blk.3.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "blk.4.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
{Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
{Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
{Name: "blk.3.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
{Name: "blk.4.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
}
|
||||
assert.Len(t, tensors, inputLayerCount+1)
|
||||
err = gguf.Encode(f, KV{
|
||||
@@ -45,8 +46,10 @@ func TestEstimateGPULayers(t *testing.T) {
|
||||
}, tensors)
|
||||
require.NoError(t, err)
|
||||
|
||||
ggml, err := LoadModel(f.Name())
|
||||
require.NoError(t, err)
|
||||
ggml, err := LoadModel(f.Name(), 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Simple CPU scenario
|
||||
gpus := []gpu.GpuInfo{
|
||||
|
||||
@@ -60,7 +60,12 @@ type llmServer struct {
|
||||
sem *semaphore.Weighted
|
||||
}
|
||||
|
||||
func LoadModel(model string) (*GGML, error) {
|
||||
// LoadModel will load a model from disk. The model must be in the GGML format.
|
||||
//
|
||||
// It collects array values for arrays with a size less than or equal to
|
||||
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
|
||||
// the maxArraySize is negative, all arrays are collected.
|
||||
func LoadModel(model string, maxArraySize int) (*GGML, error) {
|
||||
if _, err := os.Stat(model); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -71,7 +76,7 @@ func LoadModel(model string) (*GGML, error) {
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ggml, _, err := DecodeGGML(f)
|
||||
ggml, _, err := DecodeGGML(f, maxArraySize)
|
||||
return ggml, err
|
||||
}
|
||||
|
||||
@@ -81,7 +86,17 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
var err error
|
||||
var cpuRunner string
|
||||
var estimate MemoryEstimate
|
||||
var systemMemory uint64
|
||||
var systemTotalMemory uint64
|
||||
var systemFreeMemory uint64
|
||||
|
||||
systemMemInfo, err := gpu.GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Error("failed to lookup system memory", "error", err)
|
||||
} else {
|
||||
systemTotalMemory = systemMemInfo.TotalMemory
|
||||
systemFreeMemory = systemMemInfo.FreeMemory
|
||||
slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", systemFreeMemory)
|
||||
}
|
||||
|
||||
// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
|
||||
if opts.NumGPU == 0 {
|
||||
@@ -91,19 +106,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
cpuRunner = serverForCpu()
|
||||
estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
|
||||
} else {
|
||||
if gpus[0].Library == "metal" {
|
||||
memInfo, err := gpu.GetCPUMem()
|
||||
if err != nil {
|
||||
slog.Error("failed to lookup system memory", "error", err)
|
||||
} else {
|
||||
systemMemory = memInfo.TotalMemory
|
||||
slog.Debug("system memory", "total", format.HumanBytes2(systemMemory))
|
||||
}
|
||||
}
|
||||
estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
|
||||
|
||||
switch {
|
||||
case gpus[0].Library == "metal" && estimate.VRAMSize > systemMemory:
|
||||
case gpus[0].Library == "metal" && estimate.VRAMSize > systemTotalMemory:
|
||||
// disable partial offloading when model is greater than total system memory as this
|
||||
// can lead to locking up the system
|
||||
opts.NumGPU = 0
|
||||
@@ -211,7 +217,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
||||
}
|
||||
|
||||
// Windows CUDA should not use mmap for best performance
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda") || opts.UseMMap == api.TriStateFalse {
|
||||
// Linux with a model larger than free space, mmap leads to thrashing
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == api.TriStateUndefined) ||
|
||||
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == api.TriStateUndefined) ||
|
||||
opts.UseMMap == api.TriStateFalse {
|
||||
params = append(params, "--no-mmap")
|
||||
}
|
||||
|
||||
@@ -408,7 +417,7 @@ func projectorMemoryRequirements(filename string) uint64 {
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
ggml, _, err := DecodeGGML(file)
|
||||
ggml, _, err := DecodeGGML(file, 0)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
@@ -279,7 +279,7 @@ if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\
|
||||
case $OS_NAME in
|
||||
centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
|
||||
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
|
||||
fedora) [ $OS_VERSION -lt '37' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '37';;
|
||||
fedora) [ $OS_VERSION -lt '39' ] && install_cuda_driver_yum $OS_NAME $OS_VERSION || install_cuda_driver_yum $OS_NAME '39';;
|
||||
amzn) install_cuda_driver_yum 'fedora' '37' ;;
|
||||
debian) install_cuda_driver_apt $OS_NAME $OS_VERSION ;;
|
||||
ubuntu) install_cuda_driver_apt $OS_NAME $(echo $OS_VERSION | sed 's/\.//') ;;
|
||||
|
||||
@@ -414,17 +414,22 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
|
||||
return err
|
||||
}
|
||||
|
||||
layers, err := parseFromFile(ctx, temp, "", fn)
|
||||
layer, err := NewLayer(temp, baseLayer.MediaType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(layers) != 1 {
|
||||
return errors.New("quantization failed")
|
||||
if _, err := temp.Seek(0, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseLayer.Layer = layers[0].Layer
|
||||
baseLayer.GGML = layers[0].GGML
|
||||
ggml, _, err := llm.DecodeGGML(temp, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
baseLayer.Layer = layer
|
||||
baseLayer.GGML = ggml
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
||||
}
|
||||
defer blob.Close()
|
||||
|
||||
ggml, _, err := llm.DecodeGGML(blob)
|
||||
ggml, _, err := llm.DecodeGGML(blob, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -176,7 +176,7 @@ func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(a
|
||||
}
|
||||
defer bin.Close()
|
||||
|
||||
ggml, _, err := llm.DecodeGGML(bin)
|
||||
ggml, _, err := llm.DecodeGGML(bin, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -210,7 +210,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
|
||||
|
||||
var offset int64
|
||||
for offset < stat.Size() {
|
||||
ggml, n, err := llm.DecodeGGML(file)
|
||||
ggml, n, err := llm.DecodeGGML(file, 0)
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else if err != nil {
|
||||
|
||||
@@ -754,7 +754,11 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||
}
|
||||
|
||||
func getKVData(digest string, verbose bool) (llm.KV, error) {
|
||||
kvData, err := llm.LoadModel(digest)
|
||||
maxArraySize := 0
|
||||
if verbose {
|
||||
maxArraySize = -1
|
||||
}
|
||||
kvData, err := llm.LoadModel(digest, maxArraySize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -1101,11 +1105,20 @@ func Serve(ln net.Listener) error {
|
||||
schedCtx, schedDone := context.WithCancel(ctx)
|
||||
sched := InitScheduler(schedCtx)
|
||||
s := &Server{addr: ln.Addr(), sched: sched}
|
||||
r := s.GenerateRoutes()
|
||||
|
||||
http.Handle("/", s.GenerateRoutes())
|
||||
|
||||
slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
|
||||
srvr := &http.Server{
|
||||
Handler: r,
|
||||
// Use http.DefaultServeMux so we get net/http/pprof for
|
||||
// free.
|
||||
//
|
||||
// TODO(bmizerany): Decide if we want to make this
|
||||
// configurable so it is not exposed by default, or allow
|
||||
// users to bind it to a different port. This was a quick
|
||||
// and easy way to get pprof, but it may not be the best
|
||||
// way.
|
||||
Handler: nil,
|
||||
}
|
||||
|
||||
// listen for a ctrl+c and stop any loaded llm
|
||||
|
||||
@@ -144,7 +144,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
}
|
||||
|
||||
// Load model for fitting
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath)
|
||||
ggml, err := llm.LoadModel(pending.model.ModelPath, 0)
|
||||
if err != nil {
|
||||
pending.errCh <- err
|
||||
break
|
||||
|
||||
@@ -128,14 +128,14 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
|
||||
"tokenizer.ggml.scores": []float32{0},
|
||||
"tokenizer.ggml.token_type": []int32{0},
|
||||
}, []llm.Tensor{
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
|
||||
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
fname := f.Name()
|
||||
model := &Model{Name: modelName, ModelPath: fname}
|
||||
scenario.ggml, err = llm.LoadModel(model.ModelPath)
|
||||
scenario.ggml, err = llm.LoadModel(model.ModelPath, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
scenario.req = &LlmRequest{
|
||||
|
||||
34
util/bufioutil/buffer_seeker.go
Normal file
34
util/bufioutil/buffer_seeker.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package bufioutil
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
)
|
||||
|
||||
type BufferedSeeker struct {
|
||||
rs io.ReadSeeker
|
||||
br *bufio.Reader
|
||||
}
|
||||
|
||||
func NewBufferedSeeker(rs io.ReadSeeker, size int) *BufferedSeeker {
|
||||
return &BufferedSeeker{
|
||||
rs: rs,
|
||||
br: bufio.NewReaderSize(rs, size),
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BufferedSeeker) Read(p []byte) (int, error) {
|
||||
return b.br.Read(p)
|
||||
}
|
||||
|
||||
func (b *BufferedSeeker) Seek(offset int64, whence int) (int64, error) {
|
||||
if whence == io.SeekCurrent {
|
||||
offset -= int64(b.br.Buffered())
|
||||
}
|
||||
n, err := b.rs.Seek(offset, whence)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b.br.Reset(b.rs)
|
||||
return n, nil
|
||||
}
|
||||
64
util/bufioutil/buffer_seeker_test.go
Normal file
64
util/bufioutil/buffer_seeker_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package bufioutil
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBufferedSeeker(t *testing.T) {
|
||||
const alphabet = "abcdefghijklmnopqrstuvwxyz"
|
||||
|
||||
bs := NewBufferedSeeker(strings.NewReader(alphabet), 0) // minReadBufferSize = 16
|
||||
|
||||
checkRead := func(buf []byte, expected string) {
|
||||
t.Helper()
|
||||
_, err := bs.Read(buf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !bytes.Equal(buf, []byte(expected)) {
|
||||
t.Fatalf("expected %s, got %s", expected, buf)
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first 5 bytes
|
||||
buf := make([]byte, 5)
|
||||
|
||||
checkRead(buf, "abcde")
|
||||
|
||||
// Seek back to the beginning
|
||||
_, err := bs.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// read 'a'
|
||||
checkRead(buf[:1], "a")
|
||||
|
||||
if bs.br.Buffered() == 0 {
|
||||
t.Fatalf("totally unexpected sanity check failed")
|
||||
}
|
||||
|
||||
// Seek past 'b'
|
||||
_, err = bs.Seek(1, io.SeekCurrent)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkRead(buf, "cdefg")
|
||||
|
||||
// Seek back to the beginning
|
||||
_, err = bs.Seek(0, io.SeekStart)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkRead(buf, "abcde")
|
||||
|
||||
// Seek to the end
|
||||
_, err = bs.Seek(-5, io.SeekEnd)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
checkRead(buf, "vwxyz")
|
||||
}
|
||||
Reference in New Issue
Block a user