mirror of
https://github.com/ollama/ollama.git
synced 2026-02-27 04:27:01 -05:00
Compare commits
1 Commits
main
...
pdevine/sa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
857cffd22a |
14
api/types.go
14
api/types.go
@@ -15,7 +15,6 @@ import (
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/internal/orderedmap"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
@@ -570,7 +569,6 @@ type DebugInfo struct {
|
||||
|
||||
type Metrics struct {
|
||||
TotalDuration time.Duration `json:"total_duration,omitempty"`
|
||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
||||
LoadDuration time.Duration `json:"load_duration,omitempty"`
|
||||
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
||||
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
||||
@@ -936,10 +934,6 @@ func (m *Metrics) Summary() {
|
||||
fmt.Fprintf(os.Stderr, "total duration: %v\n", m.TotalDuration)
|
||||
}
|
||||
|
||||
if m.PeakMemory > 0 {
|
||||
fmt.Fprintf(os.Stderr, "peak memory: %s\n", formatPeakMemory(m.PeakMemory))
|
||||
}
|
||||
|
||||
if m.LoadDuration > 0 {
|
||||
fmt.Fprintf(os.Stderr, "load duration: %v\n", m.LoadDuration)
|
||||
}
|
||||
@@ -963,14 +957,6 @@ func (m *Metrics) Summary() {
|
||||
}
|
||||
}
|
||||
|
||||
func formatPeakMemory(b uint64) string {
|
||||
if b >= format.GibiByte {
|
||||
return fmt.Sprintf("%.3f GiB", float64(b)/float64(format.GibiByte))
|
||||
}
|
||||
|
||||
return format.HumanBytes2(b)
|
||||
}
|
||||
|
||||
func (opts *Options) FromMap(m map[string]any) error {
|
||||
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
||||
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
|
||||
|
||||
@@ -1518,7 +1518,6 @@ type CompletionResponse struct {
|
||||
PromptEvalDuration time.Duration `json:"prompt_eval_duration"`
|
||||
EvalCount int `json:"eval_count"`
|
||||
EvalDuration time.Duration `json:"eval_duration"`
|
||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
||||
|
||||
// Logprobs contains log probability information if requested
|
||||
Logprobs []Logprob `json:"logprobs,omitempty"`
|
||||
|
||||
@@ -32,10 +32,9 @@ const (
|
||||
)
|
||||
|
||||
type GLM46Parser struct {
|
||||
state glm46ParserState
|
||||
buffer strings.Builder
|
||||
tools []api.Tool
|
||||
callIndex int
|
||||
state glm46ParserState
|
||||
buffer strings.Builder
|
||||
tools []api.Tool
|
||||
}
|
||||
|
||||
func (p *GLM46Parser) HasToolSupport() bool {
|
||||
@@ -49,7 +48,6 @@ func (p *GLM46Parser) HasThinkingSupport() bool {
|
||||
// func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
||||
func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||
p.tools = tools
|
||||
p.callIndex = 0
|
||||
return tools
|
||||
}
|
||||
|
||||
@@ -91,8 +89,6 @@ func (p *GLM46Parser) Add(s string, done bool) (content string, thinking string,
|
||||
slog.Warn("glm-4.6 tool call parsing failed", "error", err)
|
||||
return "", "", nil, err
|
||||
}
|
||||
toolCall.Function.Index = p.callIndex
|
||||
p.callIndex++
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
case glm46EventThinkingContent:
|
||||
thinkingSb.WriteString(event.content)
|
||||
|
||||
@@ -11,7 +11,6 @@ type GLM47Parser struct {
|
||||
|
||||
func (p *GLM47Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||
p.tools = tools
|
||||
p.callIndex = 0
|
||||
// When thinking is enabled (nil or true), the prompt ends with <think>,
|
||||
// so model output starts directly with thinking content (no opening tag).
|
||||
if thinkValue == nil || thinkValue.Bool() {
|
||||
|
||||
@@ -97,91 +97,3 @@ func TestGLM47ParserToolCallEscaping(t *testing.T) {
|
||||
t.Fatalf("expected %#v, got %#v", expected, toolCall)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGLM47ParserToolCallIndexing(t *testing.T) {
|
||||
parser := GLM47Parser{}
|
||||
parser.Init(nil, nil, nil)
|
||||
|
||||
input := `plan</think>
|
||||
<tool_call>first<arg_key>a</arg_key><arg_value>1</arg_value></tool_call>
|
||||
<tool_call>second<arg_key>b</arg_key><arg_value>2</arg_value></tool_call>
|
||||
<tool_call>third<arg_key>c</arg_key><arg_value>3</arg_value></tool_call>`
|
||||
|
||||
_, _, calls, err := parser.Add(input, true)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
want := []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
||||
}
|
||||
if len(calls) != len(want) {
|
||||
t.Fatalf("expected %d calls, got %d", len(want), len(calls))
|
||||
}
|
||||
for i := range want {
|
||||
if !toolCallEqual(calls[i], want[i]) {
|
||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, calls[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGLM47ParserToolCallIndexingStreaming(t *testing.T) {
|
||||
parser := GLM47Parser{}
|
||||
parser.Init(nil, nil, nil)
|
||||
|
||||
var all []api.ToolCall
|
||||
|
||||
_, _, calls, err := parser.Add("plan</think><tool_call>first<arg_key>a</arg_key><arg_value>1</arg_value></tool_call><tool_call>second<arg_key>b</arg_key>", false)
|
||||
if err != nil {
|
||||
t.Fatalf("step 1 parse failed: %v", err)
|
||||
}
|
||||
all = append(all, calls...)
|
||||
|
||||
_, _, calls, err = parser.Add("<arg_value>2</arg_value></tool_call><tool_call>third<arg_key>c</arg_key><arg_value>3</arg_value></tool_call>", true)
|
||||
if err != nil {
|
||||
t.Fatalf("step 2 parse failed: %v", err)
|
||||
}
|
||||
all = append(all, calls...)
|
||||
|
||||
want := []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
||||
}
|
||||
if len(all) != len(want) {
|
||||
t.Fatalf("expected %d calls, got %d", len(want), len(all))
|
||||
}
|
||||
for i := range want {
|
||||
if !toolCallEqual(all[i], want[i]) {
|
||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, all[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGLM47ParserToolCallIndexResetOnInit(t *testing.T) {
|
||||
parser := GLM47Parser{}
|
||||
parser.Init(nil, nil, nil)
|
||||
|
||||
_, _, _, err := parser.Add("plan</think><tool_call>first<arg_key>a</arg_key><arg_value>1</arg_value></tool_call>", true)
|
||||
if err != nil {
|
||||
t.Fatalf("first parse failed: %v", err)
|
||||
}
|
||||
|
||||
parser.Init(nil, nil, nil)
|
||||
_, _, calls, err := parser.Add("plan</think><tool_call>second<arg_key>b</arg_key><arg_value>2</arg_value></tool_call>", true)
|
||||
if err != nil {
|
||||
t.Fatalf("second parse failed: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 0},
|
||||
}
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
||||
}
|
||||
if !toolCallEqual(calls[0], want) {
|
||||
t.Fatalf("got %#v, want %#v", calls[0], want)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,6 @@ type Qwen3Parser struct {
|
||||
state qwen3ParserState
|
||||
buffer strings.Builder
|
||||
tools []api.Tool
|
||||
callIndex int
|
||||
hasThinkingSupport bool
|
||||
defaultThinking bool
|
||||
maybeThinkingOpenAtBOL bool
|
||||
@@ -55,7 +54,6 @@ func (p *Qwen3Parser) HasThinkingSupport() bool {
|
||||
func (p *Qwen3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||
p.tools = tools
|
||||
p.buffer.Reset()
|
||||
p.callIndex = 0
|
||||
|
||||
thinkingEnabled := thinkValue != nil && thinkValue.Bool()
|
||||
if thinkValue == nil {
|
||||
@@ -108,8 +106,6 @@ func (p *Qwen3Parser) Add(s string, done bool) (content string, thinking string,
|
||||
slog.Warn("qwen3 tool call parsing failed", "error", err)
|
||||
return "", "", nil, err
|
||||
}
|
||||
toolCall.Function.Index = p.callIndex
|
||||
p.callIndex++
|
||||
calls = append(calls, toolCall)
|
||||
case qwen3EventThinkingContent:
|
||||
thinkingSb.WriteString(event.content)
|
||||
|
||||
@@ -230,89 +230,3 @@ func TestQwen35ParserRespectsNoThink(t *testing.T) {
|
||||
t.Fatalf("expected no tool calls, got %d", len(calls))
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3ParserToolCallIndexing(t *testing.T) {
|
||||
parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
|
||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
||||
|
||||
input := `<tool_call>{"name":"first","arguments":{"a":"1"}}</tool_call>
|
||||
<tool_call>{"name":"second","arguments":{"b":"2"}}</tool_call>
|
||||
<tool_call>{"name":"third","arguments":{"c":"3"}}</tool_call>`
|
||||
_, _, calls, err := parser.Add(input, true)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
want := []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
||||
}
|
||||
if len(calls) != len(want) {
|
||||
t.Fatalf("expected %d calls, got %d", len(want), len(calls))
|
||||
}
|
||||
for i := range want {
|
||||
if !toolCallEqual(calls[i], want[i]) {
|
||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, calls[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3ParserToolCallIndexingStreaming(t *testing.T) {
|
||||
parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
|
||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
||||
|
||||
var all []api.ToolCall
|
||||
|
||||
_, _, calls, err := parser.Add(`<tool_call>{"name":"first","arguments":{"a":"1"}}</tool_call><tool_call>{"name":"second","arguments":{"b":"2"}`, false)
|
||||
if err != nil {
|
||||
t.Fatalf("step 1 parse failed: %v", err)
|
||||
}
|
||||
all = append(all, calls...)
|
||||
|
||||
_, _, calls, err = parser.Add(`}</tool_call><tool_call>{"name":"third","arguments":{"c":"3"}}</tool_call>`, true)
|
||||
if err != nil {
|
||||
t.Fatalf("step 2 parse failed: %v", err)
|
||||
}
|
||||
all = append(all, calls...)
|
||||
|
||||
want := []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
||||
}
|
||||
if len(all) != len(want) {
|
||||
t.Fatalf("expected %d calls, got %d", len(want), len(all))
|
||||
}
|
||||
for i := range want {
|
||||
if !toolCallEqual(all[i], want[i]) {
|
||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, all[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3ParserToolCallIndexResetOnInit(t *testing.T) {
|
||||
parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
|
||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
||||
|
||||
_, _, _, err := parser.Add(`<tool_call>{"name":"first","arguments":{"a":"1"}}</tool_call>`, true)
|
||||
if err != nil {
|
||||
t.Fatalf("first parse failed: %v", err)
|
||||
}
|
||||
|
||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
||||
_, _, calls, err := parser.Add(`<tool_call>{"name":"second","arguments":{"b":"2"}}</tool_call>`, true)
|
||||
if err != nil {
|
||||
t.Fatalf("second parse failed: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 0},
|
||||
}
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
||||
}
|
||||
if !toolCallEqual(calls[0], want) {
|
||||
t.Fatalf("got %#v, want %#v", calls[0], want)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,10 +29,9 @@ const (
|
||||
)
|
||||
|
||||
type Qwen3CoderParser struct {
|
||||
state qwenParserState
|
||||
acc strings.Builder
|
||||
tools []api.Tool
|
||||
callIndex int
|
||||
state qwenParserState
|
||||
acc strings.Builder
|
||||
tools []api.Tool
|
||||
}
|
||||
|
||||
func (p *Qwen3CoderParser) HasToolSupport() bool {
|
||||
@@ -45,7 +44,6 @@ func (p *Qwen3CoderParser) HasThinkingSupport() bool {
|
||||
|
||||
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||
p.tools = tools
|
||||
p.callIndex = 0
|
||||
return tools // Qwen doesn't modify tools
|
||||
}
|
||||
|
||||
@@ -64,8 +62,6 @@ func (p *Qwen3CoderParser) Add(s string, done bool) (content string, thinking st
|
||||
slog.Warn("qwen tool call parsing failed", "error", err)
|
||||
return "", "", nil, err
|
||||
}
|
||||
toolCall.Function.Index = p.callIndex
|
||||
p.callIndex++
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
case qwenEventContent:
|
||||
// TODO(drifkin): if the same turn contains multiple interleaved content
|
||||
|
||||
@@ -1035,92 +1035,6 @@ func TestQwenToolCallValueParsing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3CoderParserToolCallIndexing(t *testing.T) {
|
||||
parser := Qwen3CoderParser{}
|
||||
parser.Init(nil, nil, nil)
|
||||
|
||||
input := `<tool_call><function=first><parameter=a>1</parameter></function></tool_call>
|
||||
<tool_call><function=second><parameter=b>2</parameter></function></tool_call>
|
||||
<tool_call><function=third><parameter=c>3</parameter></function></tool_call>`
|
||||
_, _, calls, err := parser.Add(input, true)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
want := []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "first", Arguments: testArgs(map[string]any{"a": "1"}), Index: 0}},
|
||||
{Function: api.ToolCallFunction{Name: "second", Arguments: testArgs(map[string]any{"b": "2"}), Index: 1}},
|
||||
{Function: api.ToolCallFunction{Name: "third", Arguments: testArgs(map[string]any{"c": "3"}), Index: 2}},
|
||||
}
|
||||
if len(calls) != len(want) {
|
||||
t.Fatalf("expected %d calls, got %d", len(want), len(calls))
|
||||
}
|
||||
for i := range want {
|
||||
if !toolCallEqual(calls[i], want[i]) {
|
||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, calls[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3CoderParserToolCallIndexingStreaming(t *testing.T) {
|
||||
parser := Qwen3CoderParser{}
|
||||
parser.Init(nil, nil, nil)
|
||||
|
||||
var all []api.ToolCall
|
||||
|
||||
_, _, calls, err := parser.Add("<tool_call><function=first><parameter=a>1</parameter></function></tool_call><tool_call><function=second>", false)
|
||||
if err != nil {
|
||||
t.Fatalf("step 1 parse failed: %v", err)
|
||||
}
|
||||
all = append(all, calls...)
|
||||
|
||||
_, _, calls, err = parser.Add("<parameter=b>2</parameter></function></tool_call><tool_call><function=third><parameter=c>3</parameter></function></tool_call>", true)
|
||||
if err != nil {
|
||||
t.Fatalf("step 2 parse failed: %v", err)
|
||||
}
|
||||
all = append(all, calls...)
|
||||
|
||||
want := []api.ToolCall{
|
||||
{Function: api.ToolCallFunction{Name: "first", Arguments: testArgs(map[string]any{"a": "1"}), Index: 0}},
|
||||
{Function: api.ToolCallFunction{Name: "second", Arguments: testArgs(map[string]any{"b": "2"}), Index: 1}},
|
||||
{Function: api.ToolCallFunction{Name: "third", Arguments: testArgs(map[string]any{"c": "3"}), Index: 2}},
|
||||
}
|
||||
if len(all) != len(want) {
|
||||
t.Fatalf("expected %d calls, got %d", len(want), len(all))
|
||||
}
|
||||
for i := range want {
|
||||
if !toolCallEqual(all[i], want[i]) {
|
||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, all[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3CoderParserToolCallIndexResetOnInit(t *testing.T) {
|
||||
parser := Qwen3CoderParser{}
|
||||
parser.Init(nil, nil, nil)
|
||||
|
||||
_, _, _, err := parser.Add("<tool_call><function=first><parameter=a>1</parameter></function></tool_call>", true)
|
||||
if err != nil {
|
||||
t.Fatalf("first parse failed: %v", err)
|
||||
}
|
||||
|
||||
parser.Init(nil, nil, nil)
|
||||
_, _, calls, err := parser.Add("<tool_call><function=second><parameter=b>2</parameter></function></tool_call>", true)
|
||||
if err != nil {
|
||||
t.Fatalf("second parse failed: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{Name: "second", Arguments: testArgs(map[string]any{"b": "2"}), Index: 0},
|
||||
}
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
||||
}
|
||||
if !toolCallEqual(calls[0], want) {
|
||||
t.Fatalf("got %#v, want %#v", calls[0], want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwenXMLTransform(t *testing.T) {
|
||||
cases := []struct {
|
||||
desc string
|
||||
|
||||
@@ -557,7 +557,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
||||
PromptEvalDuration: cr.PromptEvalDuration,
|
||||
EvalCount: cr.EvalCount,
|
||||
EvalDuration: cr.EvalDuration,
|
||||
PeakMemory: cr.PeakMemory,
|
||||
},
|
||||
Logprobs: toAPILogprobs(cr.Logprobs),
|
||||
}
|
||||
@@ -2310,7 +2309,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
||||
PromptEvalDuration: r.PromptEvalDuration,
|
||||
EvalCount: r.EvalCount,
|
||||
EvalDuration: r.EvalDuration,
|
||||
PeakMemory: r.PeakMemory,
|
||||
},
|
||||
Logprobs: toAPILogprobs(r.Logprobs),
|
||||
}
|
||||
|
||||
@@ -78,6 +78,11 @@ func (c *kvCache) findRemaining(tokens []int32) []int32 {
|
||||
prefix++
|
||||
}
|
||||
|
||||
if prefix == len(tokens) && prefix > 0 {
|
||||
// Leave one token to run through the model so we can sample a response.
|
||||
prefix--
|
||||
}
|
||||
|
||||
if prefix < len(c.tokens) {
|
||||
trim := len(c.tokens) - prefix
|
||||
for _, kv := range c.caches {
|
||||
|
||||
@@ -268,7 +268,6 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
||||
PromptEvalDuration int `json:"prompt_eval_duration,omitempty"`
|
||||
EvalCount int `json:"eval_count,omitempty"`
|
||||
EvalDuration int `json:"eval_duration,omitempty"`
|
||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
||||
}
|
||||
if err := json.Unmarshal(scanner.Bytes(), &raw); err != nil {
|
||||
slog.Debug("mlx response parse error", "error", err, "line", string(scanner.Bytes()))
|
||||
@@ -283,7 +282,6 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
||||
PromptEvalDuration: time.Duration(raw.PromptEvalDuration),
|
||||
EvalCount: raw.EvalCount,
|
||||
EvalDuration: time.Duration(raw.EvalDuration),
|
||||
PeakMemory: raw.PeakMemory,
|
||||
}
|
||||
|
||||
fn(cresp)
|
||||
|
||||
@@ -64,10 +64,6 @@ func PeakMemory() int {
|
||||
return int(peak)
|
||||
}
|
||||
|
||||
func ResetPeakMemory() {
|
||||
C.mlx_reset_peak_memory()
|
||||
}
|
||||
|
||||
type Memory struct{}
|
||||
|
||||
func (Memory) LogValue() slog.Value {
|
||||
|
||||
@@ -44,7 +44,6 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
} else {
|
||||
mlx.DisableCompile()
|
||||
}
|
||||
mlx.ResetPeakMemory()
|
||||
|
||||
inputs := r.Tokenizer.Encode(request.Prompt, true)
|
||||
session := r.cache.begin(r.Model, inputs)
|
||||
@@ -139,7 +138,6 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
}
|
||||
|
||||
final.CompletionTokensDuration = time.Since(now)
|
||||
final.PeakMemory = uint64(mlx.PeakMemory())
|
||||
select {
|
||||
case <-request.Ctx.Done():
|
||||
return request.Ctx.Err()
|
||||
|
||||
@@ -54,7 +54,6 @@ type Response struct {
|
||||
PromptTokensDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
||||
CompletionTokens int `json:"eval_count,omitempty"`
|
||||
CompletionTokensDuration time.Duration `json:"eval_duration,omitempty"`
|
||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
||||
TotalTokens int `json:"total_tokens,omitempty"`
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user