mirror of
https://github.com/ollama/ollama.git
synced 2026-02-27 12:36:54 -05:00
Compare commits
1 Commits
main
...
pdevine/sa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
857cffd22a |
14
api/types.go
14
api/types.go
@@ -15,7 +15,6 @@ import (
|
|||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/format"
|
|
||||||
"github.com/ollama/ollama/internal/orderedmap"
|
"github.com/ollama/ollama/internal/orderedmap"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
)
|
)
|
||||||
@@ -570,7 +569,6 @@ type DebugInfo struct {
|
|||||||
|
|
||||||
type Metrics struct {
|
type Metrics struct {
|
||||||
TotalDuration time.Duration `json:"total_duration,omitempty"`
|
TotalDuration time.Duration `json:"total_duration,omitempty"`
|
||||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
|
||||||
LoadDuration time.Duration `json:"load_duration,omitempty"`
|
LoadDuration time.Duration `json:"load_duration,omitempty"`
|
||||||
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
||||||
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
||||||
@@ -936,10 +934,6 @@ func (m *Metrics) Summary() {
|
|||||||
fmt.Fprintf(os.Stderr, "total duration: %v\n", m.TotalDuration)
|
fmt.Fprintf(os.Stderr, "total duration: %v\n", m.TotalDuration)
|
||||||
}
|
}
|
||||||
|
|
||||||
if m.PeakMemory > 0 {
|
|
||||||
fmt.Fprintf(os.Stderr, "peak memory: %s\n", formatPeakMemory(m.PeakMemory))
|
|
||||||
}
|
|
||||||
|
|
||||||
if m.LoadDuration > 0 {
|
if m.LoadDuration > 0 {
|
||||||
fmt.Fprintf(os.Stderr, "load duration: %v\n", m.LoadDuration)
|
fmt.Fprintf(os.Stderr, "load duration: %v\n", m.LoadDuration)
|
||||||
}
|
}
|
||||||
@@ -963,14 +957,6 @@ func (m *Metrics) Summary() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func formatPeakMemory(b uint64) string {
|
|
||||||
if b >= format.GibiByte {
|
|
||||||
return fmt.Sprintf("%.3f GiB", float64(b)/float64(format.GibiByte))
|
|
||||||
}
|
|
||||||
|
|
||||||
return format.HumanBytes2(b)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (opts *Options) FromMap(m map[string]any) error {
|
func (opts *Options) FromMap(m map[string]any) error {
|
||||||
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
|
||||||
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
|
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
|
||||||
|
|||||||
@@ -1518,7 +1518,6 @@ type CompletionResponse struct {
|
|||||||
PromptEvalDuration time.Duration `json:"prompt_eval_duration"`
|
PromptEvalDuration time.Duration `json:"prompt_eval_duration"`
|
||||||
EvalCount int `json:"eval_count"`
|
EvalCount int `json:"eval_count"`
|
||||||
EvalDuration time.Duration `json:"eval_duration"`
|
EvalDuration time.Duration `json:"eval_duration"`
|
||||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
|
||||||
|
|
||||||
// Logprobs contains log probability information if requested
|
// Logprobs contains log probability information if requested
|
||||||
Logprobs []Logprob `json:"logprobs,omitempty"`
|
Logprobs []Logprob `json:"logprobs,omitempty"`
|
||||||
|
|||||||
@@ -32,10 +32,9 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type GLM46Parser struct {
|
type GLM46Parser struct {
|
||||||
state glm46ParserState
|
state glm46ParserState
|
||||||
buffer strings.Builder
|
buffer strings.Builder
|
||||||
tools []api.Tool
|
tools []api.Tool
|
||||||
callIndex int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *GLM46Parser) HasToolSupport() bool {
|
func (p *GLM46Parser) HasToolSupport() bool {
|
||||||
@@ -49,7 +48,6 @@ func (p *GLM46Parser) HasThinkingSupport() bool {
|
|||||||
// func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
// func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
||||||
func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
func (p *GLM46Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||||
p.tools = tools
|
p.tools = tools
|
||||||
p.callIndex = 0
|
|
||||||
return tools
|
return tools
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -91,8 +89,6 @@ func (p *GLM46Parser) Add(s string, done bool) (content string, thinking string,
|
|||||||
slog.Warn("glm-4.6 tool call parsing failed", "error", err)
|
slog.Warn("glm-4.6 tool call parsing failed", "error", err)
|
||||||
return "", "", nil, err
|
return "", "", nil, err
|
||||||
}
|
}
|
||||||
toolCall.Function.Index = p.callIndex
|
|
||||||
p.callIndex++
|
|
||||||
toolCalls = append(toolCalls, toolCall)
|
toolCalls = append(toolCalls, toolCall)
|
||||||
case glm46EventThinkingContent:
|
case glm46EventThinkingContent:
|
||||||
thinkingSb.WriteString(event.content)
|
thinkingSb.WriteString(event.content)
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ type GLM47Parser struct {
|
|||||||
|
|
||||||
func (p *GLM47Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
func (p *GLM47Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||||
p.tools = tools
|
p.tools = tools
|
||||||
p.callIndex = 0
|
|
||||||
// When thinking is enabled (nil or true), the prompt ends with <think>,
|
// When thinking is enabled (nil or true), the prompt ends with <think>,
|
||||||
// so model output starts directly with thinking content (no opening tag).
|
// so model output starts directly with thinking content (no opening tag).
|
||||||
if thinkValue == nil || thinkValue.Bool() {
|
if thinkValue == nil || thinkValue.Bool() {
|
||||||
|
|||||||
@@ -97,91 +97,3 @@ func TestGLM47ParserToolCallEscaping(t *testing.T) {
|
|||||||
t.Fatalf("expected %#v, got %#v", expected, toolCall)
|
t.Fatalf("expected %#v, got %#v", expected, toolCall)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGLM47ParserToolCallIndexing(t *testing.T) {
|
|
||||||
parser := GLM47Parser{}
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
|
|
||||||
input := `plan</think>
|
|
||||||
<tool_call>first<arg_key>a</arg_key><arg_value>1</arg_value></tool_call>
|
|
||||||
<tool_call>second<arg_key>b</arg_key><arg_value>2</arg_value></tool_call>
|
|
||||||
<tool_call>third<arg_key>c</arg_key><arg_value>3</arg_value></tool_call>`
|
|
||||||
|
|
||||||
_, _, calls, err := parser.Add(input, true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
want := []api.ToolCall{
|
|
||||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
|
||||||
}
|
|
||||||
if len(calls) != len(want) {
|
|
||||||
t.Fatalf("expected %d calls, got %d", len(want), len(calls))
|
|
||||||
}
|
|
||||||
for i := range want {
|
|
||||||
if !toolCallEqual(calls[i], want[i]) {
|
|
||||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, calls[i], want[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGLM47ParserToolCallIndexingStreaming(t *testing.T) {
|
|
||||||
parser := GLM47Parser{}
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
|
|
||||||
var all []api.ToolCall
|
|
||||||
|
|
||||||
_, _, calls, err := parser.Add("plan</think><tool_call>first<arg_key>a</arg_key><arg_value>1</arg_value></tool_call><tool_call>second<arg_key>b</arg_key>", false)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("step 1 parse failed: %v", err)
|
|
||||||
}
|
|
||||||
all = append(all, calls...)
|
|
||||||
|
|
||||||
_, _, calls, err = parser.Add("<arg_value>2</arg_value></tool_call><tool_call>third<arg_key>c</arg_key><arg_value>3</arg_value></tool_call>", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("step 2 parse failed: %v", err)
|
|
||||||
}
|
|
||||||
all = append(all, calls...)
|
|
||||||
|
|
||||||
want := []api.ToolCall{
|
|
||||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
|
||||||
}
|
|
||||||
if len(all) != len(want) {
|
|
||||||
t.Fatalf("expected %d calls, got %d", len(want), len(all))
|
|
||||||
}
|
|
||||||
for i := range want {
|
|
||||||
if !toolCallEqual(all[i], want[i]) {
|
|
||||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, all[i], want[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGLM47ParserToolCallIndexResetOnInit(t *testing.T) {
|
|
||||||
parser := GLM47Parser{}
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
|
|
||||||
_, _, _, err := parser.Add("plan</think><tool_call>first<arg_key>a</arg_key><arg_value>1</arg_value></tool_call>", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("first parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
_, _, calls, err := parser.Add("plan</think><tool_call>second<arg_key>b</arg_key><arg_value>2</arg_value></tool_call>", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("second parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
want := api.ToolCall{
|
|
||||||
Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 0},
|
|
||||||
}
|
|
||||||
if len(calls) != 1 {
|
|
||||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
|
||||||
}
|
|
||||||
if !toolCallEqual(calls[0], want) {
|
|
||||||
t.Fatalf("got %#v, want %#v", calls[0], want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ type Qwen3Parser struct {
|
|||||||
state qwen3ParserState
|
state qwen3ParserState
|
||||||
buffer strings.Builder
|
buffer strings.Builder
|
||||||
tools []api.Tool
|
tools []api.Tool
|
||||||
callIndex int
|
|
||||||
hasThinkingSupport bool
|
hasThinkingSupport bool
|
||||||
defaultThinking bool
|
defaultThinking bool
|
||||||
maybeThinkingOpenAtBOL bool
|
maybeThinkingOpenAtBOL bool
|
||||||
@@ -55,7 +54,6 @@ func (p *Qwen3Parser) HasThinkingSupport() bool {
|
|||||||
func (p *Qwen3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
func (p *Qwen3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||||
p.tools = tools
|
p.tools = tools
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
p.callIndex = 0
|
|
||||||
|
|
||||||
thinkingEnabled := thinkValue != nil && thinkValue.Bool()
|
thinkingEnabled := thinkValue != nil && thinkValue.Bool()
|
||||||
if thinkValue == nil {
|
if thinkValue == nil {
|
||||||
@@ -108,8 +106,6 @@ func (p *Qwen3Parser) Add(s string, done bool) (content string, thinking string,
|
|||||||
slog.Warn("qwen3 tool call parsing failed", "error", err)
|
slog.Warn("qwen3 tool call parsing failed", "error", err)
|
||||||
return "", "", nil, err
|
return "", "", nil, err
|
||||||
}
|
}
|
||||||
toolCall.Function.Index = p.callIndex
|
|
||||||
p.callIndex++
|
|
||||||
calls = append(calls, toolCall)
|
calls = append(calls, toolCall)
|
||||||
case qwen3EventThinkingContent:
|
case qwen3EventThinkingContent:
|
||||||
thinkingSb.WriteString(event.content)
|
thinkingSb.WriteString(event.content)
|
||||||
|
|||||||
@@ -230,89 +230,3 @@ func TestQwen35ParserRespectsNoThink(t *testing.T) {
|
|||||||
t.Fatalf("expected no tool calls, got %d", len(calls))
|
t.Fatalf("expected no tool calls, got %d", len(calls))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestQwen3ParserToolCallIndexing(t *testing.T) {
|
|
||||||
parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
|
|
||||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
|
||||||
|
|
||||||
input := `<tool_call>{"name":"first","arguments":{"a":"1"}}</tool_call>
|
|
||||||
<tool_call>{"name":"second","arguments":{"b":"2"}}</tool_call>
|
|
||||||
<tool_call>{"name":"third","arguments":{"c":"3"}}</tool_call>`
|
|
||||||
_, _, calls, err := parser.Add(input, true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
want := []api.ToolCall{
|
|
||||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
|
||||||
}
|
|
||||||
if len(calls) != len(want) {
|
|
||||||
t.Fatalf("expected %d calls, got %d", len(want), len(calls))
|
|
||||||
}
|
|
||||||
for i := range want {
|
|
||||||
if !toolCallEqual(calls[i], want[i]) {
|
|
||||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, calls[i], want[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestQwen3ParserToolCallIndexingStreaming(t *testing.T) {
|
|
||||||
parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
|
|
||||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
|
||||||
|
|
||||||
var all []api.ToolCall
|
|
||||||
|
|
||||||
_, _, calls, err := parser.Add(`<tool_call>{"name":"first","arguments":{"a":"1"}}</tool_call><tool_call>{"name":"second","arguments":{"b":"2"}`, false)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("step 1 parse failed: %v", err)
|
|
||||||
}
|
|
||||||
all = append(all, calls...)
|
|
||||||
|
|
||||||
_, _, calls, err = parser.Add(`}</tool_call><tool_call>{"name":"third","arguments":{"c":"3"}}</tool_call>`, true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("step 2 parse failed: %v", err)
|
|
||||||
}
|
|
||||||
all = append(all, calls...)
|
|
||||||
|
|
||||||
want := []api.ToolCall{
|
|
||||||
{Function: api.ToolCallFunction{Name: "first", Arguments: args(`{"a":"1"}`), Index: 0}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 1}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "third", Arguments: args(`{"c":"3"}`), Index: 2}},
|
|
||||||
}
|
|
||||||
if len(all) != len(want) {
|
|
||||||
t.Fatalf("expected %d calls, got %d", len(want), len(all))
|
|
||||||
}
|
|
||||||
for i := range want {
|
|
||||||
if !toolCallEqual(all[i], want[i]) {
|
|
||||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, all[i], want[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestQwen3ParserToolCallIndexResetOnInit(t *testing.T) {
|
|
||||||
parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
|
|
||||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
|
||||||
|
|
||||||
_, _, _, err := parser.Add(`<tool_call>{"name":"first","arguments":{"a":"1"}}</tool_call>`, true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("first parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
parser.Init(nil, nil, &api.ThinkValue{Value: false})
|
|
||||||
_, _, calls, err := parser.Add(`<tool_call>{"name":"second","arguments":{"b":"2"}}</tool_call>`, true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("second parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
want := api.ToolCall{
|
|
||||||
Function: api.ToolCallFunction{Name: "second", Arguments: args(`{"b":"2"}`), Index: 0},
|
|
||||||
}
|
|
||||||
if len(calls) != 1 {
|
|
||||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
|
||||||
}
|
|
||||||
if !toolCallEqual(calls[0], want) {
|
|
||||||
t.Fatalf("got %#v, want %#v", calls[0], want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -29,10 +29,9 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Qwen3CoderParser struct {
|
type Qwen3CoderParser struct {
|
||||||
state qwenParserState
|
state qwenParserState
|
||||||
acc strings.Builder
|
acc strings.Builder
|
||||||
tools []api.Tool
|
tools []api.Tool
|
||||||
callIndex int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Qwen3CoderParser) HasToolSupport() bool {
|
func (p *Qwen3CoderParser) HasToolSupport() bool {
|
||||||
@@ -45,7 +44,6 @@ func (p *Qwen3CoderParser) HasThinkingSupport() bool {
|
|||||||
|
|
||||||
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
|
||||||
p.tools = tools
|
p.tools = tools
|
||||||
p.callIndex = 0
|
|
||||||
return tools // Qwen doesn't modify tools
|
return tools // Qwen doesn't modify tools
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,8 +62,6 @@ func (p *Qwen3CoderParser) Add(s string, done bool) (content string, thinking st
|
|||||||
slog.Warn("qwen tool call parsing failed", "error", err)
|
slog.Warn("qwen tool call parsing failed", "error", err)
|
||||||
return "", "", nil, err
|
return "", "", nil, err
|
||||||
}
|
}
|
||||||
toolCall.Function.Index = p.callIndex
|
|
||||||
p.callIndex++
|
|
||||||
toolCalls = append(toolCalls, toolCall)
|
toolCalls = append(toolCalls, toolCall)
|
||||||
case qwenEventContent:
|
case qwenEventContent:
|
||||||
// TODO(drifkin): if the same turn contains multiple interleaved content
|
// TODO(drifkin): if the same turn contains multiple interleaved content
|
||||||
|
|||||||
@@ -1035,92 +1035,6 @@ func TestQwenToolCallValueParsing(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestQwen3CoderParserToolCallIndexing(t *testing.T) {
|
|
||||||
parser := Qwen3CoderParser{}
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
|
|
||||||
input := `<tool_call><function=first><parameter=a>1</parameter></function></tool_call>
|
|
||||||
<tool_call><function=second><parameter=b>2</parameter></function></tool_call>
|
|
||||||
<tool_call><function=third><parameter=c>3</parameter></function></tool_call>`
|
|
||||||
_, _, calls, err := parser.Add(input, true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
want := []api.ToolCall{
|
|
||||||
{Function: api.ToolCallFunction{Name: "first", Arguments: testArgs(map[string]any{"a": "1"}), Index: 0}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "second", Arguments: testArgs(map[string]any{"b": "2"}), Index: 1}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "third", Arguments: testArgs(map[string]any{"c": "3"}), Index: 2}},
|
|
||||||
}
|
|
||||||
if len(calls) != len(want) {
|
|
||||||
t.Fatalf("expected %d calls, got %d", len(want), len(calls))
|
|
||||||
}
|
|
||||||
for i := range want {
|
|
||||||
if !toolCallEqual(calls[i], want[i]) {
|
|
||||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, calls[i], want[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestQwen3CoderParserToolCallIndexingStreaming(t *testing.T) {
|
|
||||||
parser := Qwen3CoderParser{}
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
|
|
||||||
var all []api.ToolCall
|
|
||||||
|
|
||||||
_, _, calls, err := parser.Add("<tool_call><function=first><parameter=a>1</parameter></function></tool_call><tool_call><function=second>", false)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("step 1 parse failed: %v", err)
|
|
||||||
}
|
|
||||||
all = append(all, calls...)
|
|
||||||
|
|
||||||
_, _, calls, err = parser.Add("<parameter=b>2</parameter></function></tool_call><tool_call><function=third><parameter=c>3</parameter></function></tool_call>", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("step 2 parse failed: %v", err)
|
|
||||||
}
|
|
||||||
all = append(all, calls...)
|
|
||||||
|
|
||||||
want := []api.ToolCall{
|
|
||||||
{Function: api.ToolCallFunction{Name: "first", Arguments: testArgs(map[string]any{"a": "1"}), Index: 0}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "second", Arguments: testArgs(map[string]any{"b": "2"}), Index: 1}},
|
|
||||||
{Function: api.ToolCallFunction{Name: "third", Arguments: testArgs(map[string]any{"c": "3"}), Index: 2}},
|
|
||||||
}
|
|
||||||
if len(all) != len(want) {
|
|
||||||
t.Fatalf("expected %d calls, got %d", len(want), len(all))
|
|
||||||
}
|
|
||||||
for i := range want {
|
|
||||||
if !toolCallEqual(all[i], want[i]) {
|
|
||||||
t.Fatalf("call %d mismatch: got %#v, want %#v", i, all[i], want[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestQwen3CoderParserToolCallIndexResetOnInit(t *testing.T) {
|
|
||||||
parser := Qwen3CoderParser{}
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
|
|
||||||
_, _, _, err := parser.Add("<tool_call><function=first><parameter=a>1</parameter></function></tool_call>", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("first parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
parser.Init(nil, nil, nil)
|
|
||||||
_, _, calls, err := parser.Add("<tool_call><function=second><parameter=b>2</parameter></function></tool_call>", true)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("second parse failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
want := api.ToolCall{
|
|
||||||
Function: api.ToolCallFunction{Name: "second", Arguments: testArgs(map[string]any{"b": "2"}), Index: 0},
|
|
||||||
}
|
|
||||||
if len(calls) != 1 {
|
|
||||||
t.Fatalf("expected 1 call, got %d", len(calls))
|
|
||||||
}
|
|
||||||
if !toolCallEqual(calls[0], want) {
|
|
||||||
t.Fatalf("got %#v, want %#v", calls[0], want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestQwenXMLTransform(t *testing.T) {
|
func TestQwenXMLTransform(t *testing.T) {
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
desc string
|
desc string
|
||||||
|
|||||||
@@ -557,7 +557,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
PromptEvalDuration: cr.PromptEvalDuration,
|
PromptEvalDuration: cr.PromptEvalDuration,
|
||||||
EvalCount: cr.EvalCount,
|
EvalCount: cr.EvalCount,
|
||||||
EvalDuration: cr.EvalDuration,
|
EvalDuration: cr.EvalDuration,
|
||||||
PeakMemory: cr.PeakMemory,
|
|
||||||
},
|
},
|
||||||
Logprobs: toAPILogprobs(cr.Logprobs),
|
Logprobs: toAPILogprobs(cr.Logprobs),
|
||||||
}
|
}
|
||||||
@@ -2310,7 +2309,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
PromptEvalDuration: r.PromptEvalDuration,
|
PromptEvalDuration: r.PromptEvalDuration,
|
||||||
EvalCount: r.EvalCount,
|
EvalCount: r.EvalCount,
|
||||||
EvalDuration: r.EvalDuration,
|
EvalDuration: r.EvalDuration,
|
||||||
PeakMemory: r.PeakMemory,
|
|
||||||
},
|
},
|
||||||
Logprobs: toAPILogprobs(r.Logprobs),
|
Logprobs: toAPILogprobs(r.Logprobs),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -78,6 +78,11 @@ func (c *kvCache) findRemaining(tokens []int32) []int32 {
|
|||||||
prefix++
|
prefix++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if prefix == len(tokens) && prefix > 0 {
|
||||||
|
// Leave one token to run through the model so we can sample a response.
|
||||||
|
prefix--
|
||||||
|
}
|
||||||
|
|
||||||
if prefix < len(c.tokens) {
|
if prefix < len(c.tokens) {
|
||||||
trim := len(c.tokens) - prefix
|
trim := len(c.tokens) - prefix
|
||||||
for _, kv := range c.caches {
|
for _, kv := range c.caches {
|
||||||
|
|||||||
@@ -268,7 +268,6 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
|||||||
PromptEvalDuration int `json:"prompt_eval_duration,omitempty"`
|
PromptEvalDuration int `json:"prompt_eval_duration,omitempty"`
|
||||||
EvalCount int `json:"eval_count,omitempty"`
|
EvalCount int `json:"eval_count,omitempty"`
|
||||||
EvalDuration int `json:"eval_duration,omitempty"`
|
EvalDuration int `json:"eval_duration,omitempty"`
|
||||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
|
||||||
}
|
}
|
||||||
if err := json.Unmarshal(scanner.Bytes(), &raw); err != nil {
|
if err := json.Unmarshal(scanner.Bytes(), &raw); err != nil {
|
||||||
slog.Debug("mlx response parse error", "error", err, "line", string(scanner.Bytes()))
|
slog.Debug("mlx response parse error", "error", err, "line", string(scanner.Bytes()))
|
||||||
@@ -283,7 +282,6 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
|||||||
PromptEvalDuration: time.Duration(raw.PromptEvalDuration),
|
PromptEvalDuration: time.Duration(raw.PromptEvalDuration),
|
||||||
EvalCount: raw.EvalCount,
|
EvalCount: raw.EvalCount,
|
||||||
EvalDuration: time.Duration(raw.EvalDuration),
|
EvalDuration: time.Duration(raw.EvalDuration),
|
||||||
PeakMemory: raw.PeakMemory,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn(cresp)
|
fn(cresp)
|
||||||
|
|||||||
@@ -64,10 +64,6 @@ func PeakMemory() int {
|
|||||||
return int(peak)
|
return int(peak)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ResetPeakMemory() {
|
|
||||||
C.mlx_reset_peak_memory()
|
|
||||||
}
|
|
||||||
|
|
||||||
type Memory struct{}
|
type Memory struct{}
|
||||||
|
|
||||||
func (Memory) LogValue() slog.Value {
|
func (Memory) LogValue() slog.Value {
|
||||||
|
|||||||
@@ -44,7 +44,6 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
|||||||
} else {
|
} else {
|
||||||
mlx.DisableCompile()
|
mlx.DisableCompile()
|
||||||
}
|
}
|
||||||
mlx.ResetPeakMemory()
|
|
||||||
|
|
||||||
inputs := r.Tokenizer.Encode(request.Prompt, true)
|
inputs := r.Tokenizer.Encode(request.Prompt, true)
|
||||||
session := r.cache.begin(r.Model, inputs)
|
session := r.cache.begin(r.Model, inputs)
|
||||||
@@ -139,7 +138,6 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
final.CompletionTokensDuration = time.Since(now)
|
final.CompletionTokensDuration = time.Since(now)
|
||||||
final.PeakMemory = uint64(mlx.PeakMemory())
|
|
||||||
select {
|
select {
|
||||||
case <-request.Ctx.Done():
|
case <-request.Ctx.Done():
|
||||||
return request.Ctx.Err()
|
return request.Ctx.Err()
|
||||||
|
|||||||
@@ -54,7 +54,6 @@ type Response struct {
|
|||||||
PromptTokensDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
PromptTokensDuration time.Duration `json:"prompt_eval_duration,omitempty"`
|
||||||
CompletionTokens int `json:"eval_count,omitempty"`
|
CompletionTokens int `json:"eval_count,omitempty"`
|
||||||
CompletionTokensDuration time.Duration `json:"eval_duration,omitempty"`
|
CompletionTokensDuration time.Duration `json:"eval_duration,omitempty"`
|
||||||
PeakMemory uint64 `json:"peak_memory,omitempty"`
|
|
||||||
TotalTokens int `json:"total_tokens,omitempty"`
|
TotalTokens int `json:"total_tokens,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user