mirror of
https://github.com/ollama/ollama.git
synced 2026-01-19 04:51:17 -05:00
Compare commits
5 Commits
parth/decr
...
parth/impr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0103a3a89b | ||
|
|
471cbbe95a | ||
|
|
00c96eed1f | ||
|
|
aff30cdbc1 | ||
|
|
bd834dcbe3 |
@@ -13,5 +13,3 @@ The integration tests have 2 modes of operating.
|
|||||||
> [!IMPORTANT]
|
> [!IMPORTANT]
|
||||||
> Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree with `go build .`, and also build GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
|
> Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree with `go build .`, and also build GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
|
||||||
|
|
||||||
|
|
||||||
Many tests use a default small model suitable to run on many systems. You can override this default model by setting `OLLAMA_TEST_DEFAULT_MODEL`
|
|
||||||
@@ -3,50 +3,187 @@
|
|||||||
package integration
|
package integration
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
|
"github.com/ollama/ollama/openai"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestAPIToolCalling(t *testing.T) {
|
// libraryToolsModels is the ordered list of model names that the tool-calling
// test iterates over, running one subtest per entry.
var libraryToolsModels = []string{
	"qwen3-vl",
	"gpt-oss:20b", "gpt-oss:120b",
	"qwen3",
	"llama3.1", "llama3.2",
	"mistral",
	"qwen2.5", "qwen2",
	"mistral-nemo", "mistral-small", "mixtral:8x22b",
	"qwq",
	"granite3.3",
}
|
||||||
|
|
||||||
|
// float64Ptr returns a pointer to a freshly allocated copy of v. It exists so
// that optional *float64 fields can be populated from a literal.
func float64Ptr(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}
|
||||||
|
|
||||||
|
func sendOpenAIChatRequest(ctx context.Context, endpoint string, req openai.ChatCompletionRequest) (*openai.ChatCompletion, error) {
|
||||||
|
jsonData, err := json.Marshal(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, "POST", endpoint+"/v1/chat/completions", bytes.NewBuffer(jsonData))
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
client := &http.Client{
|
||||||
|
Timeout: 10 * time.Minute,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("API error: status=%d, body=%s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var chatResp openai.ChatCompletion
|
||||||
|
if err := json.Unmarshal(body, &chatResp); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal response: %w, body: %s", err, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
return &chatResp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func sendOpenAIChatStreamRequest(ctx context.Context, endpoint string, req openai.ChatCompletionRequest, fn func(openai.ChatCompletionChunk) error) error {
|
||||||
|
jsonData, err := json.Marshal(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, "POST", endpoint+"/v1/chat/completions", bytes.NewBuffer(jsonData))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
|
httpReq.Header.Set("Accept", "text/event-stream")
|
||||||
|
|
||||||
|
client := &http.Client{
|
||||||
|
Timeout: 0, // No timeout for streaming
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to send request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return fmt.Errorf("API error: status=%d, body=%s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
decoder := resp.Body
|
||||||
|
reader := bytes.NewBuffer([]byte{})
|
||||||
|
buf := make([]byte, 4096)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
n, err := decoder.Read(buf)
|
||||||
|
if n > 0 {
|
||||||
|
reader.Write(buf[:n])
|
||||||
|
|
||||||
|
// Process complete lines
|
||||||
|
for {
|
||||||
|
line, err := reader.ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
// Not a complete line yet
|
||||||
|
reader.WriteString(line)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if strings.HasPrefix(line, "data: ") {
|
||||||
|
data := strings.TrimPrefix(line, "data: ")
|
||||||
|
|
||||||
|
if data == "[DONE]" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var streamResp openai.ChatCompletionChunk
|
||||||
|
if err := json.Unmarshal([]byte(data), &streamResp); err != nil {
|
||||||
|
return fmt.Errorf("failed to unmarshal stream response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := fn(streamResp); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
if err != io.EOF {
|
||||||
|
return fmt.Errorf("error reading stream: %w", err)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestToolCallingAllAPIs tests both Ollama and OpenAI APIs with shared model loading
|
||||||
|
func TestToolCallingAllAPIs(t *testing.T) {
|
||||||
initialTimeout := 60 * time.Second
|
initialTimeout := 60 * time.Second
|
||||||
streamTimeout := 60 * time.Second
|
streamTimeout := 60 * time.Second
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
client, _, cleanup := InitServerConnection(ctx, t)
|
client, endpoint, cleanup := InitServerConnection(ctx, t)
|
||||||
defer cleanup()
|
defer cleanup()
|
||||||
|
|
||||||
minVRAM := map[string]uint64{
|
|
||||||
"qwen3-vl": 16,
|
|
||||||
"gpt-oss:20b": 16,
|
|
||||||
"gpt-oss:120b": 70,
|
|
||||||
"qwen3": 6,
|
|
||||||
"llama3.1": 8,
|
|
||||||
"llama3.2": 4,
|
|
||||||
"mistral": 6,
|
|
||||||
"qwen2.5": 6,
|
|
||||||
"qwen2": 6,
|
|
||||||
"mistral-nemo": 9,
|
|
||||||
"mistral-small": 16,
|
|
||||||
"mixtral:8x22b": 80,
|
|
||||||
"qwq": 20,
|
|
||||||
"granite3.3": 7,
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, model := range libraryToolsModels {
|
for _, model := range libraryToolsModels {
|
||||||
t.Run(model, func(t *testing.T) {
|
t.Run(model, func(t *testing.T) {
|
||||||
|
// Skip if insufficient VRAM
|
||||||
if v, ok := minVRAM[model]; ok {
|
if v, ok := minVRAM[model]; ok {
|
||||||
skipUnderMinVRAM(t, v)
|
skipUnderMinVRAM(t, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pull model if missing - only do this once per model
|
||||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||||
t.Fatalf("pull failed %s", err)
|
t.Fatalf("pull failed %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t.Run("OllamaAPI", func(t *testing.T) {
|
||||||
tools := []api.Tool{
|
tools := []api.Tool{
|
||||||
{
|
{
|
||||||
Type: "function",
|
Type: "function",
|
||||||
@@ -128,5 +265,91 @@ func TestAPIToolCalling(t *testing.T) {
|
|||||||
t.Error("outer test context done while waiting for tool-calling chat")
|
t.Error("outer test context done while waiting for tool-calling chat")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("OpenAIAPI", func(t *testing.T) {
|
||||||
|
tools := []api.Tool{
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: api.ToolFunction{
|
||||||
|
Name: "get_weather",
|
||||||
|
Description: "Get the current weather in a given location",
|
||||||
|
Parameters: api.ToolFunctionParameters{
|
||||||
|
Type: "object",
|
||||||
|
Required: []string{"location"},
|
||||||
|
Properties: map[string]api.ToolProperty{
|
||||||
|
"location": {
|
||||||
|
Type: api.PropertyType{"string"},
|
||||||
|
Description: "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
req := openai.ChatCompletionRequest{
|
||||||
|
Model: model,
|
||||||
|
Messages: []openai.Message{
|
||||||
|
{
|
||||||
|
Role: "user",
|
||||||
|
Content: "Call get_weather with location set to San Francisco.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Tools: tools,
|
||||||
|
Stream: true,
|
||||||
|
Temperature: float64Ptr(0),
|
||||||
|
}
|
||||||
|
|
||||||
|
stallTimer := time.NewTimer(initialTimeout)
|
||||||
|
var gotToolCall bool
|
||||||
|
var lastToolCall openai.ToolCall
|
||||||
|
|
||||||
|
fn := func(response openai.ChatCompletionChunk) error {
|
||||||
|
if len(response.Choices) > 0 && len(response.Choices[0].Delta.ToolCalls) > 0 {
|
||||||
|
gotToolCall = true
|
||||||
|
toolCalls := response.Choices[0].Delta.ToolCalls
|
||||||
|
lastToolCall = toolCalls[len(toolCalls)-1]
|
||||||
|
}
|
||||||
|
if !stallTimer.Reset(streamTimeout) {
|
||||||
|
return fmt.Errorf("stall was detected while streaming response, aborting")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
done := make(chan int)
|
||||||
|
var genErr error
|
||||||
|
go func() {
|
||||||
|
genErr = sendOpenAIChatStreamRequest(ctx, "http://"+endpoint, req, fn)
|
||||||
|
done <- 0
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-stallTimer.C:
|
||||||
|
t.Errorf("tool-calling chat never started. Timed out after: %s", initialTimeout.String())
|
||||||
|
case <-done:
|
||||||
|
if genErr != nil {
|
||||||
|
t.Fatalf("chat failed: %v", genErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !gotToolCall {
|
||||||
|
t.Fatalf("expected at least one tool call, got none")
|
||||||
|
}
|
||||||
|
|
||||||
|
if lastToolCall.Function.Name != "get_weather" {
|
||||||
|
t.Errorf("unexpected tool called: got %q want %q", lastToolCall.Function.Name, "get_weather")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.Contains(lastToolCall.Function.Arguments, "location") {
|
||||||
|
t.Errorf("expected tool arguments to include 'location', got: %s", lastToolCall.Function.Arguments)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.Contains(lastToolCall.Function.Arguments, "San Francisco") {
|
||||||
|
t.Errorf("expected tool arguments to include 'San Francisco', got: %s", lastToolCall.Function.Arguments)
|
||||||
|
}
|
||||||
|
case <-ctx.Done():
|
||||||
|
t.Error("outer test context done while waiting for tool-calling chat")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -260,22 +260,6 @@ var (
|
|||||||
"snowflake-arctic-embed",
|
"snowflake-arctic-embed",
|
||||||
"snowflake-arctic-embed2",
|
"snowflake-arctic-embed2",
|
||||||
}
|
}
|
||||||
libraryToolsModels = []string{
|
|
||||||
"qwen3-vl",
|
|
||||||
"gpt-oss:20b",
|
|
||||||
"gpt-oss:120b",
|
|
||||||
"qwen3",
|
|
||||||
"llama3.1",
|
|
||||||
"llama3.2",
|
|
||||||
"mistral",
|
|
||||||
"qwen2.5",
|
|
||||||
"qwen2",
|
|
||||||
"mistral-nemo",
|
|
||||||
"mistral-small",
|
|
||||||
"mixtral:8x22b",
|
|
||||||
"qwq",
|
|
||||||
"granite3.3",
|
|
||||||
}
|
|
||||||
|
|
||||||
blueSkyPrompt = "why is the sky blue? Be brief but factual in your reply"
|
blueSkyPrompt = "why is the sky blue? Be brief but factual in your reply"
|
||||||
blueSkyExpected = []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength", "interact"}
|
blueSkyExpected = []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength", "interact"}
|
||||||
@@ -747,6 +731,23 @@ func skipUnderMinVRAM(t *testing.T, gb uint64) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// minVRAM maps a model name to the threshold handed to skipUnderMinVRAM (its
// gb argument) before that model is exercised. Models without an entry are
// never skipped on this basis.
var minVRAM = map[string]uint64{
	// Entries are grouped from smallest to largest requirement.
	"llama3.2": 4,

	"qwen3":   6,
	"mistral": 6,
	"qwen2.5": 6,
	"qwen2":   6,

	"granite3.3":   7,
	"llama3.1":     8,
	"mistral-nemo": 9,

	"qwen3-vl":      16,
	"gpt-oss:20b":   16,
	"mistral-small": 16,

	"qwq": 20,

	"gpt-oss:120b":  70,
	"mixtral:8x22b": 80,
}
|
||||||
|
|
||||||
// Skip if the target model isn't X% GPU loaded to avoid excessive runtime
|
// Skip if the target model isn't X% GPU loaded to avoid excessive runtime
|
||||||
func skipIfNotGPULoaded(ctx context.Context, t *testing.T, client *api.Client, model string, minPercent int) {
|
func skipIfNotGPULoaded(ctx context.Context, t *testing.T, client *api.Client, model string, minPercent int) {
|
||||||
gpuPercent := getGPUPercent(ctx, t, client, model)
|
gpuPercent := getGPUPercent(ctx, t, client, model)
|
||||||
|
|||||||
Reference in New Issue
Block a user