Compare commits


7 Commits

Author SHA1 Message Date
ParthSareen
92af238208 wip 2025-12-02 12:17:36 -08:00
ParthSareen
7461faf651 script to render templates 2025-12-01 18:03:04 -08:00
Daniel Hiltgen
554172759c win: warn if ggml-base detected in PATH (#13289)
If the user has somehow installed another GGML-based app which places a
ggml-base lib somewhere in their PATH, we can experience runtime problems
due to incompatibilities. This change adds a warning message if we detect
a ggml-base outside of our install location to aid in troubleshooting.
2025-12-01 15:36:47 -08:00
Bruce MacDonald
5b6a8e6001 api/client: handle non-json streaming errors (#13007)
While processing the response stream during a chat or generation, if an error occurs it is parsed and returned to the user. The issue with the existing code is that it assumed the response would be valid JSON, which is not a safe assumption and caused cryptic error messages to be displayed due to parsing failures:
`invalid character 'i' looking for beginning of value`

This change updates the stream function to return the raw error string if it can't be parsed as JSON. This should help with debugging by making sure the actual error reaches the user.
2025-12-01 15:10:16 -08:00
Daniel Hiltgen
467bbc0dd5 jetpack: require exact match or skip cuda_jetpack* (#13288)
The cuda_jetpack libs will enumerate discrete GPUs on SBSA systems,
which leads to runtime failures from missing kernels. This fix
requires an exact match to enable jetpacks instead of relying on
enumeration to filter out supported libraries.
2025-12-01 12:48:16 -08:00
Jeffrey Morgan
6d9f9323c5 .gitattributes: add app/webview to linguist-vendored (#13274) 2025-11-29 23:46:10 -05:00
Ondrej Kokes
0c2489605d docs: fix output formatting in faq.mdx (#13231)
There were a few Markdown typos in one FAQ answer. It now renders as a proper ASCII table.
2025-11-28 19:19:21 -05:00
15 changed files with 728 additions and 1361 deletions

.gitattributes vendored
View File

@@ -19,6 +19,8 @@ ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored
app/webview linguist-vendored
llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

View File

@@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
bts := scanner.Bytes()
if err := json.Unmarshal(bts, &errorResponse); err != nil {
- return fmt.Errorf("unmarshal: %w", err)
+ if response.StatusCode >= http.StatusBadRequest {
+     return StatusError{
+         StatusCode:   response.StatusCode,
+         Status:       response.Status,
+         ErrorMessage: string(bts),
+     }
+ }
+ return errors.New(string(bts))
}
if response.StatusCode == http.StatusUnauthorized {
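
For context (not part of this diff): after this change a caller can distinguish HTTP-level failures from other stream errors, and a non-JSON body such as an HTML error page from a proxy reaches the caller verbatim. A minimal sketch of what that might look like on the caller side; the model name and prompt are placeholders:

```go
package main

import (
	"context"
	"errors"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3", // placeholder model name
		Messages: []api.Message{{Role: "user", Content: "Hello!"}},
	}

	err = client.Chat(context.Background(), req, func(r api.ChatResponse) error {
		log.Print(r.Message.Content)
		return nil
	})

	// With the change above, a non-JSON error body is returned verbatim instead
	// of a cryptic JSON parsing error, and HTTP-level failures can be inspected
	// via api.StatusError.
	var statusErr api.StatusError
	if errors.As(err, &statusErr) {
		log.Printf("server returned %d: %s", statusErr.StatusCode, statusErr.ErrorMessage)
	} else if err != nil {
		log.Printf("chat failed: %v", err)
	}
}
```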

View File

@@ -55,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
type testError struct {
message string
statusCode int
raw bool // if true, write message as-is instead of JSON encoding
}
func (e testError) Error() string {
@@ -111,6 +112,20 @@ func TestClientStream(t *testing.T) {
},
},
},
{
name: "plain text error response",
responses: []any{
"internal server error",
},
wantErr: "internal server error",
},
{
name: "HTML error page",
responses: []any{
"<html><body>404 Not Found</body></html>",
},
wantErr: "404 Not Found",
},
}
for _, tc := range testCases {
@@ -135,6 +150,12 @@ func TestClientStream(t *testing.T) {
return
}
if str, ok := resp.(string); ok {
fmt.Fprintln(w, str)
flusher.Flush()
continue
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Fatalf("failed to encode response: %v", err)
}
@@ -173,9 +194,10 @@ func TestClientStream(t *testing.T) {
func TestClientDo(t *testing.T) {
testCases := []struct {
- name     string
- response any
- wantErr  string
+ name           string
+ response       any
+ wantErr        string
+ wantStatusCode int
}{
{
name: "immediate error response",
@@ -183,7 +205,8 @@ func TestClientDo(t *testing.T) {
message: "test error message",
statusCode: http.StatusBadRequest,
},
- wantErr: "test error message",
+ wantErr:        "test error message",
+ wantStatusCode: http.StatusBadRequest,
},
{
name: "server error response",
@@ -191,7 +214,8 @@ func TestClientDo(t *testing.T) {
message: "internal error",
statusCode: http.StatusInternalServerError,
},
- wantErr: "internal error",
+ wantErr:        "internal error",
+ wantStatusCode: http.StatusInternalServerError,
},
{
name: "successful response",
@@ -203,6 +227,26 @@ func TestClientDo(t *testing.T) {
Success: true,
},
},
{
name: "plain text error response",
response: testError{
message: "internal server error",
statusCode: http.StatusInternalServerError,
raw: true,
},
wantErr: "internal server error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "HTML error page",
response: testError{
message: "<html><body>404 Not Found</body></html>",
statusCode: http.StatusNotFound,
raw: true,
},
wantErr: "<html><body>404 Not Found</body></html>",
wantStatusCode: http.StatusNotFound,
},
}
for _, tc := range testCases {
@@ -210,11 +254,16 @@ func TestClientDo(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if errResp, ok := tc.response.(testError); ok {
w.WriteHeader(errResp.statusCode)
- err := json.NewEncoder(w).Encode(map[string]string{
-     "error": errResp.message,
- })
- if err != nil {
-     t.Fatal("failed to encode error response:", err)
+ if !errResp.raw {
+     err := json.NewEncoder(w).Encode(map[string]string{
+         "error": errResp.message,
+     })
+     if err != nil {
+         t.Fatal("failed to encode error response:", err)
+     }
+ } else {
+     // Write raw message (simulates non-JSON error responses)
+     fmt.Fprint(w, errResp.message)
}
return
}
@@ -241,6 +290,15 @@ func TestClientDo(t *testing.T) {
if err.Error() != tc.wantErr {
t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
}
if tc.wantStatusCode != 0 {
if statusErr, ok := err.(StatusError); ok {
if statusErr.StatusCode != tc.wantStatusCode {
t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
}
} else {
t.Errorf("expected StatusError, got %T", err)
}
}
return
}

View File

@@ -0,0 +1,625 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "transformers>=4.57.0",
# "jinja2",
# "fastapi",
# "uvicorn",
# "pydantic",
# "requests",
# ]
# ///
"""
Chat Template Testing Tool
Test HuggingFace chat templates against Ollama renderers.
Usage:
# Run predefined test cases against a HuggingFace model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3
# Compare HuggingFace output with Ollama renderer
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --ollama-model intellect3
# Start server for manual curl testing
uv run cmd/chat_template/chat_template.py --serve
# Show chat template for a model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --show-template
"""
import argparse
import json
import sys
from typing import Any
from transformers import AutoTokenizer
TEST_CASES = [
{
"name": "basic_user_message",
"messages": [{"role": "user", "content": "Hello!"}],
"tools": None,
},
{
"name": "with_system_message",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
],
"tools": None,
},
{
"name": "multi_turn_conversation",
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"},
],
"tools": None,
},
{
"name": "with_tools",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the weather?"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "tool_call_and_response",
"messages": [
{"role": "user", "content": "What is the weather in SF?"},
{
"role": "assistant",
"content": "Let me check the weather.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "parallel_tool_calls",
"messages": [
{"role": "user", "content": "Get weather in SF and NYC"},
{
"role": "assistant",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
},
{
"id": "call_2",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "New York"},
},
},
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
{"role": "tool", "content": '{"temperature": 55}', "tool_call_id": "call_2"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
# Thinking tests
{
"name": "assistant_with_thinking",
"messages": [
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": "The answer is 4.",
"thinking": "Let me calculate: 2 + 2 = 4. This is basic arithmetic.",
},
{"role": "user", "content": "And 3+3?"},
],
"tools": None,
},
{
"name": "thinking_with_tool_call",
"messages": [
{"role": "user", "content": "What's the weather in Paris?"},
{
"role": "assistant",
"content": "I'll check the weather for you.",
"thinking": "The user wants to know the weather in Paris. I should call the get_weather function.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "Paris"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 18, "condition": "cloudy"}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
{
"name": "thinking_only_no_content",
"messages": [
{"role": "user", "content": "Think about this silently."},
{
"role": "assistant",
"content": "", # HuggingFace requires content field
"thinking": "I'm thinking about this but won't respond with visible content.",
},
{"role": "user", "content": "What did you think?"},
],
"tools": None,
},
]
# Cache for tokenizers
_tokenizer_cache: dict[str, Any] = {}
def get_tokenizer(model_name: str):
"""Get or create tokenizer for the given model."""
if model_name not in _tokenizer_cache:
print(f"Loading tokenizer for {model_name}...", file=sys.stderr)
_tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(model_name)
return _tokenizer_cache[model_name]
def apply_template(
model: str,
messages: list[dict],
tools: list[dict] | None = None,
) -> str:
"""Apply HuggingFace chat template to messages."""
tokenizer = get_tokenizer(model)
if tools:
return tokenizer.apply_chat_template(
messages,
tools=tools,
tokenize=False,
add_generation_prompt=True,
)
else:
return tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
def get_ollama_prompt(
ollama_model: str,
messages: list[dict],
tools: list[dict] | None = None,
ollama_host: str = "http://localhost:11434",
) -> str | None:
"""Get rendered prompt from Ollama using debug_render_only."""
import requests
# Convert messages to Ollama format
ollama_messages = []
for msg in messages:
ollama_msg = {"role": msg["role"]}
if "content" in msg:
ollama_msg["content"] = msg["content"]
if "thinking" in msg:
ollama_msg["thinking"] = msg["thinking"]
if "tool_calls" in msg:
# Convert tool_calls to Ollama format
tool_calls = []
for tc in msg["tool_calls"]:
tool_call = {
"function": {
"name": tc["function"]["name"],
"arguments": tc["function"]["arguments"],
}
}
if "id" in tc:
tool_call["id"] = tc["id"]
tool_calls.append(tool_call)
ollama_msg["tool_calls"] = tool_calls
if "tool_call_id" in msg:
ollama_msg["tool_call_id"] = msg["tool_call_id"]
ollama_messages.append(ollama_msg)
payload = {
"model": ollama_model,
"messages": ollama_messages,
"stream": False,
"_debug_render_only": True,
}
if tools:
payload["tools"] = tools
try:
resp = requests.post(f"{ollama_host}/api/chat", json=payload, timeout=30)
resp.raise_for_status()
data = resp.json()
# Field name is _debug_info with underscore prefix
if "_debug_info" in data and "rendered_template" in data["_debug_info"]:
return data["_debug_info"]["rendered_template"]
return None
except requests.exceptions.ConnectionError:
print(f" [ERROR] Cannot connect to Ollama at {ollama_host}", file=sys.stderr)
return None
except Exception as e:
print(f" [ERROR] Ollama request failed: {e}", file=sys.stderr)
return None
def compute_diff(hf_prompt: str, ollama_prompt: str) -> str:
"""Compute a unified diff between HuggingFace and Ollama prompts."""
import difflib
hf_lines = hf_prompt.splitlines(keepends=True)
ollama_lines = ollama_prompt.splitlines(keepends=True)
diff = difflib.unified_diff(
ollama_lines,
hf_lines,
fromfile="Ollama",
tofile="HuggingFace",
lineterm="",
)
return "".join(diff)
def print_test_output(
name: str,
messages: list[dict],
tools: list[dict] | None,
hf_prompt: str,
ollama_prompt: str | None = None,
as_repr: bool = False,
):
"""Print test output in a format suitable for Go test creation and LLM diffing."""
print(f"\n{'='*60}")
print(f"Test: {name}")
print("=" * 60)
print("\n--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print("\n--- Tools ---")
print(json.dumps(tools, indent=2))
if ollama_prompt is not None:
# Comparison mode
if hf_prompt == ollama_prompt:
print("\n--- Result: MATCH ---")
print("\n--- Prompt (both identical) ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
else:
print("\n--- Result: MISMATCH ---")
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("\n--- Ollama Prompt ---")
if as_repr:
print(repr(ollama_prompt))
else:
print(ollama_prompt)
print("\n--- Diff (Ollama -> HuggingFace) ---")
diff = compute_diff(hf_prompt, ollama_prompt)
if diff:
print(diff)
else:
print("(no line-level diff, check whitespace)")
else:
# HuggingFace only mode
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("=" * 60)
def run_tests(
model: str,
as_repr: bool = False,
test_filter: str | None = None,
ollama_model: str | None = None,
ollama_host: str = "http://localhost:11434",
):
"""Run all predefined test cases against a model."""
if ollama_model:
print(f"\nComparing HuggingFace ({model}) vs Ollama ({ollama_model})\n")
else:
print(f"\nRunning tests against: {model}\n")
matches = 0
mismatches = 0
errors = 0
for test_case in TEST_CASES:
name = test_case["name"]
messages = test_case["messages"]
tools = test_case["tools"]
# Filter tests if specified
if test_filter and test_filter.lower() not in name.lower():
continue
try:
hf_prompt = apply_template(model, messages, tools)
ollama_prompt = None
if ollama_model:
ollama_prompt = get_ollama_prompt(
ollama_model, messages, tools, ollama_host
)
if ollama_prompt is None:
errors += 1
elif hf_prompt == ollama_prompt:
matches += 1
else:
mismatches += 1
print_test_output(
name, messages, tools, hf_prompt, ollama_prompt, as_repr=as_repr
)
except Exception as e:
errors += 1
print(f"\n{'='*60}")
print(f"Test: {name} - FAILED")
print(f"--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print(f"--- Tools ---")
print(json.dumps(tools, indent=2))
print(f"--- Error ---")
print(f"{e}")
print("=" * 60)
# Print summary if comparing
if ollama_model:
total = matches + mismatches + errors
print(f"\n{'='*60}")
print("SUMMARY")
print("=" * 60)
print(f" Total: {total}")
print(f" Matches: {matches}")
print(f" Mismatches: {mismatches}")
print(f" Errors: {errors}")
print("=" * 60)
def show_template(model: str):
"""Show the chat template for a model."""
tokenizer = get_tokenizer(model)
print(f"\nChat template for {model}:\n")
print("-" * 60)
print(tokenizer.chat_template)
print("-" * 60)
def start_server(host: str = "0.0.0.0", port: int = 8000):
"""Start the FastAPI server for manual testing."""
from typing import Optional, List, Dict, Any as TypingAny
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
class Message(BaseModel):
role: str
content: Optional[str] = None
tool_calls: Optional[List[Dict[str, TypingAny]]] = None
tool_call_id: Optional[str] = None
class GeneratePromptRequest(BaseModel):
messages: List[Message]
model: str = "PrimeIntellect/INTELLECT-3"
tools: Optional[List[Dict[str, TypingAny]]] = None
inject_tools_as_functions: bool = False
class GeneratePromptResponse(BaseModel):
prompt: str
model: str
app = FastAPI(title="HuggingFace Prompt Generator", version="1.0.0")
@app.post("/generate-prompt", response_model=GeneratePromptResponse)
async def generate_prompt(request: GeneratePromptRequest):
try:
messages = []
for msg in request.messages:
message_dict = {"role": msg.role}
if msg.content is not None:
message_dict["content"] = msg.content
if msg.tool_calls is not None:
tool_calls = []
for tc in msg.tool_calls:
tc_copy = tc.copy()
if "function" in tc_copy and "arguments" in tc_copy["function"]:
args = tc_copy["function"]["arguments"]
if isinstance(args, str):
try:
tc_copy["function"]["arguments"] = json.loads(args)
except json.JSONDecodeError:
pass
tool_calls.append(tc_copy)
message_dict["tool_calls"] = tool_calls
if msg.tool_call_id is not None:
message_dict["tool_call_id"] = msg.tool_call_id
messages.append(message_dict)
prompt = apply_template(request.model, messages, request.tools)
return GeneratePromptResponse(prompt=prompt, model=request.model)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "healthy"}
print(f"Starting server on http://{host}:{port}")
print("Endpoints:")
print(" POST /generate-prompt - Generate prompt from messages")
print(" GET /health - Health check")
uvicorn.run(app, host=host, port=port)
def main():
parser = argparse.ArgumentParser(
description="HuggingFace Prompt Testing Tool",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--model",
"-m",
type=str,
help="HuggingFace model name (e.g., PrimeIntellect/INTELLECT-3)",
)
parser.add_argument(
"--ollama-model",
"-o",
type=str,
help="Ollama model name to compare against (e.g., qwen3-coder)",
)
parser.add_argument(
"--ollama-host",
type=str,
default="http://localhost:11434",
help="Ollama server URL (default: http://localhost:11434)",
)
parser.add_argument(
"--serve",
"-s",
action="store_true",
help="Start FastAPI server for manual curl testing",
)
parser.add_argument(
"--port",
"-p",
type=int,
default=8000,
help="Server port (default: 8000)",
)
parser.add_argument(
"--show-template",
"-t",
action="store_true",
help="Show the chat template for the model",
)
parser.add_argument(
"--repr",
"-r",
action="store_true",
help="Output prompts as Python repr (shows escape sequences)",
)
parser.add_argument(
"--filter",
"-f",
type=str,
help="Filter tests by name (substring match)",
)
args = parser.parse_args()
if args.serve:
start_server(port=args.port)
elif args.model:
if args.show_template:
show_template(args.model)
else:
run_tests(
args.model,
as_repr=args.repr,
test_filter=args.filter,
ollama_model=args.ollama_model,
ollama_host=args.ollama_host,
)
else:
parser.print_help()
print("\nExample usage:")
print(" uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3")
print(" uv run cmd/chat_template/chat_template.py --model Qwen/Qwen3-Coder-480B-A35B-Instruct --ollama-model qwen3-coder")
print(" uv run cmd/chat_template/chat_template.py --serve")
sys.exit(1)
if __name__ == "__main__":
main()
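
For reference, the Ollama side of the comparison above is driven by the `_debug_render_only` request flag and the `_debug_info.rendered_template` response field used in `get_ollama_prompt`. A rough Go sketch of that same request (the field names come from this work-in-progress script and may change; the model name is a placeholder):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Same request the script sends: ask /api/chat to only render the prompt.
	// "_debug_render_only" and "_debug_info" are the names used by the script
	// above and may change while this work is in progress.
	payload := map[string]any{
		"model":               "intellect3", // placeholder model name
		"stream":              false,
		"_debug_render_only":  true,
		"messages": []map[string]any{
			{"role": "user", "content": "Hello!"},
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var data struct {
		DebugInfo struct {
			RenderedTemplate string `json:"rendered_template"`
		} `json:"_debug_info"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		log.Fatal(err)
	}
	fmt.Println(data.DebugInfo.RenderedTemplate)
}
```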

View File

@@ -1,50 +0,0 @@
# eval
Evaluation tool for testing Ollama models.
## Usage
Run all tests:
```bash
go run . -model llama3.2:latest
```
Run specific suite:
```bash
go run . -model llama3.2:latest -suite tool-calling-basic -v
```
List available suites:
```bash
go run . -list
```
## Adding Tests
Edit `suites.go` to add new test suites. Each test needs:
- `Name`: test identifier
- `Prompt`: what to send to the model
- `Check`: function to validate the response
Example:
```go
{
Name: "my-test",
Prompt: "What is 2+2?",
Check: Contains("4"),
}
```
Available check functions:
- `HasResponse()` - response is non-empty
- `Contains(s)` - response contains substring
- `CallsTool(name)` - model called specific tool
- `NoTools()` - model called no tools
- `MinTools(n)` - model called at least n tools
- `All(checks...)` - all checks pass

View File

@@ -1,151 +0,0 @@
package main
import (
"context"
"strings"
"time"
"github.com/ollama/ollama/api"
)
// Test is a single evaluation test
type Test struct {
Name string
Prompt string
System string
Tools []api.Tool
Think bool
Options map[string]any
Check func(response string, tools []api.ToolCall) bool
}
// Suite is a collection of tests
type Suite struct {
Name string
Tests []Test
}
// Result holds test execution results
type Result struct {
Name string
Passed bool
Error error
Duration time.Duration
Response string
Tools []string
ToolCalls []api.ToolCall
Thinking bool
}
// Run executes a test against a model
func Run(ctx context.Context, client *api.Client, model string, test Test) Result {
result := Result{Name: test.Name}
req := &api.ChatRequest{
Model: model,
Messages: []api.Message{
{Role: "user", Content: test.Prompt},
},
Options: test.Options,
}
if test.System != "" {
req.Messages = append([]api.Message{
{Role: "system", Content: test.System},
}, req.Messages...)
}
if len(test.Tools) > 0 {
req.Tools = test.Tools
}
if test.Think {
req.Think = &api.ThinkValue{Value: true}
}
var resp strings.Builder
var toolCalls []api.ToolCall
start := time.Now()
err := client.Chat(ctx, req, func(r api.ChatResponse) error {
resp.WriteString(r.Message.Content)
if r.Message.Thinking != "" {
result.Thinking = true
}
toolCalls = append(toolCalls, r.Message.ToolCalls...)
return nil
})
result.Duration = time.Since(start)
if err != nil {
result.Error = err
return result
}
result.Response = resp.String()
result.Tools = uniqueToolNames(toolCalls)
result.ToolCalls = toolCalls
result.Passed = test.Check(result.Response, toolCalls)
return result
}
func uniqueToolNames(calls []api.ToolCall) []string {
seen := make(map[string]bool)
var names []string
for _, c := range calls {
if !seen[c.Function.Name] {
seen[c.Function.Name] = true
names = append(names, c.Function.Name)
}
}
return names
}
// Check functions for common test patterns
func HasResponse() func(string, []api.ToolCall) bool {
return func(resp string, _ []api.ToolCall) bool {
return strings.TrimSpace(resp) != ""
}
}
func Contains(s string) func(string, []api.ToolCall) bool {
return func(resp string, _ []api.ToolCall) bool {
return strings.Contains(strings.ToLower(resp), strings.ToLower(s))
}
}
func CallsTool(name string) func(string, []api.ToolCall) bool {
return func(_ string, tools []api.ToolCall) bool {
for _, t := range tools {
if t.Function.Name == name {
return true
}
}
return false
}
}
func NoTools() func(string, []api.ToolCall) bool {
return func(_ string, tools []api.ToolCall) bool {
return len(tools) == 0
}
}
func MinTools(n int) func(string, []api.ToolCall) bool {
return func(_ string, tools []api.ToolCall) bool {
return len(tools) >= n
}
}
func All(checks ...func(string, []api.ToolCall) bool) func(string, []api.ToolCall) bool {
return func(resp string, tools []api.ToolCall) bool {
for _, check := range checks {
if !check(resp, tools) {
return false
}
}
return true
}
}

View File

@@ -1,217 +0,0 @@
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
"strings"
"time"
"github.com/ollama/ollama/api"
)
func main() {
model := flag.String("model", "", "model to evaluate")
suite := flag.String("suite", "", "comma-separated list of suites to run (empty runs all)")
list := flag.Bool("list", false, "list available suites")
verbose := flag.Bool("v", false, "verbose output")
timeout := flag.Int("timeout", 60, "timeout per test in seconds")
export := flag.String("export", "eval-results.json", "export results to file")
flag.Parse()
if *list {
for _, s := range suites {
fmt.Printf("%s (%d tests)\n", s.Name, len(s.Tests))
}
return
}
if *model == "" {
fmt.Fprintf(os.Stderr, "error: -model parameter is required\n")
os.Exit(1)
}
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
if err := client.Heartbeat(ctx); err != nil {
cancel()
fmt.Fprintf(os.Stderr, "error: cannot connect to ollama\n")
os.Exit(1)
}
cancel()
selected := suites
if *suite != "" {
suiteNames := strings.Split(*suite, ",")
selected = []Suite{}
var notFound []string
for _, name := range suiteNames {
name = strings.TrimSpace(name)
if name == "" {
continue
}
found := false
for _, s := range suites {
if s.Name == name {
selected = append(selected, s)
found = true
break
}
}
if !found {
notFound = append(notFound, name)
}
}
if len(notFound) > 0 {
fmt.Fprintf(os.Stderr, "error: suite(s) not found: %s\n", strings.Join(notFound, ", "))
os.Exit(1)
}
}
var results []Result
for _, s := range selected {
if *verbose {
fmt.Printf("\n%s (%d tests)\n", s.Name, len(s.Tests))
}
for i, test := range s.Tests {
if test.Options == nil {
test.Options = map[string]any{"temperature": 0.1}
}
if test.Check == nil {
test.Check = HasResponse()
}
if *verbose {
fmt.Printf(" [%d/%d] %s... ", i+1, len(s.Tests), test.Name)
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*timeout)*time.Second)
result := Run(ctx, client, *model, test)
cancel()
results = append(results, result)
if *verbose {
if result.Error != nil {
fmt.Printf("ERROR: %v\n", result.Error)
} else if result.Passed {
fmt.Printf("PASS (%.2fs)", result.Duration.Seconds())
if len(result.Tools) > 0 || result.Thinking {
fmt.Printf(" [")
if len(result.Tools) > 0 {
fmt.Printf("tools: %s", strings.Join(result.Tools, ","))
}
if result.Thinking {
if len(result.Tools) > 0 {
fmt.Printf(", ")
}
fmt.Printf("thinking")
}
fmt.Printf("]")
}
fmt.Println()
// Print tool calls with details
if len(result.ToolCalls) > 0 {
fmt.Printf(" Tool Calls:\n")
for _, tc := range result.ToolCalls {
argsJSON, _ := json.Marshal(tc.Function.Arguments)
fmt.Printf(" - %s: %s\n", tc.Function.Name, string(argsJSON))
}
}
// Print response if there is one
if result.Response != "" {
fmt.Printf(" Response: %s\n", result.Response)
}
} else {
fmt.Printf("FAIL (%.2fs)\n", result.Duration.Seconds())
// Print tool calls with details even on failure
if len(result.ToolCalls) > 0 {
fmt.Printf(" Tool Calls:\n")
for _, tc := range result.ToolCalls {
argsJSON, _ := json.Marshal(tc.Function.Arguments)
fmt.Printf(" - %s: %s\n", tc.Function.Name, string(argsJSON))
}
}
// Print response even on failure
if result.Response != "" {
fmt.Printf(" Response: %s\n", result.Response)
}
}
}
}
}
printSummary(results)
if *export != "" {
if err := writeJSON(*export, results); err != nil {
fmt.Fprintf(os.Stderr, "warning: export failed: %v\n", err)
} else if *verbose {
fmt.Printf("\nResults: %s\n", *export)
}
}
if anyFailed(results) {
os.Exit(1)
}
}
func printSummary(results []Result) {
var passed, failed, errors int
for _, r := range results {
if r.Error != nil {
errors++
} else if r.Passed {
passed++
} else {
failed++
}
}
total := len(results)
rate := 0.0
if total > 0 {
rate = float64(passed) / float64(total) * 100
}
fmt.Printf("\n%d/%d passed (%.1f%%)", passed, total, rate)
if errors > 0 {
fmt.Printf(", %d errors", errors)
}
fmt.Println()
}
func anyFailed(results []Result) bool {
for _, r := range results {
if !r.Passed || r.Error != nil {
return true
}
}
return false
}
func writeJSON(path string, results []Result) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
enc := json.NewEncoder(f)
enc.SetIndent("", " ")
return enc.Encode(results)
}

View File

@@ -1,178 +0,0 @@
package main
import "github.com/ollama/ollama/api"
var suites = []Suite{
{
Name: "basic-qa",
Tests: []Test{
{
Name: "simple-math",
Prompt: "What is 2+2? Reply with just the number.",
Check: Contains("4"),
},
{
Name: "capital-city",
Prompt: "What is the capital of France? Reply with just the city name.",
Check: Contains("Paris"),
},
{
Name: "greeting",
Prompt: "Say hello",
Check: HasResponse(),
},
},
},
{
Name: "reasoning",
Tests: []Test{
{
Name: "logic-puzzle",
Prompt: "If all roses are flowers and some flowers fade quickly, can we conclude that some roses fade quickly? Answer yes or no.",
Check: Contains("no"),
},
{
Name: "counting",
Prompt: "How many letters are in the word 'HELLO'?",
Check: Contains("5"),
},
},
},
{
Name: "instruction-following",
Tests: []Test{
{
Name: "json-output",
Prompt: "Reply with a JSON object containing a 'status' field set to 'ok'.",
Check: All(Contains("status"), Contains("ok")),
},
{
Name: "system-prompt",
Prompt: "What is your name?",
System: "You are a helpful assistant named TestBot. When asked your name, always respond with 'TestBot'.",
Check: Contains("TestBot"),
},
},
},
{
Name: "tool-calling-basic",
Tests: []Test{
{
Name: "single-tool",
Prompt: "What's the weather like in San Francisco?",
Tools: []api.Tool{weatherTool},
Check: CallsTool("get_weather"),
},
{
Name: "tool-selection",
Prompt: "What time is it in Tokyo?",
Tools: []api.Tool{weatherTool, timeTool},
Check: CallsTool("get_time"),
},
{
Name: "no-tool-needed",
Prompt: "What is 2+2?",
Tools: []api.Tool{weatherTool, timeTool},
Check: NoTools(),
},
},
},
{
Name: "tool-calling-advanced",
Tests: []Test{
{
Name: "parallel-calls",
Prompt: "Get the weather in both New York and Los Angeles.",
Tools: []api.Tool{weatherTool},
Check: All(CallsTool("get_weather"), MinTools(2)),
},
{
Name: "multi-param",
Prompt: "Search for Italian restaurants with prices between $20 and $40.",
Tools: []api.Tool{restaurantTool},
Check: CallsTool("search_restaurants"),
},
},
},
{
Name: "tool-calling-thinking",
Tests: []Test{
{
Name: "thinking-before-tool",
Prompt: "I need to know the weather in Paris before I decide what to pack.",
Tools: []api.Tool{weatherTool},
Think: true,
Check: CallsTool("get_weather"),
},
{
Name: "thinking-multi-tool",
Prompt: "I'm planning a trip to London. I need to know what time it is there and what the weather is like.",
Tools: []api.Tool{weatherTool, timeTool},
Think: true,
Check: MinTools(1),
},
},
},
}
var weatherTool = api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather in a given location",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "The city and state",
},
},
},
},
}
var timeTool = api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "get_time",
Description: "Get the current time in a timezone",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"timezone"},
Properties: map[string]api.ToolProperty{
"timezone": {
Type: api.PropertyType{"string"},
Description: "The timezone name",
},
},
},
},
}
var restaurantTool = api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "search_restaurants",
Description: "Search for restaurants",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"cuisine"},
Properties: map[string]api.ToolProperty{
"cuisine": {
Type: api.PropertyType{"string"},
Description: "Type of cuisine",
},
"min_price": {
Type: api.PropertyType{"number"},
Description: "Minimum price",
},
"max_price": {
Type: api.PropertyType{"number"},
Description: "Maximum price",
},
},
},
},
}

View File

@@ -65,6 +65,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
}
slog.Info("discovering available GPUs...")
detectIncompatibleLibraries()
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
overrideWarnings()
@@ -98,6 +99,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
continue
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
continue
} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
continue
} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
continue
@@ -484,3 +488,16 @@ func overrideWarnings() {
slog.Warn("if GPUs are not correctly discovered, unset and try again")
}
}
func detectIncompatibleLibraries() {
if runtime.GOOS != "windows" {
return
}
basePath, err := exec.LookPath("ggml-base.dll")
if err != nil || basePath == "" {
return
}
if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
}
}

View File

@@ -57,8 +57,13 @@ ollama ps
```
<Info>
- **Output**: ``` NAME ID SIZE PROCESSOR UNTIL llama3:70b bcfb190ca3a7 42 GB
- 100% GPU 4 minutes from now ```
+ **Output**:
+ ```
+ NAME          ID              SIZE     PROCESSOR    UNTIL
+ llama3:70b    bcfb190ca3a7    42 GB    100% GPU     4 minutes from now
+ ```
</Info>
The `Processor` column will show which memory the model was loaded in to:
@@ -385,4 +390,4 @@ Ollama for Windows and macOS register as a login item during installation. You
- In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`
**MacOS**
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.

View File

@@ -1,44 +0,0 @@
package parsers
import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/thinking"
)
// Intellect3Parser combines thinking support using
// the built-in thinking parser, with tool call support
// via qwen3-coder's parser.
type Intellect3Parser struct {
thinkingParser thinking.Parser
toolParser Qwen3CoderParser
}
func (p *Intellect3Parser) HasToolSupport() bool {
return true
}
func (p *Intellect3Parser) HasThinkingSupport() bool {
return true
}
func (p *Intellect3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.thinkingParser = thinking.Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
p.toolParser = Qwen3CoderParser{}
return p.toolParser.Init(tools, lastMessage, thinkValue)
}
func (p *Intellect3Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
// First extract thinking content
thinkingContent, remainingContent := p.thinkingParser.AddContent(s)
// Then process the remaining content for tool calls
toolContent, _, toolCalls, err := p.toolParser.Add(remainingContent, done)
if err != nil {
return "", thinkingContent, nil, err
}
return toolContent, thinkingContent, toolCalls, nil
}

View File

@@ -1,542 +0,0 @@
package parsers
import (
"reflect"
"testing"
"github.com/ollama/ollama/api"
)
func TestIntellect3ParserThinkingOnly(t *testing.T) {
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
}{
{
desc: "simple thinking content",
chunks: []string{"<think>I need to analyze this</think>Here is my response"},
wantText: "Here is my response",
wantThink: "I need to analyze this",
},
{
desc: "thinking with whitespace",
chunks: []string{"<think>\n Some thoughts \n</think>\n\nContent"},
wantText: "Content",
wantThink: "Some thoughts \n", // Thinking parser preserves internal whitespace
},
{
desc: "thinking only",
chunks: []string{"<think>Just thinking</think>"},
wantText: "",
wantThink: "Just thinking",
},
{
desc: "no thinking tags",
chunks: []string{"Just regular content"},
wantText: "Just regular content",
wantThink: "",
},
{
desc: "streaming thinking content",
chunks: []string{"<think>Fir", "st part", " second part</think>Content"},
wantText: "Content",
wantThink: "First part second part",
},
{
desc: "partial opening tag",
chunks: []string{"<thi", "nk>Thinking</think>Content"},
wantText: "Content",
wantThink: "Thinking",
},
{
desc: "partial closing tag",
chunks: []string{"<think>Thinking</thi", "nk>Content"},
wantText: "Content",
wantThink: "Thinking",
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(nil, nil, nil)
var gotText, gotThink string
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
if len(calls) > 0 {
t.Fatalf("expected no tool calls, got %v", calls)
}
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
})
}
}
func TestIntellect3ParserToolCallsOnly(t *testing.T) {
tools := []api.Tool{
tool("get_weather", map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
"unit": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantCalls []api.ToolCall
}{
{
desc: "simple tool call",
chunks: []string{
"Let me check the weather<tool_call><function=get_weather>\n<parameter=location>\nSan Francisco\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
},
wantText: "Let me check the weather",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "San Francisco",
"unit": "celsius",
},
},
},
},
},
{
desc: "tool call streaming",
chunks: []string{
"Checking<tool_call><function=get_wea",
"ther>\n<parameter=location>\nNew York\n</param", // nolint:all
"eter>\n<parameter=unit>\nfahrenheit\n</parameter>\n</function></tool_call>Done",
},
wantText: "CheckingDone",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "New York",
"unit": "fahrenheit",
},
},
},
},
},
{
desc: "multiple tool calls",
chunks: []string{
"<tool_call><function=get_weather>\n<parameter=location>\nBoston\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
"<tool_call><function=get_weather>\n<parameter=location>\nSeattle\n</parameter>\n<parameter=unit>\nfahrenheit\n</parameter>\n</function></tool_call>",
},
wantText: "",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Boston",
"unit": "celsius",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Seattle",
"unit": "fahrenheit",
},
},
},
},
},
{
desc: "no tool calls",
chunks: []string{"Just regular content"},
wantText: "Just regular content",
wantCalls: nil,
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText string
var gotCalls []api.ToolCall
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotCalls = append(gotCalls, calls...)
if think != "" {
t.Fatalf("expected no thinking, got %q", think)
}
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if !reflect.DeepEqual(gotCalls, tc.wantCalls) {
t.Errorf("tool calls: got %#v, want %#v", gotCalls, tc.wantCalls)
}
})
}
}
func TestIntellect3ParserCombined(t *testing.T) {
tools := []api.Tool{
tool("get_weather", map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
"unit": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
wantCalls []api.ToolCall
}{
{
desc: "thinking then tool call",
chunks: []string{
"<think>Need to get weather data</think>Let me check<tool_call><function=get_weather>\n<parameter=location>\nParis\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
},
wantText: "Let me check",
wantThink: "Need to get weather data",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Paris",
"unit": "celsius",
},
},
},
},
},
{
desc: "thinking, tool call, and final content",
chunks: []string{
"<think>User wants weather info</think>Checking weather<tool_call><function=get_weather>\n<parameter=location>\nTokyo\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>Done!",
},
wantText: "Checking weatherDone!",
wantThink: "User wants weather info",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Tokyo",
"unit": "celsius",
},
},
},
},
},
{
desc: "streaming combined content",
chunks: []string{
"<think>Analyzing",
" the request</think>",
"Let me help<tool_call>",
"<function=get_weather>\n<parameter=location>\nLondon",
"\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function>",
"</tool_call>There you go!",
},
wantText: "Let me helpThere you go!",
wantThink: "Analyzing the request",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "London",
"unit": "celsius",
},
},
},
},
},
{
desc: "multiple tool calls with thinking",
chunks: []string{
"<think>Need multiple locations</think>",
"<tool_call><function=get_weather>\n<parameter=location>\nBoston\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
"and<tool_call><function=get_weather>\n<parameter=location>\nBerlin\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
},
wantText: "and",
wantThink: "Need multiple locations",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Boston",
"unit": "celsius",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Berlin",
"unit": "celsius",
},
},
},
},
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText, gotThink string
var gotCalls []api.ToolCall
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
gotCalls = append(gotCalls, calls...)
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
if !reflect.DeepEqual(gotCalls, tc.wantCalls) {
t.Errorf("tool calls: got %#v, want %#v", gotCalls, tc.wantCalls)
}
})
}
}
func TestIntellect3ParserEdgeCases(t *testing.T) {
tools := []api.Tool{
tool("test_func", map[string]api.ToolProperty{
"param": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
wantCalls int
}{
{
desc: "empty input",
chunks: []string{""},
wantText: "",
wantThink: "",
wantCalls: 0,
},
{
desc: "only whitespace",
chunks: []string{" \n \t "},
wantText: "",
wantThink: "",
wantCalls: 0,
},
{
desc: "unclosed thinking tag",
chunks: []string{"<think>Never closes"},
wantText: "",
wantThink: "Never closes",
wantCalls: 0,
},
{
desc: "unclosed tool call tag",
chunks: []string{"<tool_call><function=test_func>\n<parameter=param>\nvalue\n</parameter>\n</function>"},
wantText: "", // Qwen3CoderParser waits for closing tag, doesn't emit partial tool calls
wantThink: "",
wantCalls: 0, // Won't be parsed until </tool_call> is seen
},
{
desc: "unicode in thinking",
chunks: []string{"<think>思考中 🤔</think>答案是 42"},
wantText: "答案是 42",
wantThink: "思考中 🤔",
wantCalls: 0,
},
{
desc: "fake thinking tag",
chunks: []string{"<thinking>This is not the right tag</thinking>Content"},
wantText: "<thinking>This is not the right tag</thinking>Content",
wantThink: "",
wantCalls: 0,
},
{
desc: "fake tool call tag",
chunks: []string{"<tool>Not a tool call</tool>"},
wantText: "<tool>Not a tool call</tool>",
wantThink: "",
wantCalls: 0,
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText, gotThink string
var gotCalls []api.ToolCall
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
gotCalls = append(gotCalls, calls...)
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
if len(gotCalls) != tc.wantCalls {
t.Errorf("tool calls count: got %d, want %d", len(gotCalls), tc.wantCalls)
}
})
}
}
func TestIntellect3ParserCapabilities(t *testing.T) {
parser := Intellect3Parser{}
if !parser.HasToolSupport() {
t.Error("Intellect3Parser should have tool support")
}
if !parser.HasThinkingSupport() {
t.Error("Intellect3Parser should have thinking support")
}
}
func TestIntellect3ParserInit(t *testing.T) {
parser := Intellect3Parser{}
tools := []api.Tool{
tool("test", map[string]api.ToolProperty{
"param": {Type: api.PropertyType{"string"}},
}),
}
returnedTools := parser.Init(tools, nil, nil)
// Should return tools unchanged (delegated to Qwen3CoderParser)
if !reflect.DeepEqual(returnedTools, tools) {
t.Errorf("Init should return tools unchanged")
}
}
func TestIntellect3ParserWhitespaceHandling(t *testing.T) {
tools := []api.Tool{
tool("test", map[string]api.ToolProperty{
"param": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
}{
{
desc: "whitespace between thinking and content",
chunks: []string{"<think>Thinking</think>\n\n\nContent"},
wantText: "Content",
wantThink: "Thinking",
},
{
desc: "whitespace inside thinking tags",
chunks: []string{"<think> \n Thinking \n </think>Content"},
wantText: "Content",
wantThink: "Thinking \n ", // Thinking parser preserves internal whitespace
},
{
desc: "leading whitespace before thinking",
chunks: []string{" <think>Thinking</think>Content"},
wantText: "Content",
wantThink: "Thinking",
},
{
desc: "whitespace before tool call",
chunks: []string{"Text <tool_call><function=test>\n<parameter=param>\nvalue\n</parameter>\n</function></tool_call>"},
wantText: "Text",
wantThink: "",
},
{
desc: "whitespace after tool call",
chunks: []string{"<tool_call><function=test>\n<parameter=param>\nvalue\n</parameter>\n</function></tool_call> Text"},
wantText: "Text",
wantThink: "",
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText, gotThink string
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, _, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
})
}
}

View File

@@ -54,8 +54,6 @@ func ParserForName(name string) Parser {
return harmony.NewHarmonyMessageHandler()
case "cogito":
return &CogitoParser{}
- case "intellect-3":
-     return &Intellect3Parser{}
default:
return nil
}

View File

@@ -1,160 +0,0 @@
package renderers
import (
"strings"
"github.com/ollama/ollama/api"
)
type Intellect3Renderer struct{}
func (r *Intellect3Renderer) Render(messages []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
var sb strings.Builder
// filter out system messages and choose the first (if any) to win
var systemMessage string
var filteredMessages []api.Message
for _, message := range messages {
if message.Role != "system" {
filteredMessages = append(filteredMessages, message)
continue
}
if systemMessage == "" {
systemMessage = message.Content
}
}
if systemMessage != "" || len(tools) > 0 {
sb.WriteString(imStartTag + "system\n")
sb.WriteString(systemMessage)
if len(tools) > 0 {
sb.WriteString("\n\n# Tools\n\nYou have access to the following functions:\n\n")
sb.WriteString("<tools>")
for _, tool := range tools {
sb.WriteString("\n")
sb.WriteString("<function>\n")
sb.WriteString("<name>" + tool.Function.Name + "</name>")
if tool.Function.Description != "" {
sb.WriteString("\n<description>" + tool.Function.Description + "</description>")
}
sb.WriteString("\n<parameters>")
for name, prop := range tool.Function.Parameters.Properties {
sb.WriteString("\n<parameter>")
sb.WriteString("\n<name>" + name + "</name>")
if len(prop.Type) > 0 {
sb.WriteString("\n<type>" + formatToolDefinitionType(prop.Type) + "</type>")
}
if prop.Description != "" {
sb.WriteString("\n<description>" + prop.Description + "</description>")
}
// Render any additional keys not already handled
handledKeys := map[string]bool{
"type": true,
"description": true,
}
sb.WriteString(renderAdditionalKeys(prop, handledKeys))
sb.WriteString("\n</parameter>")
}
// Render extra keys for parameters (everything except 'type' and 'properties')
paramHandledKeys := map[string]bool{
"type": true,
"properties": true,
}
sb.WriteString(renderAdditionalKeys(tool.Function.Parameters, paramHandledKeys))
sb.WriteString("\n</parameters>")
sb.WriteString("\n</function>")
}
sb.WriteString("\n</tools>")
sb.WriteString("\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>")
}
sb.WriteString(imEndTag + "\n")
}
for i, message := range filteredMessages {
lastMessage := i == len(filteredMessages)-1
prefill := lastMessage && message.Role == "assistant"
switch message.Role {
case "assistant":
if len(message.ToolCalls) > 0 {
sb.WriteString(imStartTag + "assistant")
// Add thinking tags if present
if message.Thinking != "" {
sb.WriteString("\n<think>" + strings.TrimSpace(message.Thinking) + "</think>")
}
if message.Content != "" {
sb.WriteString("\n" + strings.TrimSpace(message.Content) + "\n")
}
for _, toolCall := range message.ToolCalls {
sb.WriteString("\n<tool_call>\n<function=" + toolCall.Function.Name + ">")
for name, value := range toolCall.Function.Arguments {
valueStr := formatToolCallArgument(value)
sb.WriteString("\n<parameter=" + name + ">\n" + valueStr + "\n</parameter>")
}
sb.WriteString("\n</function>\n</tool_call>")
}
sb.WriteString("<|im_end|>\n")
} else {
sb.WriteString(imStartTag + "assistant")
// Add thinking tags if present
if message.Thinking != "" {
sb.WriteString("\n<think>" + strings.TrimSpace(message.Thinking) + "</think>")
}
// Add content if present
if message.Content != "" {
if message.Thinking != "" {
sb.WriteString("\n" + strings.TrimSpace(message.Content))
} else {
sb.WriteString("\n" + message.Content)
}
}
if !prefill {
sb.WriteString(imEndTag + "\n")
}
}
case "tool":
// consecutive tool responses should share a single `<im_start>user`, but
// have their own <tool_response> tags
// only start a new user block if this is the first tool response
if i == 0 || filteredMessages[i-1].Role != "tool" {
sb.WriteString(imStartTag + "user\n")
}
sb.WriteString("<tool_response>\n")
sb.WriteString(message.Content)
sb.WriteString("\n</tool_response>\n")
// close the user block only if this is the last tool response
if i == len(filteredMessages)-1 || filteredMessages[i+1].Role != "tool" {
sb.WriteString(imEndTag + "\n")
}
default:
sb.WriteString(imStartTag + message.Role + "\n")
sb.WriteString(message.Content)
sb.WriteString(imEndTag + "\n")
}
if lastMessage && !prefill {
sb.WriteString(imStartTag + "assistant\n<think>")
}
}
return sb.String(), nil
}

View File

@@ -59,9 +59,6 @@ func rendererForName(name string) Renderer {
case "cogito":
renderer := &CogitoRenderer{isThinking: true}
return renderer
- case "intellect-3":
-     renderer := &Intellect3Renderer{}
-     return renderer
default:
return nil
}