Compare commits

...

16 Commits

Author SHA1 Message Date
ParthSareen
92af238208 wip 2025-12-02 12:17:36 -08:00
ParthSareen
7461faf651 script to render templates 2025-12-01 18:03:04 -08:00
Daniel Hiltgen
554172759c win: warn if ggml-base detected in PATH (#13289)
If the user has somehow installed another GGML-based app which places a
ggml-base lib somewhere in their PATH, we can experience runtime problems
due to incompatibilities. This change adds a warning message if we detect
a ggml-base outside of our install location to aid in troubleshooting.
2025-12-01 15:36:47 -08:00
Bruce MacDonald
5b6a8e6001 api/client: handle non-json streaming errors (#13007)
While processing the response stream during a chat or generation, any error encountered is parsed and returned to the user. The issue with the existing code is that it assumed the error response would be valid JSON, which is not a safe assumption and caused cryptic error messages to be displayed due to parsing failures:
`invalid character 'i' looking for beginning of value`

This change updates the stream function to return the raw error string if it can't be parsed as JSON. This should help with debugging by making sure the actual error reaches the user.
2025-12-01 15:10:16 -08:00
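
For illustration (not part of the diff): a minimal sketch, assuming a local Ollama server and a hypothetical model name, of how a Go caller might surface the raw error body now that non-JSON error responses are passed through as a StatusError instead of a JSON parsing failure.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3", // hypothetical model name
		Messages: []api.Message{{Role: "user", Content: "hello"}},
	}

	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		fmt.Print(resp.Message.Content)
		return nil
	})

	var statusErr api.StatusError
	if errors.As(err, &statusErr) {
		// With this change ErrorMessage carries the raw body (plain text, HTML, ...)
		// for non-2xx responses, instead of a parse error such as
		// `invalid character 'i' looking for beginning of value`.
		log.Printf("server returned %d: %s", statusErr.StatusCode, statusErr.ErrorMessage)
	} else if err != nil {
		log.Fatal(err)
	}
}
```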
Daniel Hiltgen
467bbc0dd5 jetpack: require exact match or skip cuda_jetpack* (#13288)
The cuda_jetpack libs will enumerate discrete GPUs on SBSA systems,
which leads to runtime failures from missing kernels. This fix
requires an exact match to enable the jetpack libraries instead of relying on
enumeration to filter out supported libraries.
2025-12-01 12:48:16 -08:00
Jeffrey Morgan
6d9f9323c5 .gitattributes: add app/webview to linguist-vendored (#13274) 2025-11-29 23:46:10 -05:00
Ondrej Kokes
0c2489605d docs: fix output formatting in faq.mdx (#13231)
There were a few Markdown typos in one FAQ answer. It now renders as a proper ASCII table.
2025-11-28 19:19:21 -05:00
EntropyYue
8b1b89a984 docs: remove deprecated parameters (#13237) 2025-11-26 11:03:09 +09:00
Eva H
47e272c35a app/cmd: update ollama help to navigate to ollama doc instead of github page (#13174) 2025-11-20 16:30:35 -05:00
Jeffrey Morgan
417a81fda3 app: open app instead of always navigating to / on connect (#13164) 2025-11-20 12:59:17 -08:00
Daniel Hiltgen
dba62ff3a5 discovery: fix cuda overlap case (#13176)
Recent refactoring introduced a regression in filtering overlapping CUDA libraries to favor the newest supported version.
2025-11-20 12:15:37 -08:00
Grace
d70e935526 Parser for Cogito v2 (#13145) 2025-11-19 17:21:07 -08:00
Michael Yang
5c1063df7f deepseek2: upgrade to run v3+ models (#13166)
The check for MLA omits v3 and r1, which should not return unsupported.
Instead, check the tokenizer for compatibility.
2025-11-19 17:05:39 -08:00
Jesse Gross
cb485b2019 kvcache: Run tests both with and without PermutedV
The causal cache can store data differently depending on what is
best for the backend. We should run tests both ways.
2025-11-19 16:45:30 -08:00
nicole pardal
b2af50960f nomic-embed: nomic-embed-text defaulted to ollama runner (#13144) 2025-11-19 13:03:44 -08:00
Michael Yang
eac5b8bfbd chore: mark vulkan shaders as vendored files 2025-11-19 12:01:23 -08:00
23 changed files with 2277 additions and 539 deletions

4
.gitattributes vendored
View File

@@ -15,8 +15,12 @@ ml/backend/**/*.cu linguist-vendored
ml/backend/**/*.cuh linguist-vendored
ml/backend/**/*.m linguist-vendored
ml/backend/**/*.metal linguist-vendored
ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored
app/webview linguist-vendored
llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

View File

@@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
bts := scanner.Bytes()
if err := json.Unmarshal(bts, &errorResponse); err != nil {
return fmt.Errorf("unmarshal: %w", err)
if response.StatusCode >= http.StatusBadRequest {
return StatusError{
StatusCode: response.StatusCode,
Status: response.Status,
ErrorMessage: string(bts),
}
}
return errors.New(string(bts))
}
if response.StatusCode == http.StatusUnauthorized {

View File

@@ -55,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
type testError struct {
message string
statusCode int
raw bool // if true, write message as-is instead of JSON encoding
}
func (e testError) Error() string {
@@ -111,6 +112,20 @@ func TestClientStream(t *testing.T) {
},
},
},
{
name: "plain text error response",
responses: []any{
"internal server error",
},
wantErr: "internal server error",
},
{
name: "HTML error page",
responses: []any{
"<html><body>404 Not Found</body></html>",
},
wantErr: "404 Not Found",
},
}
for _, tc := range testCases {
@@ -135,6 +150,12 @@ func TestClientStream(t *testing.T) {
return
}
if str, ok := resp.(string); ok {
fmt.Fprintln(w, str)
flusher.Flush()
continue
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Fatalf("failed to encode response: %v", err)
}
@@ -173,9 +194,10 @@ func TestClientStream(t *testing.T) {
func TestClientDo(t *testing.T) {
testCases := []struct {
name string
response any
wantErr string
name string
response any
wantErr string
wantStatusCode int
}{
{
name: "immediate error response",
@@ -183,7 +205,8 @@ func TestClientDo(t *testing.T) {
message: "test error message",
statusCode: http.StatusBadRequest,
},
wantErr: "test error message",
wantErr: "test error message",
wantStatusCode: http.StatusBadRequest,
},
{
name: "server error response",
@@ -191,7 +214,8 @@ func TestClientDo(t *testing.T) {
message: "internal error",
statusCode: http.StatusInternalServerError,
},
wantErr: "internal error",
wantErr: "internal error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "successful response",
@@ -203,6 +227,26 @@ func TestClientDo(t *testing.T) {
Success: true,
},
},
{
name: "plain text error response",
response: testError{
message: "internal server error",
statusCode: http.StatusInternalServerError,
raw: true,
},
wantErr: "internal server error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "HTML error page",
response: testError{
message: "<html><body>404 Not Found</body></html>",
statusCode: http.StatusNotFound,
raw: true,
},
wantErr: "<html><body>404 Not Found</body></html>",
wantStatusCode: http.StatusNotFound,
},
}
for _, tc := range testCases {
@@ -210,11 +254,16 @@ func TestClientDo(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if errResp, ok := tc.response.(testError); ok {
w.WriteHeader(errResp.statusCode)
err := json.NewEncoder(w).Encode(map[string]string{
"error": errResp.message,
})
if err != nil {
t.Fatal("failed to encode error response:", err)
if !errResp.raw {
err := json.NewEncoder(w).Encode(map[string]string{
"error": errResp.message,
})
if err != nil {
t.Fatal("failed to encode error response:", err)
}
} else {
// Write raw message (simulates non-JSON error responses)
fmt.Fprint(w, errResp.message)
}
return
}
@@ -241,6 +290,15 @@ func TestClientDo(t *testing.T) {
if err.Error() != tc.wantErr {
t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
}
if tc.wantStatusCode != 0 {
if statusErr, ok := err.(StatusError); ok {
if statusErr.StatusCode != tc.wantStatusCode {
t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
}
} else {
t.Errorf("expected StatusError, got %T", err)
}
}
return
}

View File

@@ -397,8 +397,8 @@ func checkUserLoggedIn(uiServerPort int) bool {
// handleConnectURLScheme fetches the connect URL and opens it in the browser
func handleConnectURLScheme() {
if checkUserLoggedIn(uiServerPort) {
slog.Info("user is already logged in, opening settings instead")
sendUIRequestMessage("/")
slog.Info("user is already logged in, opening app instead")
showWindow(wv.webview.Window())
return
}
@@ -466,6 +466,8 @@ func handleURLSchemeInCurrentInstance(urlSchemeRequest string) {
if isConnect {
handleConnectURLScheme()
} else {
sendUIRequestMessage("/")
if wv.webview != nil {
showWindow(wv.webview.Window())
}
}
}

View File

@@ -24,27 +24,14 @@ bool firstTimeRun,startHidden; // Set in run before initialization
for (NSURL *url in urls) {
if ([url.scheme isEqualToString:@"ollama"]) {
NSString *path = url.path;
if (!path || [path isEqualToString:@""]) {
// For URLs like ollama://settings (without triple slash),
// the "settings" part is parsed as the host, not the path.
// We need to convert it to a path by prepending "/"
if (url.host && ![url.host isEqualToString:@""]) {
path = [@"/" stringByAppendingString:url.host];
} else {
path = @"/";
}
}
if ([path isEqualToString:@"/connect"] || [url.host isEqualToString:@"connect"]) {
if (path && ([path isEqualToString:@"/connect"] || [url.host isEqualToString:@"connect"])) {
// Special case: handle connect by opening browser instead of app
handleConnectURL();
} else {
// Set app to be active and visible
[NSApp setActivationPolicy:NSApplicationActivationPolicyRegular];
[NSApp activateIgnoringOtherApps:YES];
// Open the path with the UI
[self uiRequest:path];
}
break;
@@ -260,7 +247,7 @@ bool firstTimeRun,startHidden; // Set in run before initialization
}
- (void)openHelp:(id)sender {
NSURL *url = [NSURL URLWithString:@"https://github.com/ollama/ollama/tree/main/docs"];
NSURL *url = [NSURL URLWithString:@"https://docs.ollama.com/"];
[[NSWorkspace sharedWorkspace] openURL:url];
}

View File

@@ -147,7 +147,9 @@ func handleURLSchemeRequest(urlScheme string) {
if isConnect {
handleConnectURLScheme()
} else {
sendUIRequestMessage("/")
if wv.webview != nil {
showWindow(wv.webview.Window())
}
}
}

View File

@@ -0,0 +1,625 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "transformers>=4.57.0",
# "jinja2",
# "fastapi",
# "uvicorn",
# "pydantic",
# "requests",
# ]
# ///
"""
Chat Template Testing Tool
Test HuggingFace chat templates against Ollama renderers.
Usage:
# Run predefined test cases against a HuggingFace model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3
# Compare HuggingFace output with Ollama renderer
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --ollama-model intellect3
# Start server for manual curl testing
uv run cmd/chat_template/chat_template.py --serve
# Show chat template for a model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --show-template
"""
import argparse
import json
import sys
from typing import Any
from transformers import AutoTokenizer
TEST_CASES = [
{
"name": "basic_user_message",
"messages": [{"role": "user", "content": "Hello!"}],
"tools": None,
},
{
"name": "with_system_message",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
],
"tools": None,
},
{
"name": "multi_turn_conversation",
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"},
],
"tools": None,
},
{
"name": "with_tools",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the weather?"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "tool_call_and_response",
"messages": [
{"role": "user", "content": "What is the weather in SF?"},
{
"role": "assistant",
"content": "Let me check the weather.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "parallel_tool_calls",
"messages": [
{"role": "user", "content": "Get weather in SF and NYC"},
{
"role": "assistant",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
},
{
"id": "call_2",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "New York"},
},
},
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
{"role": "tool", "content": '{"temperature": 55}', "tool_call_id": "call_2"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
# Thinking tests
{
"name": "assistant_with_thinking",
"messages": [
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": "The answer is 4.",
"thinking": "Let me calculate: 2 + 2 = 4. This is basic arithmetic.",
},
{"role": "user", "content": "And 3+3?"},
],
"tools": None,
},
{
"name": "thinking_with_tool_call",
"messages": [
{"role": "user", "content": "What's the weather in Paris?"},
{
"role": "assistant",
"content": "I'll check the weather for you.",
"thinking": "The user wants to know the weather in Paris. I should call the get_weather function.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "Paris"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 18, "condition": "cloudy"}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
{
"name": "thinking_only_no_content",
"messages": [
{"role": "user", "content": "Think about this silently."},
{
"role": "assistant",
"content": "", # HuggingFace requires content field
"thinking": "I'm thinking about this but won't respond with visible content.",
},
{"role": "user", "content": "What did you think?"},
],
"tools": None,
},
]
# Cache for tokenizers
_tokenizer_cache: dict[str, Any] = {}
def get_tokenizer(model_name: str):
"""Get or create tokenizer for the given model."""
if model_name not in _tokenizer_cache:
print(f"Loading tokenizer for {model_name}...", file=sys.stderr)
_tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(model_name)
return _tokenizer_cache[model_name]
def apply_template(
model: str,
messages: list[dict],
tools: list[dict] | None = None,
) -> str:
"""Apply HuggingFace chat template to messages."""
tokenizer = get_tokenizer(model)
if tools:
return tokenizer.apply_chat_template(
messages,
tools=tools,
tokenize=False,
add_generation_prompt=True,
)
else:
return tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
def get_ollama_prompt(
ollama_model: str,
messages: list[dict],
tools: list[dict] | None = None,
ollama_host: str = "http://localhost:11434",
) -> str | None:
"""Get rendered prompt from Ollama using debug_render_only."""
import requests
# Convert messages to Ollama format
ollama_messages = []
for msg in messages:
ollama_msg = {"role": msg["role"]}
if "content" in msg:
ollama_msg["content"] = msg["content"]
if "thinking" in msg:
ollama_msg["thinking"] = msg["thinking"]
if "tool_calls" in msg:
# Convert tool_calls to Ollama format
tool_calls = []
for tc in msg["tool_calls"]:
tool_call = {
"function": {
"name": tc["function"]["name"],
"arguments": tc["function"]["arguments"],
}
}
if "id" in tc:
tool_call["id"] = tc["id"]
tool_calls.append(tool_call)
ollama_msg["tool_calls"] = tool_calls
if "tool_call_id" in msg:
ollama_msg["tool_call_id"] = msg["tool_call_id"]
ollama_messages.append(ollama_msg)
payload = {
"model": ollama_model,
"messages": ollama_messages,
"stream": False,
"_debug_render_only": True,
}
if tools:
payload["tools"] = tools
try:
resp = requests.post(f"{ollama_host}/api/chat", json=payload, timeout=30)
resp.raise_for_status()
data = resp.json()
# Field name is _debug_info with underscore prefix
if "_debug_info" in data and "rendered_template" in data["_debug_info"]:
return data["_debug_info"]["rendered_template"]
return None
except requests.exceptions.ConnectionError:
print(f" [ERROR] Cannot connect to Ollama at {ollama_host}", file=sys.stderr)
return None
except Exception as e:
print(f" [ERROR] Ollama request failed: {e}", file=sys.stderr)
return None
def compute_diff(hf_prompt: str, ollama_prompt: str) -> str:
"""Compute a unified diff between HuggingFace and Ollama prompts."""
import difflib
hf_lines = hf_prompt.splitlines(keepends=True)
ollama_lines = ollama_prompt.splitlines(keepends=True)
diff = difflib.unified_diff(
ollama_lines,
hf_lines,
fromfile="Ollama",
tofile="HuggingFace",
lineterm="",
)
return "".join(diff)
def print_test_output(
name: str,
messages: list[dict],
tools: list[dict] | None,
hf_prompt: str,
ollama_prompt: str | None = None,
as_repr: bool = False,
):
"""Print test output in a format suitable for Go test creation and LLM diffing."""
print(f"\n{'='*60}")
print(f"Test: {name}")
print("=" * 60)
print("\n--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print("\n--- Tools ---")
print(json.dumps(tools, indent=2))
if ollama_prompt is not None:
# Comparison mode
if hf_prompt == ollama_prompt:
print("\n--- Result: MATCH ---")
print("\n--- Prompt (both identical) ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
else:
print("\n--- Result: MISMATCH ---")
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("\n--- Ollama Prompt ---")
if as_repr:
print(repr(ollama_prompt))
else:
print(ollama_prompt)
print("\n--- Diff (Ollama -> HuggingFace) ---")
diff = compute_diff(hf_prompt, ollama_prompt)
if diff:
print(diff)
else:
print("(no line-level diff, check whitespace)")
else:
# HuggingFace only mode
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("=" * 60)
def run_tests(
model: str,
as_repr: bool = False,
test_filter: str | None = None,
ollama_model: str | None = None,
ollama_host: str = "http://localhost:11434",
):
"""Run all predefined test cases against a model."""
if ollama_model:
print(f"\nComparing HuggingFace ({model}) vs Ollama ({ollama_model})\n")
else:
print(f"\nRunning tests against: {model}\n")
matches = 0
mismatches = 0
errors = 0
for test_case in TEST_CASES:
name = test_case["name"]
messages = test_case["messages"]
tools = test_case["tools"]
# Filter tests if specified
if test_filter and test_filter.lower() not in name.lower():
continue
try:
hf_prompt = apply_template(model, messages, tools)
ollama_prompt = None
if ollama_model:
ollama_prompt = get_ollama_prompt(
ollama_model, messages, tools, ollama_host
)
if ollama_prompt is None:
errors += 1
elif hf_prompt == ollama_prompt:
matches += 1
else:
mismatches += 1
print_test_output(
name, messages, tools, hf_prompt, ollama_prompt, as_repr=as_repr
)
except Exception as e:
errors += 1
print(f"\n{'='*60}")
print(f"Test: {name} - FAILED")
print(f"--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print(f"--- Tools ---")
print(json.dumps(tools, indent=2))
print(f"--- Error ---")
print(f"{e}")
print("=" * 60)
# Print summary if comparing
if ollama_model:
total = matches + mismatches + errors
print(f"\n{'='*60}")
print("SUMMARY")
print("=" * 60)
print(f" Total: {total}")
print(f" Matches: {matches}")
print(f" Mismatches: {mismatches}")
print(f" Errors: {errors}")
print("=" * 60)
def show_template(model: str):
"""Show the chat template for a model."""
tokenizer = get_tokenizer(model)
print(f"\nChat template for {model}:\n")
print("-" * 60)
print(tokenizer.chat_template)
print("-" * 60)
def start_server(host: str = "0.0.0.0", port: int = 8000):
"""Start the FastAPI server for manual testing."""
from typing import Optional, List, Dict, Any as TypingAny
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
class Message(BaseModel):
role: str
content: Optional[str] = None
tool_calls: Optional[List[Dict[str, TypingAny]]] = None
tool_call_id: Optional[str] = None
class GeneratePromptRequest(BaseModel):
messages: List[Message]
model: str = "PrimeIntellect/INTELLECT-3"
tools: Optional[List[Dict[str, TypingAny]]] = None
inject_tools_as_functions: bool = False
class GeneratePromptResponse(BaseModel):
prompt: str
model: str
app = FastAPI(title="HuggingFace Prompt Generator", version="1.0.0")
@app.post("/generate-prompt", response_model=GeneratePromptResponse)
async def generate_prompt(request: GeneratePromptRequest):
try:
messages = []
for msg in request.messages:
message_dict = {"role": msg.role}
if msg.content is not None:
message_dict["content"] = msg.content
if msg.tool_calls is not None:
tool_calls = []
for tc in msg.tool_calls:
tc_copy = tc.copy()
if "function" in tc_copy and "arguments" in tc_copy["function"]:
args = tc_copy["function"]["arguments"]
if isinstance(args, str):
try:
tc_copy["function"]["arguments"] = json.loads(args)
except json.JSONDecodeError:
pass
tool_calls.append(tc_copy)
message_dict["tool_calls"] = tool_calls
if msg.tool_call_id is not None:
message_dict["tool_call_id"] = msg.tool_call_id
messages.append(message_dict)
prompt = apply_template(request.model, messages, request.tools)
return GeneratePromptResponse(prompt=prompt, model=request.model)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "healthy"}
print(f"Starting server on http://{host}:{port}")
print("Endpoints:")
print(" POST /generate-prompt - Generate prompt from messages")
print(" GET /health - Health check")
uvicorn.run(app, host=host, port=port)
def main():
parser = argparse.ArgumentParser(
description="HuggingFace Prompt Testing Tool",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--model",
"-m",
type=str,
help="HuggingFace model name (e.g., PrimeIntellect/INTELLECT-3)",
)
parser.add_argument(
"--ollama-model",
"-o",
type=str,
help="Ollama model name to compare against (e.g., qwen3-coder)",
)
parser.add_argument(
"--ollama-host",
type=str,
default="http://localhost:11434",
help="Ollama server URL (default: http://localhost:11434)",
)
parser.add_argument(
"--serve",
"-s",
action="store_true",
help="Start FastAPI server for manual curl testing",
)
parser.add_argument(
"--port",
"-p",
type=int,
default=8000,
help="Server port (default: 8000)",
)
parser.add_argument(
"--show-template",
"-t",
action="store_true",
help="Show the chat template for the model",
)
parser.add_argument(
"--repr",
"-r",
action="store_true",
help="Output prompts as Python repr (shows escape sequences)",
)
parser.add_argument(
"--filter",
"-f",
type=str,
help="Filter tests by name (substring match)",
)
args = parser.parse_args()
if args.serve:
start_server(port=args.port)
elif args.model:
if args.show_template:
show_template(args.model)
else:
run_tests(
args.model,
as_repr=args.repr,
test_filter=args.filter,
ollama_model=args.ollama_model,
ollama_host=args.ollama_host,
)
else:
parser.print_help()
print("\nExample usage:")
print(" uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3")
print(" uv run cmd/chat_template/chat_template.py --model Qwen/Qwen3-Coder-480B-A35B-Instruct --ollama-model qwen3-coder")
print(" uv run cmd/chat_template/chat_template.py --serve")
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -65,6 +65,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
}
slog.Info("discovering available GPUs...")
detectIncompatibleLibraries()
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
overrideWarnings()
@@ -98,6 +99,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
continue
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
continue
} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
continue
} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
continue
@@ -125,10 +129,20 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
supportedMu := sync.Mutex{}
supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index
for i := range devices {
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
if !devices[i].NeedsInitValidation() {
// No need to validate, add to the supported map
supportedMu.Lock()
if _, ok := supported[devices[i].Library]; !ok {
supported[devices[i].Library] = make(map[string]map[string]int)
}
if _, ok := supported[devices[i].Library][libDir]; !ok {
supported[devices[i].Library][libDir] = make(map[string]int)
}
supported[devices[i].Library][libDir][devices[i].ID] = i
supportedMu.Unlock()
continue
}
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
slog.Debug("verifying if device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
wg.Add(1)
go func(i int) {
@@ -474,3 +488,16 @@ func overrideWarnings() {
slog.Warn("if GPUs are not correctly discovered, unset and try again")
}
}
func detectIncompatibleLibraries() {
if runtime.GOOS != "windows" {
return
}
basePath, err := exec.LookPath("ggml-base.dll")
if err != nil || basePath == "" {
return
}
if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
}
}

View File

@@ -57,8 +57,13 @@ ollama ps
```
<Info>
**Output**: ``` NAME ID SIZE PROCESSOR UNTIL llama3:70b bcfb190ca3a7 42 GB
100% GPU 4 minutes from now ```
**Output**:
```
NAME ID SIZE PROCESSOR UNTIL
llama3:70b bcfb190ca3a7 42 GB 100% GPU 4 minutes from now
```
</Info>
The `Processor` column will show which memory the model was loaded into:
@@ -385,4 +390,4 @@ Ollama for Windows and macOS register as a login item during installation. You
- In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`
**MacOS**
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.

View File

@@ -149,9 +149,6 @@ PARAMETER <parameter> <parametervalue>
| Parameter | Description | Value Type | Example Usage |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | -------------------- |
| mirostat | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) | int | mirostat 0 |
| mirostat_eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) | float | mirostat_eta 0.1 |
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |

View File

@@ -251,6 +251,7 @@ func (kv KV) OllamaEngineRequired() bool {
"qwen3vl", "qwen3vlmoe",
"deepseekocr",
"deepseek2",
"nomic-bert",
}, kv.Architecture())
}

View File

@@ -388,9 +388,9 @@ func NewFunctionNameMap() *FunctionNameMap {
}
}
// Init initializes the handler with tools and optional last message
// Init initializes the handler with tools, optional last message, and think value
// Implements the Parser interface
func (h *HarmonyMessageHandler) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
func (h *HarmonyMessageHandler) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
// Initialize the harmony parser
if h.HarmonyParser == nil {
h.HarmonyParser = &HarmonyParser{

View File

File diff suppressed because it is too large

View File

@@ -236,11 +236,6 @@ type Model struct {
}
func New(c fs.Config) (model.Model, error) {
if c.Uint("attention.key_length_mla") == 0 {
// non-MLA models aren't yet supported
return nil, model.ErrUnsupportedModel
}
layers := make([]Layer, c.Uint("block_count"))
firstDenseLayerIndex := int(c.Uint("leading_dense_block_count"))
@@ -259,6 +254,30 @@ func New(c fs.Config) (model.Model, error) {
keyLength := int(cmp.Or(c.Uint("attention.key_length_mla"), c.Uint("attention.key_length")))
valueLength := int(cmp.Or(c.Uint("attention.value_length_mla"), c.Uint("attention.value_length")))
var pre []string
switch c.String("tokenizer.ggml.pre") {
case "deepseek-v3":
pre = []string{
// Split regex into multiple parts (according to DeepSeek3's regex)
"\\p{N}{1,3}",
`[一-龥぀-ゟ゠-ヿ]+`,
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
}
case "deepseek-llm":
// TODO: these models haven't been vetted so skip for now
// pre = []string{
// "[\r\n]",
// "\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ--ℝℤΩℨK--ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿff-stﬓ-ﬗA--z𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+",
// "\\s?[!-/:-~---‟ -。]+",
// "\\s+$",
// "[一-龥ࠀ-一가-퟿]+",
// "[0-9]",
// }
fallthrough
default:
return nil, model.ErrUnsupportedTokenizer
}
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
&model.Vocabulary{
@@ -273,10 +292,7 @@ func New(c fs.Config) (model.Model, error) {
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
},
// Split regex into multiple parts (according to DeepSeek3's regex)
"\\p{N}{1,3}",
`[一-龥぀-ゟ゠-ヿ]+`,
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
pre...,
),
Layers: layers,
Options: &Options{

319
model/parsers/cogito.go Normal file
View File

@@ -0,0 +1,319 @@
package parsers
import (
"encoding/json"
"errors"
"log/slog"
"strings"
"unicode"
"github.com/ollama/ollama/api"
)
type CogitoParserState int
const (
CogitoCollectingThinking CogitoParserState = iota
CogitoCollectingContent
CogitoCollectingToolCalls
CogitoCollectingToolOutput
)
const (
cogitoThinkingCloseTag = "</think>"
cogitoToolCallsBeginTag = "<tool▁calls▁begin>"
cogitoToolCallsEndTag = "<tool▁calls▁end>"
cogitoToolCallBeginTag = "<tool▁call▁begin>"
cogitoToolCallEndTag = "<tool▁call▁end>"
cogitoToolSepTag = "<tool▁sep>"
cogitoToolOutputBeginTag = "<tool▁output▁begin>"
cogitoToolOutputEndTag = "<tool▁output▁end>"
cogitoToolOutputsBeginTag = "<tool▁outputs▁begin>"
cogitoToolOutputsEndTag = "<tool▁outputs▁end>"
)
type CogitoParser struct {
state CogitoParserState
buffer strings.Builder
}
func (p *CogitoParser) HasToolSupport() bool {
return true
}
func (p *CogitoParser) HasThinkingSupport() bool {
return true
}
func (p *CogitoParser) setInitialState(lastMessage *api.Message, tools []api.Tool, thinkValue *api.ThinkValue) {
prefill := lastMessage != nil && lastMessage.Role == "assistant"
// Check both model capability AND request preference
thinkingEnabled := thinkValue != nil && thinkValue.Bool()
// thinkingEnabled should be set to false for tools
if !thinkingEnabled {
p.state = CogitoCollectingContent
return
}
if prefill && lastMessage.Content != "" {
p.state = CogitoCollectingContent
return
}
// Note: for cogito, if there are tools, then we don't want to be thinking
if len(tools) > 0 {
p.state = CogitoCollectingContent
return
}
p.state = CogitoCollectingThinking
}
func (p *CogitoParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.setInitialState(lastMessage, tools, thinkValue)
return tools
}
type cogitoEvent interface {
isCogitoEvent()
}
type cogitoEventThinkingContent struct {
content string
}
type cogitoEventContent struct {
content string
}
type cogitoEventToolCall struct {
toolCall api.ToolCall
}
func (cogitoEventThinkingContent) isCogitoEvent() {}
func (cogitoEventContent) isCogitoEvent() {}
func (cogitoEventToolCall) isCogitoEvent() {}
func (p *CogitoParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
p.buffer.WriteString(s)
events := p.parseEvents()
var toolCalls []api.ToolCall
var contentSb strings.Builder
var thinkingSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case cogitoEventToolCall:
toolCalls = append(toolCalls, event.toolCall)
case cogitoEventThinkingContent:
thinkingSb.WriteString(event.content)
case cogitoEventContent:
contentSb.WriteString(event.content)
}
}
return contentSb.String(), thinkingSb.String(), toolCalls, nil
}
func (p *CogitoParser) parseEvents() []cogitoEvent {
var all []cogitoEvent
keepLooping := true
for keepLooping {
var events []cogitoEvent
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
return all
}
func (p *CogitoParser) eat() ([]cogitoEvent, bool) {
var events []cogitoEvent
bufStr := p.buffer.String()
if bufStr == "" {
return events, false
}
switch p.state {
case CogitoCollectingThinking:
if strings.Contains(bufStr, cogitoThinkingCloseTag) { // thinking[</think>] -> content
split := strings.SplitN(bufStr, cogitoThinkingCloseTag, 2)
thinking := split[0]
thinking = strings.TrimRightFunc(thinking, unicode.IsSpace)
remaining := split[1]
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = CogitoCollectingContent
if len(thinking) > 0 {
events = append(events, cogitoEventThinkingContent{content: thinking})
}
return events, true
} else if overlapLen := overlap(bufStr, cogitoThinkingCloseTag); overlapLen > 0 { // partial </think>
beforePartialTag := bufStr[:len(bufStr)-overlapLen]
trailingLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingLen
unambiguous := bufStr[:ambiguousStart]
ambiguous := bufStr[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, cogitoEventThinkingContent{content: unambiguous})
}
return events, false
} else { // otherwise it's thinking content
whitespaceLen := trailingWhitespaceLen(bufStr)
ambiguousStart := len(bufStr) - whitespaceLen
unambiguous := bufStr[:ambiguousStart]
ambiguous := bufStr[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, cogitoEventThinkingContent{content: unambiguous})
}
return events, false
}
case CogitoCollectingContent:
switch {
case strings.Contains(bufStr, cogitoToolCallsBeginTag): // content[<tool▁calls▁begin>] -> tool calls
split := strings.SplitN(bufStr, cogitoToolCallsBeginTag, 2)
contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = CogitoCollectingToolCalls
if len(contentBefore) > 0 {
events = append(events, cogitoEventContent{content: contentBefore})
}
return events, true
case strings.Contains(bufStr, cogitoToolOutputsBeginTag): // content[<tool▁outputs▁begin>] -> tool outputs
split := strings.SplitN(bufStr, cogitoToolOutputsBeginTag, 2)
contentBefore := strings.TrimRightFunc(split[0], unicode.IsSpace)
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = CogitoCollectingToolOutput
if len(contentBefore) > 0 {
events = append(events, cogitoEventContent{content: contentBefore})
}
return events, true
default: // otherwise it's content
p.buffer.Reset()
if len(bufStr) > 0 {
events = append(events, cogitoEventContent{content: bufStr})
}
return events, false
}
case CogitoCollectingToolCalls:
if idx := strings.Index(bufStr, cogitoToolCallBeginTag); idx != -1 {
startIdx := idx + len(cogitoToolCallBeginTag)
if endIdx := strings.Index(bufStr[startIdx:], cogitoToolCallEndTag); endIdx != -1 {
toolCallContent := bufStr[startIdx : startIdx+endIdx]
if toolCall, err := p.parseToolCallContent(toolCallContent); err == nil {
remaining := bufStr[startIdx+endIdx+len(cogitoToolCallEndTag):]
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(remaining)
events = append(events, cogitoEventToolCall{toolCall: toolCall})
return events, true
} else {
slog.Warn("cogito tool call parsing failed", "error", err)
}
}
}
if idx := strings.Index(bufStr, cogitoToolCallsEndTag); idx != -1 {
remaining := bufStr[idx+len(cogitoToolCallsEndTag):]
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = CogitoCollectingContent
return events, true
}
return events, false
case CogitoCollectingToolOutput:
if idx := strings.Index(bufStr, cogitoToolOutputBeginTag); idx != -1 {
startIdx := idx + len(cogitoToolOutputBeginTag)
if endIdx := strings.Index(bufStr[startIdx:], cogitoToolOutputEndTag); endIdx != -1 {
remaining := bufStr[startIdx+endIdx+len(cogitoToolOutputEndTag):]
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(remaining)
return events, true
}
}
if idx := strings.Index(bufStr, cogitoToolOutputsEndTag); idx != -1 {
remaining := bufStr[idx+len(cogitoToolOutputsEndTag):]
remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = CogitoCollectingContent
return events, true
}
return events, false
}
return events, false
}
func (p *CogitoParser) parseToolCallContent(content string) (api.ToolCall, error) {
// Expected format: function<tool▁sep>tool_name\n```json\n{args}\n```
parts := strings.SplitN(content, cogitoToolSepTag, 2)
if len(parts) < 2 {
return api.ToolCall{}, errors.New("invalid format")
}
nameAndArgs := parts[1]
jsonStart := strings.Index(nameAndArgs, "\n```json\n")
if jsonStart == -1 {
return api.ToolCall{}, errors.New("invalid format")
}
toolName := strings.TrimSpace(nameAndArgs[:jsonStart])
jsonContent := nameAndArgs[jsonStart+len("\n```json\n"):]
jsonEnd := strings.Index(jsonContent, "\n```")
if jsonEnd == -1 {
return api.ToolCall{}, errors.New("invalid format")
}
argsJSON := jsonContent[:jsonEnd]
var args api.ToolCallFunctionArguments
if err := json.Unmarshal([]byte(argsJSON), &args); err != nil {
return api.ToolCall{}, err
}
return api.ToolCall{
Function: api.ToolCallFunction{
Name: toolName,
Arguments: args,
},
}, nil
}

View File

@@ -0,0 +1,565 @@
package parsers
import (
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestCogitoParser(t *testing.T) {
tests := []struct {
name string
input string
expectedContent string
expectedThinking string
expectedToolCalls []api.ToolCall
tools []api.Tool
lastMessage *api.Message
}{
{
name: "simple_content",
input: "This is a simple response.",
expectedContent: "This is a simple response.",
expectedThinking: "",
},
{
name: "thinking_only",
input: "This is thinking content.</think>This is response content.",
expectedContent: "This is response content.",
expectedThinking: "This is thinking content.",
},
{
name: "tool_call_simple",
input: `<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end><tool▁calls▁end>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Parameters: api.ToolFunctionParameters{
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
},
},
},
},
},
},
{
name: "thinking_with_tool_call",
input: `I need to check the weather.</think><tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end><tool▁calls▁end>`,
expectedContent: "I need to check the weather.</think>",
expectedThinking: "", // No thinking when tools are present (Cogito-specific behavior)
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Parameters: api.ToolFunctionParameters{
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
},
},
},
},
},
},
{
name: "multiple_tool_calls",
input: `<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```" + `<tool▁call▁end>
<tool▁call▁begin>function<tool▁sep>get_weather
` + "```json\n" + `{"location":"London"}
` + "```" + `<tool▁call▁end><tool▁calls▁end>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "London",
},
},
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Parameters: api.ToolFunctionParameters{
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
},
},
},
},
},
},
{
name: "complex_tool_arguments",
input: `<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>process_data
` + "```json\n" + `{"items":["item1","item2"],"config":{"enabled":true,"threshold":0.95},"count":42}
` + "```" + `<tool▁call▁end><tool▁calls▁end>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "process_data",
Arguments: api.ToolCallFunctionArguments{
"items": []any{"item1", "item2"},
"config": map[string]any{"enabled": true, "threshold": 0.95},
"count": 42.0,
},
},
},
},
},
{
name: "tool_output_parsing",
input: `<tool▁outputs▁begin><tool▁output▁begin>{"temperature": 22, "condition": "sunny"}<tool▁output▁end><tool▁outputs▁end>`,
expectedContent: "",
expectedThinking: "",
},
{
name: "thinking_with_multiline_content",
input: `This is line 1
This is line 2
This is line 3</think>Final response here.`,
expectedContent: "Final response here.",
expectedThinking: "This is line 1\nThis is line 2\nThis is line 3",
},
{
name: "no_thinking_simple",
input: "This is content.",
expectedContent: "This is content.",
expectedThinking: "",
},
{
name: "prefill_content_only",
input: "Continuing from previous content.",
expectedContent: "Continuing from previous content.",
lastMessage: &api.Message{
Role: "assistant",
Content: "Previous content",
},
},
{
name: "prefill_with_thinking",
input: "Continuing thinking</think>Continuing content.",
expectedContent: "Continuing content.",
expectedThinking: "Continuing thinking",
lastMessage: &api.Message{
Role: "assistant",
},
},
// Edge cases
{
name: "nested_think_tags_in_thinking",
input: "I'm thinking <think>nested</think> more thinking</think>Final content.",
expectedContent: "more thinking</think>Final content.",
expectedThinking: "I'm thinking <think>nested",
},
{
name: "multiple_think_close_tags",
input: "First thinking</think>Content</think>More content.",
expectedContent: "Content</think>More content.",
expectedThinking: "First thinking",
},
{
name: "empty_thinking_content",
input: "</think>Just content here.",
expectedContent: "</think>Just content here.",
expectedThinking: "",
},
{
name: "thinking_disabled_with_think_tags",
input: "Content with </think> tags should be treated as content.",
expectedContent: "Content with </think> tags should be treated as content.",
expectedThinking: "",
lastMessage: &api.Message{
Role: "assistant",
Content: "existing", // Forces non-thinking mode
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Use thinking-enabled parser for tests that expect thinking
hasThinking := tt.expectedThinking != ""
parser := &CogitoParser{} // it has thinking support
parser.Init(tt.tools, tt.lastMessage, &api.ThinkValue{Value: hasThinking}) // but we should set it with the request that the user wants
content, thinking, toolCalls, err := parser.Add(tt.input, true)
if err != nil {
t.Fatalf("Add() error = %v", err)
}
if diff := cmp.Diff(tt.expectedContent, content); diff != "" {
t.Errorf("content mismatch (-want +got):\n%s", diff)
}
if diff := cmp.Diff(tt.expectedThinking, thinking); diff != "" {
t.Errorf("thinking mismatch (-want +got):\n%s", diff)
}
if diff := cmp.Diff(tt.expectedToolCalls, toolCalls); diff != "" {
t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
}
})
}
}
func TestCogitoParser_Streaming(t *testing.T) {
parser := &CogitoParser{}
parser.Init(nil, nil, &api.ThinkValue{Value: true})
chunks := []string{
"This is ",
"thinking content",
".</think>This is ",
"content.<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>test_tool\n```json\n{\"arg\":\"value\"}\n```<tool▁call▁end><tool▁calls▁end>",
}
var finalContent, finalThinking strings.Builder
var finalToolCalls []api.ToolCall
for i, chunk := range chunks {
done := i == len(chunks)-1
content, thinking, toolCalls, err := parser.Add(chunk, done)
if err != nil {
t.Fatalf("Add() error on chunk %d: %v", i, err)
}
finalContent.WriteString(content)
finalThinking.WriteString(thinking)
finalToolCalls = append(finalToolCalls, toolCalls...)
}
expectedContent := "This is content."
expectedThinking := "This is thinking content."
expectedToolCalls := []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test_tool",
Arguments: api.ToolCallFunctionArguments{
"arg": "value",
},
},
},
}
if finalContent.String() != expectedContent {
t.Errorf("expected content %q, got %q", expectedContent, finalContent.String())
}
if finalThinking.String() != expectedThinking {
t.Errorf("expected thinking %q, got %q", expectedThinking, finalThinking.String())
}
if diff := cmp.Diff(expectedToolCalls, finalToolCalls); diff != "" {
t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
}
}
func TestCogitoParser_StreamingEdgeCases(t *testing.T) {
tests := []struct {
name string
chunks []string
expectedContent string
expectedThinking string
expectedToolCalls []api.ToolCall
hasThinkingSupport bool
}{
{
name: "split_thinking_tag",
chunks: []string{
"This is thinking content</thi",
"nk>This is content.",
},
expectedContent: "This is content.",
expectedThinking: "This is thinking content",
hasThinkingSupport: true,
},
{
name: "split_tool_calls_begin_tag_conservative_parsing",
chunks: []string{
"Content before<tool▁calls▁beg",
"in><tool▁call▁begin>function<tool▁sep>test\n```json\n{}\n```<tool▁call▁end><tool▁calls▁end>",
},
// Parser is conservative - treats incomplete tags as content
expectedContent: "Content before<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>test\n```json\n{}\n```<tool▁call▁end><tool▁calls▁end>",
expectedToolCalls: nil,
hasThinkingSupport: false,
},
{
name: "thinking_disabled_with_split_tags",
chunks: []string{
"Content with </thi",
"nk> should be treated as content.",
},
expectedContent: "Content with </think> should be treated as content.",
expectedThinking: "",
hasThinkingSupport: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &CogitoParser{}
parser.Init(nil, nil, &api.ThinkValue{Value: tt.hasThinkingSupport})
var finalContent, finalThinking strings.Builder
var finalToolCalls []api.ToolCall
for i, chunk := range tt.chunks {
done := i == len(tt.chunks)-1
content, thinking, toolCalls, err := parser.Add(chunk, done)
if err != nil {
t.Fatalf("Add() error on chunk %d: %v", i, err)
}
finalContent.WriteString(content)
finalThinking.WriteString(thinking)
finalToolCalls = append(finalToolCalls, toolCalls...)
}
if finalContent.String() != tt.expectedContent {
t.Errorf("expected content %q, got %q", tt.expectedContent, finalContent.String())
}
if finalThinking.String() != tt.expectedThinking {
t.Errorf("expected thinking %q, got %q", tt.expectedThinking, finalThinking.String())
}
if diff := cmp.Diff(tt.expectedToolCalls, finalToolCalls); diff != "" {
t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
}
})
}
}
func TestCogitoParser_HasToolSupport(t *testing.T) {
parser := &CogitoParser{}
if !parser.HasToolSupport() {
t.Error("CogitoParser should support tools")
}
}
func TestCogitoParser_Init(t *testing.T) {
parser := &CogitoParser{}
tools := []api.Tool{
{Function: api.ToolFunction{Name: "test_tool"}},
}
lastMessage := &api.Message{Role: "assistant", Content: "previous"}
returnedTools := parser.Init(tools, lastMessage, nil)
if len(returnedTools) != len(tools) {
t.Errorf("expected %d tools returned, got %d", len(tools), len(returnedTools))
}
}
func TestCogitoParser_parseToolCallContent(t *testing.T) {
tests := []struct {
name string
content string
expected api.ToolCall
expectError bool
}{
{
name: "valid_tool_call_standard_format",
content: `function<tool▁sep>get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```",
expected: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
expectError: false,
},
{
name: "valid_tool_call_complex_args",
content: `function<tool▁sep>process_data
` + "```json\n" + `{"items":["item1","item2"],"config":{"enabled":true},"count":42}
` + "```",
expected: api.ToolCall{
Function: api.ToolCallFunction{
Name: "process_data",
Arguments: api.ToolCallFunctionArguments{
"items": []any{"item1", "item2"},
"config": map[string]any{"enabled": true},
"count": 42.0,
},
},
},
expectError: false,
},
{
name: "valid_tool_call_empty_args",
content: `function<tool▁sep>no_args_tool
` + "```json\n" + `{}
` + "```",
expected: api.ToolCall{
Function: api.ToolCallFunction{
Name: "no_args_tool",
Arguments: api.ToolCallFunctionArguments{},
},
},
expectError: false,
},
{
name: "missing_separator",
content: `functionget_weather` + "```json\n" + `{"location":"Paris"}` + "\n```",
expected: api.ToolCall{},
expectError: true,
},
{
name: "invalid_function_type",
content: `not_function<tool▁sep>get_weather` + "```json\n" + `{"location":"Paris"}` + "\n```",
expected: api.ToolCall{},
expectError: true,
},
{
name: "missing_json_block_start",
content: `function<tool▁sep>get_weather{"location":"Paris"}` + "```",
expected: api.ToolCall{},
expectError: true,
},
{
name: "missing_json_block_end",
content: `function<tool▁sep>get_weather` + "```json\n" + `{"location":"Paris"}`,
expected: api.ToolCall{},
expectError: true,
},
{
name: "invalid_json",
content: `function<tool▁sep>get_weather` + "```json\n" + `{location:Paris}` + "\n```",
expected: api.ToolCall{},
expectError: true,
},
{
name: "empty_function_type",
content: `<tool▁sep>get_weather` + "```json\n" + `{"location":"Paris"}` + "\n```",
expected: api.ToolCall{},
expectError: true,
},
{
name: "tool_with_spaces_in_name",
content: `function<tool▁sep> get_weather
` + "```json\n" + `{"location":"Paris"}
` + "```",
expected: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
},
},
},
expectError: false,
},
{
name: "tool_with_multiline_json",
content: `function<tool▁sep>get_weather
` + "```json\n" + `{
"location": "Paris",
"units": "metric"
}
` + "```",
expected: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{
"location": "Paris",
"units": "metric",
},
},
},
expectError: false,
},
{
name: "tool_with_nested_objects",
content: `function<tool▁sep>complex_tool
` + "```json\n" + `{"nested":{"deep":{"value":123}}}
` + "```",
expected: api.ToolCall{
Function: api.ToolCallFunction{
Name: "complex_tool",
Arguments: api.ToolCallFunctionArguments{
"nested": map[string]any{
"deep": map[string]any{
"value": 123.0,
},
},
},
},
},
expectError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &CogitoParser{}
result, err := parser.parseToolCallContent(tt.content)
if tt.expectError {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if diff := cmp.Diff(tt.expected, result); diff != "" {
t.Errorf("tool call mismatch (-want +got):\n%s", diff)
}
})
}
}

View File

@@ -6,9 +6,9 @@ import (
)
type Parser interface {
// Init initializes the parser with tools and optional last message for chat prefill
// Init initializes the parser with tools, optional last message for chat prefill, and think value
// Returns processed tools if the parser needs to modify them (e.g., harmony renames them)
Init(tools []api.Tool, lastMessage *api.Message) []api.Tool
Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool
// Add processes streamed content and returns parsed content, thinking, and tool calls
// The done flag indicates if this is the last chunk (used for draining accumulators)
Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error)
@@ -52,6 +52,8 @@ func ParserForName(name string) Parser {
return &PassthroughParser{}
case "harmony":
return harmony.NewHarmonyMessageHandler()
case "cogito":
return &CogitoParser{}
default:
return nil
}
@@ -59,7 +61,7 @@ func ParserForName(name string) Parser {
type PassthroughParser struct{}
func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
return tools // passthrough doesn't modify tools
}
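
For illustration (not part of the diff): a minimal sketch of a parser adopting the updated three-argument Init, using thinkValue the way CogitoParser.setInitialState does above. The sketchParser type and its routing behavior are invented for this example; only api.Tool, api.Message, api.ToolCall, and api.ThinkValue come from the change itself.

```go
package parsers

import "github.com/ollama/ollama/api"

// sketchParser is a hypothetical parser showing the updated Init signature.
type sketchParser struct {
	thinking bool
}

// Init now receives the request's think value in addition to the tools and the
// optional last message used for chat prefill.
func (p *sketchParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
	// Enable thinking only when the request asked for it, mirroring CogitoParser.
	p.thinking = thinkValue != nil && thinkValue.Bool()
	return tools // this sketch does not rewrite tools
}

// Add routes streamed text to thinking or content based on the Init decision.
func (p *sketchParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
	if p.thinking {
		return "", s, nil, nil
	}
	return s, "", nil, nil
}
```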

View File

@@ -10,7 +10,7 @@ type mockParser struct {
name string
}
func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
return tools
}

View File

@@ -43,7 +43,7 @@ func (p *Qwen3CoderParser) HasThinkingSupport() bool {
return false
}
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.tools = tools
return tools // Qwen doesn't modify tools
}
@@ -432,7 +432,7 @@ func transformToXML(raw string) string {
groups := qwenTagRegex.FindStringSubmatch(match)
tag := groups[1]
var escapedValue strings.Builder
xml.EscapeText(&escapedValue, []byte(groups[2]))
_ = xml.EscapeText(&escapedValue, []byte(groups[2])) // error is always nil for strings.Builder
return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
})

View File

@@ -54,7 +54,7 @@ func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
p.state = CollectingThinkingContent
}
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.tools = tools
p.setInitialState(lastMessage)
return tools

View File

@@ -198,7 +198,7 @@ func TestQwen3VLNonThinkingParserStreaming(t *testing.T) {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: false}
parser.Init([]api.Tool{}, nil)
parser.Init([]api.Tool{}, nil, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)
@@ -515,7 +515,7 @@ func TestQwenOldParserStreaming(t *testing.T) {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: false}
parser.Init([]api.Tool{}, nil)
parser.Init([]api.Tool{}, nil, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)
@@ -822,7 +822,7 @@ func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: false}
parser.Init([]api.Tool{}, nil)
parser.Init([]api.Tool{}, nil, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)

View File

@@ -205,7 +205,7 @@ func TestQwen3VLThinkingParserStreaming(t *testing.T) {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, nil)
parser.Init([]api.Tool{}, nil, nil)
// parser.state = CollectingThinkingContent
for i, step := range tc.steps {
@@ -386,7 +386,7 @@ func TestQwen3VLParserState(t *testing.T) {
for _, tc := range cases {
parser := Qwen3VLParser{hasThinkingSupport: tc.hasThinking}
parser.Init(nil, tc.last)
parser.Init(nil, tc.last, nil)
if parser.state != tc.wantState {
t.Errorf("%s: got state %v, want %v", tc.desc, parser.state, tc.wantState)
}
@@ -437,7 +437,7 @@ func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, last)
parser.Init([]api.Tool{}, last, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)
@@ -500,7 +500,7 @@ func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, last)
parser.Init([]api.Tool{}, last, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)
@@ -523,7 +523,7 @@ func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
// last message is assistant with content ⇒ start in CollectingContent
last := &api.Message{Role: "assistant", Content: "has content"}
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, last)
parser.Init([]api.Tool{}, last, nil)
type step struct {
input string
@@ -750,7 +750,7 @@ func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, nil)
parser.Init([]api.Tool{}, nil, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)
@@ -859,7 +859,7 @@ func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) {
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, tc.prefillMsg)
parser.Init([]api.Tool{}, tc.prefillMsg, nil)
for i, step := range tc.steps {
parser.buffer.WriteString(step.input)

View File

@@ -340,7 +340,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
builtinParser = parsers.ParserForName(m.Config.Parser)
if builtinParser != nil {
// no tools or last message for generate endpoint
builtinParser.Init(nil, nil)
builtinParser.Init(nil, nil, req.Think)
}
}
@@ -2051,7 +2051,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
lastMessage = &msgs[len(msgs)-1]
}
// Initialize parser and get processed tools
processedTools = builtinParser.Init(req.Tools, lastMessage)
processedTools = builtinParser.Init(req.Tools, lastMessage, req.Think)
}
}