Compare commits


8 Commits

Author SHA1 Message Date
ParthSareen
92af238208 wip 2025-12-02 12:17:36 -08:00
ParthSareen
7461faf651 script to render templates 2025-12-01 18:03:04 -08:00
Daniel Hiltgen
554172759c win: warn if ggml-base detected in PATH (#13289)
If the user has somehow installed another GGML-based app that places a
ggml-base lib somewhere in their PATH, we can experience runtime problems
due to incompatibilities. This change adds a warning message if we detect
a ggml-base outside of our install location, to aid in troubleshooting.
2025-12-01 15:36:47 -08:00
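
A minimal sketch of the detection this commit describes (the full function appears in the discover diff further down); `installDir` stands in for Ollama's own library path, `ml.LibOllamaPath` in the real change:

```go
package main

import (
	"log/slog"
	"os/exec"
	"runtime"
	"strings"
)

// warnOnForeignGGML warns when a ggml-base.dll reachable via PATH lives
// outside our install location (Windows-only, mirroring the diff below).
func warnOnForeignGGML(installDir string) {
	if runtime.GOOS != "windows" {
		return
	}
	basePath, err := exec.LookPath("ggml-base.dll")
	if err != nil || basePath == "" {
		return // no ggml-base.dll on PATH: nothing to warn about
	}
	if !strings.HasPrefix(basePath, installDir) {
		slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
	}
}

func main() {
	warnOnForeignGGML(`C:\Program Files\Ollama\lib\ollama`) // example install path
}
```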
Bruce MacDonald
5b6a8e6001 api/client: handle non-json streaming errors (#13007)
While processing the response stream during a chat or generation, any error encountered is parsed and returned to the user. The problem with the existing code was that it assumed the response body would be valid JSON, which is not a safe assumption, and parsing failures produced cryptic error messages such as:
`invalid character 'i' looking for beginning of value`

This change updates the stream function to return the raw error string if it can't be parsed as JSON. This should help with debugging by making sure the actual error reaches the user.
2025-12-01 15:10:16 -08:00
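
On the caller side, the raw body is now reachable through the returned error; a sketch using the client API, assuming the `StatusError` fields shown in the `api/client.go` diff below (the model name is a placeholder):

```go
package main

import (
	"context"
	"errors"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	req := &api.ChatRequest{
		Model:    "llama3", // placeholder model name
		Messages: []api.Message{{Role: "user", Content: "Hello!"}},
	}
	err = client.Chat(context.Background(), req, func(r api.ChatResponse) error { return nil })
	// Before this change a non-JSON body surfaced as a JSON parsing error;
	// now the body itself is preserved on the error.
	var statusErr api.StatusError
	if errors.As(err, &statusErr) {
		log.Printf("server returned %d: %s", statusErr.StatusCode, statusErr.ErrorMessage)
	} else if err != nil {
		log.Print(err)
	}
}
```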
Daniel Hiltgen
467bbc0dd5 jetpack: require exact match or skip cuda_jetpack* (#13288)
The cuda_jetpack libs will enumerate discrete GPUs on SBSA systems,
which leads to runtime failures from missing kernels. This fix
requires an exact match to enable jetpacks instead of relying on
enumeration to filter out supported libraries.
2025-12-01 12:48:16 -08:00
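
The rule reduces to two checks per runner library directory (a sketch; `jetpack` is the detected JetPack version, empty when none was found, and the paths are examples):

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// usable mirrors the filtering in the discover diff below: with a JetPack
// detected, only the exactly matching cuda_<version> dir is enabled; with
// none detected, cuda_jetpack* dirs are skipped rather than enumerated.
func usable(dir, jetpack string) bool {
	base := filepath.Base(dir)
	if jetpack != "" {
		// Jetson: require an exact match on the detected JetPack version.
		return base == "cuda_"+jetpack
	}
	// No JetPack detected: never enable the jetpack builds.
	return !strings.Contains(base, "cuda_jetpack")
}

func main() {
	for _, dir := range []string{"/usr/lib/ollama/cuda_jetpack6", "/usr/lib/ollama/cuda_v13"} {
		fmt.Println(dir, usable(dir, "")) // no JetPack detected: jetpack dirs are skipped
	}
}
```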
Jeffrey Morgan
6d9f9323c5 .gitattributes: add app/webview to linguist-vendored (#13274) 2025-11-29 23:46:10 -05:00
Ondrej Kokes
0c2489605d docs: fix output formatting in faq.mdx (#13231)
There were a few Markdown typos in one FAQ answer. It now renders as a proper ASCII table.
2025-11-28 19:19:21 -05:00
EntropyYue
8b1b89a984 docs: remove deprecated parameters (#13237) 2025-11-26 11:03:09 +09:00
21 changed files with 728 additions and 2061 deletions

.gitattributes

@@ -19,6 +19,8 @@ ml/backend/**/*.comp linguist-vendored
 ml/backend/**/*.glsl linguist-vendored
 ml/backend/**/CMakeLists.txt linguist-vendored
+app/webview linguist-vendored
 llama/build-info.cpp linguist-generated
 ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

api/client.go

@@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		bts := scanner.Bytes()
 		if err := json.Unmarshal(bts, &errorResponse); err != nil {
-			return fmt.Errorf("unmarshal: %w", err)
+			if response.StatusCode >= http.StatusBadRequest {
+				return StatusError{
+					StatusCode:   response.StatusCode,
+					Status:       response.Status,
+					ErrorMessage: string(bts),
+				}
+			}
+			return errors.New(string(bts))
 		}

 		if response.StatusCode == http.StatusUnauthorized {

api/client_test.go

@@ -55,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
 type testError struct {
 	message    string
 	statusCode int
+	raw        bool // if true, write message as-is instead of JSON encoding
 }

 func (e testError) Error() string {
@@ -111,6 +112,20 @@ func TestClientStream(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "plain text error response",
+			responses: []any{
+				"internal server error",
+			},
+			wantErr: "internal server error",
+		},
+		{
+			name: "HTML error page",
+			responses: []any{
+				"<html><body>404 Not Found</body></html>",
+			},
+			wantErr: "404 Not Found",
+		},
 	}

 	for _, tc := range testCases {
@@ -135,6 +150,12 @@ func TestClientStream(t *testing.T) {
 				return
 			}

+			if str, ok := resp.(string); ok {
+				fmt.Fprintln(w, str)
+				flusher.Flush()
+				continue
+			}
+
 			if err := json.NewEncoder(w).Encode(resp); err != nil {
 				t.Fatalf("failed to encode response: %v", err)
 			}
@@ -173,9 +194,10 @@ func TestClientStream(t *testing.T) {
 func TestClientDo(t *testing.T) {
 	testCases := []struct {
-		name     string
-		response any
-		wantErr  string
+		name           string
+		response       any
+		wantErr        string
+		wantStatusCode int
 	}{
 		{
 			name: "immediate error response",
@@ -183,7 +205,8 @@ func TestClientDo(t *testing.T) {
 				message:    "test error message",
 				statusCode: http.StatusBadRequest,
 			},
-			wantErr: "test error message",
+			wantErr:        "test error message",
+			wantStatusCode: http.StatusBadRequest,
 		},
 		{
 			name: "server error response",
@@ -191,7 +214,8 @@ func TestClientDo(t *testing.T) {
 				message:    "internal error",
 				statusCode: http.StatusInternalServerError,
 			},
-			wantErr: "internal error",
+			wantErr:        "internal error",
+			wantStatusCode: http.StatusInternalServerError,
 		},
 		{
 			name: "successful response",
@@ -203,6 +227,26 @@ func TestClientDo(t *testing.T) {
 				Success: true,
 			},
 		},
+		{
+			name: "plain text error response",
+			response: testError{
+				message:    "internal server error",
+				statusCode: http.StatusInternalServerError,
+				raw:        true,
+			},
+			wantErr:        "internal server error",
+			wantStatusCode: http.StatusInternalServerError,
+		},
+		{
+			name: "HTML error page",
+			response: testError{
+				message:    "<html><body>404 Not Found</body></html>",
+				statusCode: http.StatusNotFound,
+				raw:        true,
+			},
+			wantErr:        "<html><body>404 Not Found</body></html>",
+			wantStatusCode: http.StatusNotFound,
+		},
 	}

 	for _, tc := range testCases {
@@ -210,11 +254,16 @@ func TestClientDo(t *testing.T) {
 		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			if errResp, ok := tc.response.(testError); ok {
 				w.WriteHeader(errResp.statusCode)
-				err := json.NewEncoder(w).Encode(map[string]string{
-					"error": errResp.message,
-				})
-				if err != nil {
-					t.Fatal("failed to encode error response:", err)
-				}
+				if !errResp.raw {
+					err := json.NewEncoder(w).Encode(map[string]string{
+						"error": errResp.message,
+					})
+					if err != nil {
+						t.Fatal("failed to encode error response:", err)
+					}
+				} else {
+					// Write raw message (simulates non-JSON error responses)
+					fmt.Fprint(w, errResp.message)
+				}
 				return
 			}
@@ -241,6 +290,15 @@ func TestClientDo(t *testing.T) {
 			if err.Error() != tc.wantErr {
 				t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
 			}
+			if tc.wantStatusCode != 0 {
+				if statusErr, ok := err.(StatusError); ok {
+					if statusErr.StatusCode != tc.wantStatusCode {
+						t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
+					}
+				} else {
+					t.Errorf("expected StatusError, got %T", err)
+				}
+			}
 			return
 		}

cmd/chat_template/chat_template.py

@@ -0,0 +1,625 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "transformers>=4.57.0",
# "jinja2",
# "fastapi",
# "uvicorn",
# "pydantic",
# "requests",
# ]
# ///
"""
Chat Template Testing Tool
Test HuggingFace chat templates against Ollama renderers.
Usage:
# Run predefined test cases against a HuggingFace model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3
# Compare HuggingFace output with Ollama renderer
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --ollama-model intellect3
# Start server for manual curl testing
uv run cmd/chat_template/chat_template.py --serve
# Show chat template for a model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --show-template
"""
import argparse
import json
import sys
from typing import Any
from transformers import AutoTokenizer
TEST_CASES = [
{
"name": "basic_user_message",
"messages": [{"role": "user", "content": "Hello!"}],
"tools": None,
},
{
"name": "with_system_message",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
],
"tools": None,
},
{
"name": "multi_turn_conversation",
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"},
],
"tools": None,
},
{
"name": "with_tools",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the weather?"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "tool_call_and_response",
"messages": [
{"role": "user", "content": "What is the weather in SF?"},
{
"role": "assistant",
"content": "Let me check the weather.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "parallel_tool_calls",
"messages": [
{"role": "user", "content": "Get weather in SF and NYC"},
{
"role": "assistant",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
},
{
"id": "call_2",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "New York"},
},
},
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
{"role": "tool", "content": '{"temperature": 55}', "tool_call_id": "call_2"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
# Thinking tests
{
"name": "assistant_with_thinking",
"messages": [
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": "The answer is 4.",
"thinking": "Let me calculate: 2 + 2 = 4. This is basic arithmetic.",
},
{"role": "user", "content": "And 3+3?"},
],
"tools": None,
},
{
"name": "thinking_with_tool_call",
"messages": [
{"role": "user", "content": "What's the weather in Paris?"},
{
"role": "assistant",
"content": "I'll check the weather for you.",
"thinking": "The user wants to know the weather in Paris. I should call the get_weather function.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "Paris"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 18, "condition": "cloudy"}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
{
"name": "thinking_only_no_content",
"messages": [
{"role": "user", "content": "Think about this silently."},
{
"role": "assistant",
"content": "", # HuggingFace requires content field
"thinking": "I'm thinking about this but won't respond with visible content.",
},
{"role": "user", "content": "What did you think?"},
],
"tools": None,
},
]
# Cache for tokenizers
_tokenizer_cache: dict[str, Any] = {}
def get_tokenizer(model_name: str):
"""Get or create tokenizer for the given model."""
if model_name not in _tokenizer_cache:
print(f"Loading tokenizer for {model_name}...", file=sys.stderr)
_tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(model_name)
return _tokenizer_cache[model_name]
def apply_template(
model: str,
messages: list[dict],
tools: list[dict] | None = None,
) -> str:
"""Apply HuggingFace chat template to messages."""
tokenizer = get_tokenizer(model)
if tools:
return tokenizer.apply_chat_template(
messages,
tools=tools,
tokenize=False,
add_generation_prompt=True,
)
else:
return tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
def get_ollama_prompt(
ollama_model: str,
messages: list[dict],
tools: list[dict] | None = None,
ollama_host: str = "http://localhost:11434",
) -> str | None:
"""Get rendered prompt from Ollama using debug_render_only."""
import requests
# Convert messages to Ollama format
ollama_messages = []
for msg in messages:
ollama_msg = {"role": msg["role"]}
if "content" in msg:
ollama_msg["content"] = msg["content"]
if "thinking" in msg:
ollama_msg["thinking"] = msg["thinking"]
if "tool_calls" in msg:
# Convert tool_calls to Ollama format
tool_calls = []
for tc in msg["tool_calls"]:
tool_call = {
"function": {
"name": tc["function"]["name"],
"arguments": tc["function"]["arguments"],
}
}
if "id" in tc:
tool_call["id"] = tc["id"]
tool_calls.append(tool_call)
ollama_msg["tool_calls"] = tool_calls
if "tool_call_id" in msg:
ollama_msg["tool_call_id"] = msg["tool_call_id"]
ollama_messages.append(ollama_msg)
payload = {
"model": ollama_model,
"messages": ollama_messages,
"stream": False,
"_debug_render_only": True,
}
if tools:
payload["tools"] = tools
try:
resp = requests.post(f"{ollama_host}/api/chat", json=payload, timeout=30)
resp.raise_for_status()
data = resp.json()
# Field name is _debug_info with underscore prefix
if "_debug_info" in data and "rendered_template" in data["_debug_info"]:
return data["_debug_info"]["rendered_template"]
return None
except requests.exceptions.ConnectionError:
print(f" [ERROR] Cannot connect to Ollama at {ollama_host}", file=sys.stderr)
return None
except Exception as e:
print(f" [ERROR] Ollama request failed: {e}", file=sys.stderr)
return None
def compute_diff(hf_prompt: str, ollama_prompt: str) -> str:
"""Compute a unified diff between HuggingFace and Ollama prompts."""
import difflib
hf_lines = hf_prompt.splitlines(keepends=True)
ollama_lines = ollama_prompt.splitlines(keepends=True)
diff = difflib.unified_diff(
ollama_lines,
hf_lines,
fromfile="Ollama",
tofile="HuggingFace",
lineterm="",
)
return "".join(diff)
def print_test_output(
name: str,
messages: list[dict],
tools: list[dict] | None,
hf_prompt: str,
ollama_prompt: str | None = None,
as_repr: bool = False,
):
"""Print test output in a format suitable for Go test creation and LLM diffing."""
print(f"\n{'='*60}")
print(f"Test: {name}")
print("=" * 60)
print("\n--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print("\n--- Tools ---")
print(json.dumps(tools, indent=2))
if ollama_prompt is not None:
# Comparison mode
if hf_prompt == ollama_prompt:
print("\n--- Result: MATCH ---")
print("\n--- Prompt (both identical) ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
else:
print("\n--- Result: MISMATCH ---")
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("\n--- Ollama Prompt ---")
if as_repr:
print(repr(ollama_prompt))
else:
print(ollama_prompt)
print("\n--- Diff (Ollama -> HuggingFace) ---")
diff = compute_diff(hf_prompt, ollama_prompt)
if diff:
print(diff)
else:
print("(no line-level diff, check whitespace)")
else:
# HuggingFace only mode
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("=" * 60)
def run_tests(
model: str,
as_repr: bool = False,
test_filter: str | None = None,
ollama_model: str | None = None,
ollama_host: str = "http://localhost:11434",
):
"""Run all predefined test cases against a model."""
if ollama_model:
print(f"\nComparing HuggingFace ({model}) vs Ollama ({ollama_model})\n")
else:
print(f"\nRunning tests against: {model}\n")
matches = 0
mismatches = 0
errors = 0
for test_case in TEST_CASES:
name = test_case["name"]
messages = test_case["messages"]
tools = test_case["tools"]
# Filter tests if specified
if test_filter and test_filter.lower() not in name.lower():
continue
try:
hf_prompt = apply_template(model, messages, tools)
ollama_prompt = None
if ollama_model:
ollama_prompt = get_ollama_prompt(
ollama_model, messages, tools, ollama_host
)
if ollama_prompt is None:
errors += 1
elif hf_prompt == ollama_prompt:
matches += 1
else:
mismatches += 1
print_test_output(
name, messages, tools, hf_prompt, ollama_prompt, as_repr=as_repr
)
except Exception as e:
errors += 1
print(f"\n{'='*60}")
print(f"Test: {name} - FAILED")
print(f"--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print(f"--- Tools ---")
print(json.dumps(tools, indent=2))
print(f"--- Error ---")
print(f"{e}")
print("=" * 60)
# Print summary if comparing
if ollama_model:
total = matches + mismatches + errors
print(f"\n{'='*60}")
print("SUMMARY")
print("=" * 60)
print(f" Total: {total}")
print(f" Matches: {matches}")
print(f" Mismatches: {mismatches}")
print(f" Errors: {errors}")
print("=" * 60)
def show_template(model: str):
"""Show the chat template for a model."""
tokenizer = get_tokenizer(model)
print(f"\nChat template for {model}:\n")
print("-" * 60)
print(tokenizer.chat_template)
print("-" * 60)
def start_server(host: str = "0.0.0.0", port: int = 8000):
"""Start the FastAPI server for manual testing."""
from typing import Optional, List, Dict, Any as TypingAny
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
class Message(BaseModel):
role: str
content: Optional[str] = None
tool_calls: Optional[List[Dict[str, TypingAny]]] = None
tool_call_id: Optional[str] = None
class GeneratePromptRequest(BaseModel):
messages: List[Message]
model: str = "PrimeIntellect/INTELLECT-3"
tools: Optional[List[Dict[str, TypingAny]]] = None
inject_tools_as_functions: bool = False
class GeneratePromptResponse(BaseModel):
prompt: str
model: str
app = FastAPI(title="HuggingFace Prompt Generator", version="1.0.0")
@app.post("/generate-prompt", response_model=GeneratePromptResponse)
async def generate_prompt(request: GeneratePromptRequest):
try:
messages = []
for msg in request.messages:
message_dict = {"role": msg.role}
if msg.content is not None:
message_dict["content"] = msg.content
if msg.tool_calls is not None:
tool_calls = []
for tc in msg.tool_calls:
tc_copy = tc.copy()
if "function" in tc_copy and "arguments" in tc_copy["function"]:
args = tc_copy["function"]["arguments"]
if isinstance(args, str):
try:
tc_copy["function"]["arguments"] = json.loads(args)
except json.JSONDecodeError:
pass
tool_calls.append(tc_copy)
message_dict["tool_calls"] = tool_calls
if msg.tool_call_id is not None:
message_dict["tool_call_id"] = msg.tool_call_id
messages.append(message_dict)
prompt = apply_template(request.model, messages, request.tools)
return GeneratePromptResponse(prompt=prompt, model=request.model)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "healthy"}
print(f"Starting server on http://{host}:{port}")
print("Endpoints:")
print(" POST /generate-prompt - Generate prompt from messages")
print(" GET /health - Health check")
uvicorn.run(app, host=host, port=port)
def main():
parser = argparse.ArgumentParser(
description="HuggingFace Prompt Testing Tool",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--model",
"-m",
type=str,
help="HuggingFace model name (e.g., PrimeIntellect/INTELLECT-3)",
)
parser.add_argument(
"--ollama-model",
"-o",
type=str,
help="Ollama model name to compare against (e.g., qwen3-coder)",
)
parser.add_argument(
"--ollama-host",
type=str,
default="http://localhost:11434",
help="Ollama server URL (default: http://localhost:11434)",
)
parser.add_argument(
"--serve",
"-s",
action="store_true",
help="Start FastAPI server for manual curl testing",
)
parser.add_argument(
"--port",
"-p",
type=int,
default=8000,
help="Server port (default: 8000)",
)
parser.add_argument(
"--show-template",
"-t",
action="store_true",
help="Show the chat template for the model",
)
parser.add_argument(
"--repr",
"-r",
action="store_true",
help="Output prompts as Python repr (shows escape sequences)",
)
parser.add_argument(
"--filter",
"-f",
type=str,
help="Filter tests by name (substring match)",
)
args = parser.parse_args()
if args.serve:
start_server(port=args.port)
elif args.model:
if args.show_template:
show_template(args.model)
else:
run_tests(
args.model,
as_repr=args.repr,
test_filter=args.filter,
ollama_model=args.ollama_model,
ollama_host=args.ollama_host,
)
else:
parser.print_help()
print("\nExample usage:")
print(" uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3")
print(" uv run cmd/chat_template/chat_template.py --model Qwen/Qwen3-Coder-480B-A35B-Instruct --ollama-model qwen3-coder")
print(" uv run cmd/chat_template/chat_template.py --serve")
sys.exit(1)
if __name__ == "__main__":
main()
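
The script's Ollama comparison relies on the `_debug_render_only` request flag; the same request can be issued directly, e.g. from Go (a sketch, assuming a local server and the field names used in the script above; the model name is a placeholder):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	payload := map[string]any{
		"model":              "intellect3", // placeholder: any locally available model
		"messages":           []map[string]string{{"role": "user", "content": "Hello!"}},
		"stream":             false,
		"_debug_render_only": true,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		log.Fatal(err)
	}
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Per the script above, the rendered prompt comes back under
	// _debug_info.rendered_template instead of a generation.
	var out struct {
		DebugInfo struct {
			RenderedTemplate string `json:"rendered_template"`
		} `json:"_debug_info"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.DebugInfo.RenderedTemplate)
}
```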


@@ -1,148 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
_ "github.com/ollama/ollama/model/models" // Register all models
"github.com/ollama/ollama/model/renderers"
"github.com/ollama/ollama/sample"
)
func main() {
modelPath := "/Users/parth/.ollama/models/blobs/sha256-a87e10578f328b087f888ac7bd1018555e26028a1130980f20312b4de3a10d70"
fmt.Println("Loading OLMo model...")
m, err := model.New(modelPath, ml.BackendParams{AllocMemory: true})
if err != nil {
log.Fatal(err)
}
if err := m.Backend().Load(context.Background(), func(f float32) {}); err != nil {
log.Fatal(err)
}
fmt.Println("✅ Model loaded successfully!")
// Initialize the cache
cache := m.Config().Cache
if cache != nil {
// Initialize with reasonable defaults:
// - dtype: F16
// - maxSequences: 1 (single sequence)
// - capacity: 2048 (context length)
// - maxBatch: 512
cache.Init(m.Backend(), ml.DTypeF16, 1, 2048, 512)
fmt.Printf("✅ Cache initialized (type: %T)\n", cache)
}
// Use the olmo3 renderer to format the prompt properly
messages := []api.Message{
{Role: "user", Content: "wagwan"},
}
// prompt := "Question: What is machine learning? Answer:"
prompt, err := renderers.RenderWithRenderer("olmo3", messages, nil, nil)
if err != nil {
log.Fatal(err)
}
// prompt = prompt[:len(prompt)]
// prompt := "Question: What is machine learning? Answer:"
fmt.Printf("\nRendered prompt:\n%s\n", prompt)
tp := m.(model.TextProcessor)
tokens, err := tp.Encode(prompt, false)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Tokens: %v (count: %d)\n", tokens, len(tokens))
// Generate 20 tokens
maxTokens := 20
generated := make([]int32, 0, maxTokens)
// Create sampler (temperature=0 for greedy sampling)
sampler := sample.NewSampler(0, 0, 0, 0, -1, nil)
for i := 0; i < maxTokens; i++ {
// Create a new context for each generation step to avoid memory buildup
ctx := m.Backend().NewContext()
var inputTokens []int32
var positions []int32
if i == 0 {
// First iteration: process all prompt tokens
inputTokens = tokens
positions = make([]int32, len(tokens))
for j := range positions {
positions[j] = int32(j)
}
} else {
// Subsequent iterations: only process the newly generated token
// The last token is at position len(tokens)-1 (its index in the sequence)
inputTokens = []int32{tokens[len(tokens)-1]}
positions = []int32{int32(len(tokens) - 1)}
}
sequences := make([]int, len(inputTokens))
// All tokens belong to sequence 0
inputsTensor := ctx.Input().FromInts(inputTokens, len(inputTokens))
outputs := ctx.Input().FromInts([]int32{int32(len(inputTokens) - 1)}, 1)
batch := input.Batch{
Inputs: inputsTensor,
Positions: positions,
Sequences: sequences,
Outputs: outputs,
}
// Forward pass (model.Forward handles cache.StartForward internally)
logits, err := model.Forward(ctx, m, batch)
if err != nil {
ctx.Close()
log.Fatal(err)
}
logits = logits.Contiguous(ctx)
ctx.Forward(logits).Compute(logits)
logitValues := logits.Floats()
// Sample next token
nextToken, err := sampler.Sample(logitValues)
if err != nil {
ctx.Close()
log.Fatal(err)
}
// Close context before moving to next iteration
ctx.Close()
generated = append(generated, nextToken)
tokens = append(tokens, nextToken)
// Decode and print
decoded, _ := tp.Decode([]int32{nextToken})
fmt.Print(decoded)
// Stop on EOS or <|im_end|>
if nextToken == 2 || nextToken == 1 { // Common EOS tokens
break
}
// Check if we generated <|im_end|> (stop token for chat)
if decoded == "<|im_end|>" {
break
}
}
fmt.Println("\n\n✅ Generation completed!")
fullText, _ := tp.Decode(generated)
fmt.Printf("Generated: %s\n", fullText)
}

convert/convert.go

@@ -200,8 +200,6 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 		conv = &qwen25VLModel{}
 	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
 		conv = &qwen3VLModel{}
-	case "OLMo2ForCausalLM", "Olmo2ForCausalLM", "OLMo3ForCausalLM", "Olmo3ForCausalLM":
-		conv = &olmoModel{}
 	case "BertModel":
 		conv = &bertModel{}
 	case "CohereForCausalLM":


@@ -1,94 +0,0 @@
package convert
import (
"cmp"
"github.com/ollama/ollama/fs/ggml"
)
type olmoModel struct {
ModelParameters
HiddenSize uint32 `json:"hidden_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
RMSNormEPS float32 `json:"rms_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
ClampKQV float32 `json:"f_clamp_kqv"`
SlidingWindow uint32 `json:"sliding_window"`
LayerTypes []string `json:"layer_types"`
}
var _ ModelConverter = (*olmoModel)(nil)
func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "olmo"
kv["olmo.block_count"] = p.NumHiddenLayers
kv["olmo.context_length"] = p.MaxPositionEmbeddings
kv["olmo.embedding_length"] = p.HiddenSize
kv["olmo.feed_forward_length"] = p.IntermediateSize
kv["olmo.attention.head_count"] = p.NumAttentionHeads
kv["olmo.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
if p.RopeTheta > 0 {
kv["olmo.rope.freq_base"] = p.RopeTheta
} else {
kv["olmo.rope.freq_base"] = float32(10000.0)
}
if p.RMSNormEPS > 0 {
kv["olmo.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
}
if p.ClampKQV > 0 {
kv["olmo.attention.clamp_kqv"] = p.ClampKQV
}
if p.SlidingWindow > 0 {
kv["olmo.attention.sliding_window"] = p.SlidingWindow
}
if len(p.LayerTypes) > 0 {
kv["olmo.attention.layer_types"] = p.LayerTypes
}
return kv
}
func (p *olmoModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, 0, len(ts))
for _, t := range ts {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *olmoModel) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.layers", "blk",
"model.norm", "output_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_norm", "attn_k_norm",
"post_attention_layernorm", "post_attention_norm",
"post_feedforward_layernorm", "post_ffw_norm",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
}
}


@@ -65,6 +65,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 	}
 	slog.Info("discovering available GPUs...")
+	detectIncompatibleLibraries()

 	// Warn if any user-overrides are set which could lead to incorrect GPU discovery
 	overrideWarnings()

@@ -98,6 +99,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 			continue
 		} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
 			continue
+		} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
+			slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
+			continue
 		} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
 			slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
 			continue

@@ -484,3 +488,16 @@ func overrideWarnings() {
 		slog.Warn("if GPUs are not correctly discovered, unset and try again")
 	}
 }
+
+func detectIncompatibleLibraries() {
+	if runtime.GOOS != "windows" {
+		return
+	}
+	basePath, err := exec.LookPath("ggml-base.dll")
+	if err != nil || basePath == "" {
+		return
+	}
+	if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
+		slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
+	}
+}

docs/faq.mdx

@@ -57,8 +57,13 @@ ollama ps
 ```

 <Info>
-**Output**: ``` NAME ID SIZE PROCESSOR UNTIL llama3:70b bcfb190ca3a7 42 GB
-100% GPU 4 minutes from now ```
+**Output**:
+
+```
+NAME          ID            SIZE   PROCESSOR  UNTIL
+llama3:70b    bcfb190ca3a7  42 GB  100% GPU   4 minutes from now
+```
 </Info>

 The `Processor` column will show which memory the model was loaded in to:

@@ -385,4 +390,4 @@ Ollama for Windows and macOS register as a login item during installation. You
 - In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`

 **MacOS**
-- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.
\ No newline at end of file
+- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.


@@ -149,9 +149,6 @@ PARAMETER <parameter> <parametervalue>
 | Parameter | Description | Value Type | Example Usage |
 | --------- | ----------- | ---------- | ------------- |
-| mirostat | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) | int | mirostat 0 |
-| mirostat_eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) | float | mirostat_eta 0.1 |
-| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
 | num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
 | repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
 | repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |


@@ -252,7 +252,6 @@ func (kv KV) OllamaEngineRequired() bool {
 		"deepseekocr",
 		"deepseek2",
 		"nomic-bert",
-		"olmo2",
 	}, kv.Architecture())
 }


@@ -13,7 +13,6 @@ import (
 	_ "github.com/ollama/ollama/model/models/mistral3"
 	_ "github.com/ollama/ollama/model/models/mllama"
 	_ "github.com/ollama/ollama/model/models/nomicbert"
-	_ "github.com/ollama/ollama/model/models/olmo"
 	_ "github.com/ollama/ollama/model/models/qwen2"
 	_ "github.com/ollama/ollama/model/models/qwen25vl"
 	_ "github.com/ollama/ollama/model/models/qwen3"


@@ -1,271 +0,0 @@
package olmo
import (
"cmp"
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/fast"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Options struct {
hiddenSize, numHeads, numKVHeads int
headDim, ropeDim int
eps, ropeBase, ropeScale float32
clampKQV float32
originalContextLength int
attnFactor float32
slidingWindow int32
slidingWindowPattern []bool // per-layer SWA pattern (true = SWA, false = full attention)
}
type Model struct {
model.Base
model.TextProcessor
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Layers []Layer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
Options
}
func New(c fs.Config) (model.Model, error) {
vocabulary := model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
}
if c.String("tokenizer.ggml.model") != "gpt2" {
return nil, model.ErrUnsupportedTokenizer
}
var pretokenizers []string
if c.String("tokenizer.ggml.pre") != "default" {
pretokenizers = []string{
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
}
}
processor := model.NewBytePairEncoding(&vocabulary, pretokenizers...)
slidingWindow := int32(c.Uint("attention.sliding_window"))
slidingWindowPattern := c.Bools("attention.sliding_window_pattern")
m := Model{
TextProcessor: processor,
Layers: make([]Layer, c.Uint("block_count")),
Options: Options{
hiddenSize: int(c.Uint("embedding_length")),
numHeads: int(c.Uint("attention.head_count")),
numKVHeads: int(c.Uint("attention.head_count_kv")),
headDim: int(c.Uint("attention.key_length")),
ropeDim: int(c.Uint("rope.dimension_count")),
eps: c.Float("attention.layer_norm_rms_epsilon"),
ropeBase: c.Float("rope.freq_base", 1e4),
ropeScale: c.Float("rope.scaling.factor", 1),
clampKQV: c.Float("attention.clamp_kqv", 0),
originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
attnFactor: c.Float("rope.scaling.attn_factor", 1),
slidingWindow: slidingWindow,
slidingWindowPattern: slidingWindowPattern,
},
}
// OLMo3 uses interleaved sliding window attention (every 4th layer is full attention)
m.Cache = kvcache.NewWrapperCache(
kvcache.NewSWACache(slidingWindow, m.Shift),
kvcache.NewCausalCache(m.Shift),
)
return &m, nil
}
type SelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output"`
QNorm *nn.RMSNorm `gguf:"attn_q_norm"`
KNorm *nn.RMSNorm `gguf:"attn_k_norm"`
RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`
}
func (o *Options) ropeOptions(factors ml.Tensor, isSWA bool) []func(*rope.Options) {
opts := []func(*rope.Options){
rope.WithFactors(factors),
}
if o.originalContextLength > 0 {
if isSWA {
// For SWA layers, use regular rope with no YaRN scaling
// ext_factor=0.0, attn_factor=1.0 per llama.cpp
opts = append(opts,
rope.WithOriginalContextLength(o.originalContextLength),
rope.WithExtrapolationFactor(0.),
rope.WithAttentionFactor(1.),
)
} else {
// For full attention layers, use YaRN scaling
opts = append(opts,
rope.WithOriginalContextLength(o.originalContextLength),
rope.WithExtrapolationFactor(1.),
rope.WithAttentionFactor(o.attnFactor),
)
}
}
return opts
}
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positions ml.Tensor, cache kvcache.Cache, opts *Options, isSWA bool) ml.Tensor {
batchSize := hiddenState.Dim(1)
headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
ropeDim := cmp.Or(opts.ropeDim, headDim)
query := sa.Query.Forward(ctx, hiddenState)
if sa.QNorm != nil {
query = sa.QNorm.Forward(ctx, query, opts.eps)
}
query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)
key := sa.Key.Forward(ctx, hiddenState)
if sa.KNorm != nil {
key = sa.KNorm.Forward(ctx, key, opts.eps)
}
key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
value := sa.Value.Forward(ctx, hiddenState)
value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
freqScale := float32(1.0)
if !isSWA {
freqScale = 1. / opts.ropeScale
}
ropeOpts := opts.ropeOptions(sa.RopeFactors, isSWA)
query = fast.RoPE(ctx, query, positions, ropeDim, opts.ropeBase, freqScale, ropeOpts...)
key = fast.RoPE(ctx, key, positions, ropeDim, opts.ropeBase, freqScale, ropeOpts...)
attention := nn.Attention(ctx, query, key, value, 1.0/math.Sqrt(float64(headDim)), cache)
attention = attention.Reshape(ctx, headDim*opts.numHeads, batchSize)
return sa.Output.Forward(ctx, attention)
}
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
ropeDim := cmp.Or(m.ropeDim, m.hiddenSize/m.numHeads)
isSWA := m.isSWALayer(layer)
freqScale := float32(1.0)
if !isSWA {
freqScale = 1. / m.ropeScale
}
ropeOpts := m.Options.ropeOptions(m.Layers[layer].SelfAttention.RopeFactors, isSWA)
return fast.RoPE(ctx, key, shift, ropeDim, m.ropeBase, freqScale, ropeOpts...), nil
}
type MLP struct {
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
Gate *nn.Linear `gguf:"ffn_gate"`
}
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx, mlp.Up.Forward(ctx, hiddenState))
return mlp.Down.Forward(ctx, hiddenState)
}
type Layer struct {
SelfAttention *SelfAttention
PostAttentionNorm *nn.RMSNorm `gguf:"post_attention_norm"`
MLP *MLP
PostFFWNorm *nn.RMSNorm `gguf:"post_ffw_norm"`
}
func (l *Layer) Forward(ctx ml.Context, hiddenState, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options, isSWA bool) ml.Tensor {
residual := hiddenState
hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positions, cache, opts, isSWA)
if outputs != nil {
hiddenState = hiddenState.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
}
if l.PostAttentionNorm != nil {
hiddenState = l.PostAttentionNorm.Forward(ctx, hiddenState, opts.eps)
}
ffnInput := hiddenState.Add(ctx, residual)
hiddenState = l.MLP.Forward(ctx, ffnInput, opts)
if l.PostFFWNorm != nil {
hiddenState = l.PostFFWNorm.Forward(ctx, hiddenState, opts.eps)
}
return hiddenState.Add(ctx, ffnInput)
}
// isSWALayer returns true if the layer uses sliding window attention.
// Uses the sliding_window_pattern from the model config if available,
// otherwise falls back to the default OLMo3 pattern (every 4th layer is full attention).
func (m *Model) isSWALayer(layerIdx int) bool {
if len(m.slidingWindowPattern) > layerIdx {
return m.slidingWindowPattern[layerIdx]
}
// Fallback: OLMo3 pattern where every 4th layer (indices 3, 7, 11, ...) uses full attention
return (layerIdx+1)%4 != 0
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
for i, layer := range m.Layers {
m.Cache.SetLayer(i)
isSWA := m.isSWALayer(i)
// Set cache type for interleaved SWA (OLMo3)
if wc, ok := m.Cache.(*kvcache.WrapperCache); ok {
if isSWA {
wc.SetLayerType(0) // SWA cache
} else {
wc.SetLayerType(1) // Causal cache
}
}
var outputs ml.Tensor
if i == len(m.Layers)-1 {
outputs = batch.Outputs
}
hiddenState = layer.Forward(ctx, hiddenState, positions, outputs, m.Cache, &m.Options, isSWA)
}
hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
return m.Output.Forward(ctx, hiddenState), nil
}
func init() {
model.Register("olmo2", New)
}


@@ -1,132 +0,0 @@
package olmo
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
"github.com/ollama/ollama/sample"
)
func main() {
modelPath := "/Users/nicole/models/Olmo-3-7B-Think/olmo-3-7b-think-q8_0.gguf"
fmt.Println("Loading OLMo model...")
m, err := model.New(modelPath, ml.BackendParams{AllocMemory: true})
if err != nil {
log.Fatal(err)
}
if err := m.Backend().Load(context.Background(), func(f float32) {}); err != nil {
log.Fatal(err)
}
fmt.Println("✅ Model loaded successfully!")
// Initialize the cache
cache := m.Config().Cache
if cache != nil {
// Initialize with reasonable defaults:
// - dtype: F16
// - maxSequences: 1 (single sequence)
// - capacity: 2048 (context length)
// - maxBatch: 512
cache.Init(m.Backend(), ml.DTypeF16, 1, 2048, 512)
fmt.Printf("✅ Cache initialized (type: %T)\n", cache)
}
// Test generation
prompt := "Question: What is machine learning? Answer:"
fmt.Printf("\nPrompt: %s\n", prompt)
tp := m.(model.TextProcessor)
tokens, err := tp.Encode(prompt, true)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Tokens: %v (count: %d)\n", tokens, len(tokens))
// Generate 20 tokens
maxTokens := 20
generated := make([]int32, 0, maxTokens)
// Create sampler (temperature=0 for greedy sampling)
sampler := sample.NewSampler(0, 0, 0, 0, -1, nil)
for i := 0; i < maxTokens; i++ {
// Create a new context for each generation step to avoid memory buildup
ctx := m.Backend().NewContext()
var inputTokens []int32
var positions []int32
if i == 0 {
// First iteration: process all prompt tokens
inputTokens = tokens
positions = make([]int32, len(tokens))
for j := range positions {
positions[j] = int32(j)
}
} else {
// Subsequent iterations: only process the newly generated token
// The last token is at position len(tokens)-1 (its index in the sequence)
inputTokens = []int32{tokens[len(tokens)-1]}
positions = []int32{int32(len(tokens) - 1)}
}
sequences := make([]int, len(inputTokens))
// All tokens belong to sequence 0
inputsTensor := ctx.Input().FromInts(inputTokens, len(inputTokens))
outputs := ctx.Input().FromInts([]int32{int32(len(inputTokens) - 1)}, 1)
batch := input.Batch{
Inputs: inputsTensor,
Positions: positions,
Sequences: sequences,
Outputs: outputs,
}
// Forward pass (model.Forward handles cache.StartForward internally)
logits, err := model.Forward(ctx, m, batch)
if err != nil {
ctx.Close()
log.Fatal(err)
}
logits = logits.Contiguous(ctx)
ctx.Forward(logits).Compute(logits)
logitValues := logits.Floats()
// Sample next token
nextToken, err := sampler.Sample(logitValues)
if err != nil {
ctx.Close()
log.Fatal(err)
}
// Close context before moving to next iteration
ctx.Close()
generated = append(generated, nextToken)
tokens = append(tokens, nextToken)
// Decode and print
decoded, _ := tp.Decode([]int32{nextToken})
fmt.Print(decoded)
// Stop on EOS
if nextToken == 2 || nextToken == 1 { // Common EOS tokens
break
}
}
fmt.Println("\n\n✅ Generation completed!")
fullText, _ := tp.Decode(generated)
fmt.Printf("Generated: %s\n", fullText)
}


@@ -1,469 +0,0 @@
package parsers
import (
"context"
"fmt"
"log/slog"
"regexp"
"strconv"
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
)
type olmo3ParserState int
const (
olmo3StateContent olmo3ParserState = iota
olmo3StateToolCalls
olmo3StateToolCallsDone
)
const (
olmo3FuncCallsOpenTag = "<function_calls>"
olmo3FuncCallsCloseTag = "</function_calls>"
)
type Olmo3Parser struct {
state olmo3ParserState
buffer strings.Builder
}
func (p *Olmo3Parser) HasToolSupport() bool {
return true
}
func (p *Olmo3Parser) HasThinkingSupport() bool {
return false
}
func (p *Olmo3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.state = olmo3StateContent
return tools
}
type olmo3ParserEvent interface {
isOlmo3ParserEvent()
}
type olmo3ParserEventContent struct {
content string
}
type olmo3ParserEventToolCalls struct {
calls []api.ToolCall
}
func (olmo3ParserEventContent) isOlmo3ParserEvent() {}
func (olmo3ParserEventToolCalls) isOlmo3ParserEvent() {}
func (p *Olmo3Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
p.buffer.WriteString(s)
if done {
// Drain any remaining content
bufStr := p.buffer.String()
p.buffer.Reset()
if p.state == olmo3StateContent && len(bufStr) > 0 {
return bufStr, "", nil, nil
}
return "", "", nil, nil
}
events := p.parseEvents()
var contentSb strings.Builder
var allCalls []api.ToolCall
for _, event := range events {
switch event := event.(type) {
case olmo3ParserEventContent:
contentSb.WriteString(event.content)
case olmo3ParserEventToolCalls:
allCalls = append(allCalls, event.calls...)
}
}
return contentSb.String(), "", allCalls, nil
}
func (p *Olmo3Parser) parseEvents() []olmo3ParserEvent {
var all []olmo3ParserEvent
keepLooping := true
for keepLooping {
var events []olmo3ParserEvent
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
if len(all) > 0 {
slog.Log(context.TODO(), logutil.LevelTrace, "olmo3 events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
}
return all
}
func (p *Olmo3Parser) eat() ([]olmo3ParserEvent, bool) {
var events []olmo3ParserEvent
bufStr := p.buffer.String()
if bufStr == "" {
return events, false
}
switch p.state {
case olmo3StateContent:
if strings.Contains(bufStr, olmo3FuncCallsOpenTag) {
// Found <function_calls> tag
split := strings.SplitN(bufStr, olmo3FuncCallsOpenTag, 2)
content := split[0]
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = olmo3StateToolCalls
if len(content) > 0 {
events = append(events, olmo3ParserEventContent{content: content})
}
return events, true
} else if overlapLen := overlap(bufStr, olmo3FuncCallsOpenTag); overlapLen > 0 {
// Partial <function_calls> tag - withhold ambiguous content
unambiguous := bufStr[:len(bufStr)-overlapLen]
ambiguous := bufStr[len(bufStr)-overlapLen:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, olmo3ParserEventContent{content: unambiguous})
}
return events, false
} else {
// Regular content - emit all
p.buffer.Reset()
if len(bufStr) > 0 {
events = append(events, olmo3ParserEventContent{content: bufStr})
}
return events, false
}
case olmo3StateToolCalls:
if strings.Contains(bufStr, olmo3FuncCallsCloseTag) {
// Found </function_calls> tag
split := strings.SplitN(bufStr, olmo3FuncCallsCloseTag, 2)
toolCallsStr := split[0]
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = olmo3StateToolCallsDone
// Parse the function calls
calls, err := parseOlmo3FunctionCalls(toolCallsStr)
if err != nil {
slog.Log(context.TODO(), logutil.LevelTrace, "failed to parse olmo3 function calls", "error", err, "content", toolCallsStr)
} else if len(calls) > 0 {
events = append(events, olmo3ParserEventToolCalls{calls: calls})
}
return events, true
} else if overlapLen := overlap(bufStr, olmo3FuncCallsCloseTag); overlapLen > 0 {
// Partial </function_calls> tag - wait for more
return events, false
}
// Still collecting tool calls, wait for close tag
return events, false
case olmo3StateToolCallsDone:
// After tool calls, emit remaining content
p.buffer.Reset()
p.state = olmo3StateContent
if len(bufStr) > 0 {
events = append(events, olmo3ParserEventContent{content: bufStr})
}
return events, false
}
return events, false
}
// parseOlmo3FunctionCalls parses function calls in Python-esque format:
// func_name(arg1="value1", arg2=123)
// Multiple calls are separated by newlines
func parseOlmo3FunctionCalls(s string) ([]api.ToolCall, error) {
var calls []api.ToolCall
s = strings.TrimSpace(s)
if s == "" {
return calls, nil
}
// Split by newlines for multiple function calls
lines := strings.Split(s, "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if line == "" {
continue
}
call, err := parseOlmo3SingleFunctionCall(line)
if err != nil {
return nil, fmt.Errorf("failed to parse function call %q: %w", line, err)
}
calls = append(calls, call)
}
return calls, nil
}
// Regex to match function call: func_name(args)
var funcCallRegex = regexp.MustCompile(`^(\w+)\((.*)\)$`)
// Regex to match a single argument: key=value
// Value can be: "string", 'string', number, true, false, null, or nested structures
var argRegex = regexp.MustCompile(`^(\w+)=(.+)$`)
func parseOlmo3SingleFunctionCall(s string) (api.ToolCall, error) {
matches := funcCallRegex.FindStringSubmatch(s)
if matches == nil {
return api.ToolCall{}, fmt.Errorf("invalid function call format")
}
funcName := matches[1]
argsStr := matches[2]
args, err := parseOlmo3Arguments(argsStr)
if err != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse arguments: %w", err)
}
return api.ToolCall{
Function: api.ToolCallFunction{
Name: funcName,
Arguments: args,
},
}, nil
}
// parseOlmo3Arguments parses comma-separated key=value pairs
// Handles nested parentheses, brackets, braces, and quoted strings
func parseOlmo3Arguments(s string) (map[string]any, error) {
args := make(map[string]any)
s = strings.TrimSpace(s)
if s == "" {
return args, nil
}
// Split by commas, but respect nested structures and quotes
parts := splitArguments(s)
for _, part := range parts {
part = strings.TrimSpace(part)
if part == "" {
continue
}
// Find the first = sign
eqIdx := strings.Index(part, "=")
if eqIdx == -1 {
return nil, fmt.Errorf("invalid argument format: %s", part)
}
key := strings.TrimSpace(part[:eqIdx])
valueStr := strings.TrimSpace(part[eqIdx+1:])
value, err := parseOlmo3Value(valueStr)
if err != nil {
return nil, fmt.Errorf("failed to parse value for %s: %w", key, err)
}
args[key] = value
}
return args, nil
}
// splitArguments splits arguments by commas, respecting quotes and nested structures
func splitArguments(s string) []string {
var parts []string
var current strings.Builder
depth := 0
inString := false
stringChar := byte(0)
escaped := false
for i := 0; i < len(s); i++ {
c := s[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' && inString {
current.WriteByte(c)
escaped = true
continue
}
if (c == '"' || c == '\'') && !inString {
inString = true
stringChar = c
current.WriteByte(c)
continue
}
if c == stringChar && inString {
inString = false
stringChar = 0
current.WriteByte(c)
continue
}
if !inString {
switch c {
case '(', '[', '{':
depth++
current.WriteByte(c)
case ')', ']', '}':
depth--
current.WriteByte(c)
case ',':
if depth == 0 {
parts = append(parts, current.String())
current.Reset()
continue
}
current.WriteByte(c)
default:
current.WriteByte(c)
}
} else {
current.WriteByte(c)
}
}
if current.Len() > 0 {
parts = append(parts, current.String())
}
return parts
}
// parseOlmo3Value parses a value which can be a string, number, boolean, null, array, or object
func parseOlmo3Value(s string) (any, error) {
s = strings.TrimSpace(s)
// Check for quoted string
if (strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`)) ||
(strings.HasPrefix(s, `'`) && strings.HasSuffix(s, `'`)) {
// Remove quotes and unescape
inner := s[1 : len(s)-1]
return unescapeString(inner), nil
}
// Check for boolean
if s == "true" || s == "True" {
return true, nil
}
if s == "false" || s == "False" {
return false, nil
}
// Check for null/None
if s == "null" || s == "None" || s == "nil" {
return nil, nil
}
// Check for number
if i, err := strconv.ParseInt(s, 10, 64); err == nil {
return i, nil
}
if f, err := strconv.ParseFloat(s, 64); err == nil {
return f, nil
}
// Check for array [...]
if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") {
return parseOlmo3Array(s[1 : len(s)-1])
}
// Check for object {...}
if strings.HasPrefix(s, "{") && strings.HasSuffix(s, "}") {
return parseOlmo3Object(s[1 : len(s)-1])
}
// Default to string without quotes
return s, nil
}
func parseOlmo3Array(s string) ([]any, error) {
s = strings.TrimSpace(s)
if s == "" {
return []any{}, nil
}
parts := splitArguments(s)
var arr []any
for _, part := range parts {
val, err := parseOlmo3Value(part)
if err != nil {
return nil, err
}
arr = append(arr, val)
}
return arr, nil
}
func parseOlmo3Object(s string) (map[string]any, error) {
s = strings.TrimSpace(s)
if s == "" {
return map[string]any{}, nil
}
// Objects use key: value or "key": value format
obj := make(map[string]any)
parts := splitArguments(s)
for _, part := range parts {
part = strings.TrimSpace(part)
if part == "" {
continue
}
// Find colon separator
colonIdx := strings.Index(part, ":")
if colonIdx == -1 {
return nil, fmt.Errorf("invalid object entry: %s", part)
}
keyStr := strings.TrimSpace(part[:colonIdx])
valueStr := strings.TrimSpace(part[colonIdx+1:])
// Remove quotes from key if present
if (strings.HasPrefix(keyStr, `"`) && strings.HasSuffix(keyStr, `"`)) ||
(strings.HasPrefix(keyStr, `'`) && strings.HasSuffix(keyStr, `'`)) {
keyStr = keyStr[1 : len(keyStr)-1]
}
val, err := parseOlmo3Value(valueStr)
if err != nil {
return nil, fmt.Errorf("failed to parse value for key %s: %w", keyStr, err)
}
obj[keyStr] = val
}
return obj, nil
}
func unescapeString(s string) string {
// Handle common escape sequences
s = strings.ReplaceAll(s, `\\`, "\x00") // Placeholder for backslash
s = strings.ReplaceAll(s, `\"`, `"`)
s = strings.ReplaceAll(s, `\'`, `'`)
s = strings.ReplaceAll(s, `\n`, "\n")
s = strings.ReplaceAll(s, `\t`, "\t")
s = strings.ReplaceAll(s, `\r`, "\r")
s = strings.ReplaceAll(s, "\x00", `\`) // Restore backslash
return s
}


@@ -1,483 +0,0 @@
package parsers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestOlmo3Parser(t *testing.T) {
tests := []struct {
name string
input string
expectedContent string
expectedThinking string
expectedCalls []api.ToolCall
}{
{
name: "simple content",
input: "Hello, how can I help you?",
expectedContent: "Hello, how can I help you?",
},
{
name: "simple tool call",
input: `<function_calls>get_weather(location="San Francisco")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "San Francisco"},
},
},
},
},
{
name: "content then tool call",
input: `Let me check the weather.<function_calls>get_weather(location="NYC")</function_calls>`,
expectedContent: "Let me check the weather.",
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "NYC"},
},
},
},
},
{
name: "tool call with multiple arguments",
input: `<function_calls>book_flight(from="SFO", to="NYC", date="2024-01-15")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "book_flight",
Arguments: map[string]any{
"from": "SFO",
"to": "NYC",
"date": "2024-01-15",
},
},
},
},
},
{
name: "multiple tool calls",
input: `<function_calls>get_weather(location="San Francisco")
get_weather(location="New York")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "San Francisco"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "New York"},
},
},
},
},
{
name: "tool call with numeric argument",
input: `<function_calls>set_temperature(value=72)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_temperature",
Arguments: map[string]any{"value": int64(72)},
},
},
},
},
{
name: "tool call with float argument",
input: `<function_calls>set_price(amount=19.99)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_price",
Arguments: map[string]any{"amount": 19.99},
},
},
},
},
{
name: "tool call with boolean argument",
input: `<function_calls>toggle_setting(enabled=true)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "toggle_setting",
Arguments: map[string]any{"enabled": true},
},
},
},
},
{
name: "tool call with null argument",
input: `<function_calls>clear_value(field=null)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "clear_value",
Arguments: map[string]any{"field": nil},
},
},
},
},
{
name: "tool call with array argument",
input: `<function_calls>process_items(items=["apple", "banana", "cherry"])</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "process_items",
Arguments: map[string]any{"items": []any{"apple", "banana", "cherry"}},
},
},
},
},
{
name: "tool call with dict argument",
input: `<function_calls>update_config(settings={"theme": "dark", "fontSize": 14})</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "update_config",
Arguments: map[string]any{
"settings": map[string]any{
"theme": "dark",
"fontSize": int64(14),
},
},
},
},
},
},
{
name: "tool call with nested dict",
input: `<function_calls>create_request(data={"user": {"name": "John", "age": 30}, "active": true})</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create_request",
Arguments: map[string]any{
"data": map[string]any{
"user": map[string]any{
"name": "John",
"age": int64(30),
},
"active": true,
},
},
},
},
},
},
{
name: "tool call with no arguments",
input: `<function_calls>get_current_time()</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_current_time",
Arguments: map[string]any{},
},
},
},
},
{
name: "tool call with single quotes",
input: `<function_calls>search(query='hello world')</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: map[string]any{"query": "hello world"},
},
},
},
},
{
name: "tool call with escaped quotes",
input: `<function_calls>search(query="say \"hello\"")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: map[string]any{"query": `say "hello"`},
},
},
},
},
{
name: "tool call with mixed argument types",
input: `<function_calls>create_user(name="John", age=30, active=true)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create_user",
Arguments: map[string]any{
"name": "John",
"age": int64(30),
"active": true,
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := &Olmo3Parser{}
p.Init(nil, nil, nil)
content, thinking, calls, err := p.Add(tt.input, false)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Drain remaining content
finalContent, finalThinking, finalCalls, err := p.Add("", true)
if err != nil {
t.Fatalf("unexpected error on done: %v", err)
}
content += finalContent
thinking += finalThinking
calls = append(calls, finalCalls...)
if diff := cmp.Diff(content, tt.expectedContent); diff != "" {
t.Errorf("content mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(thinking, tt.expectedThinking); diff != "" {
t.Errorf("thinking mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(calls, tt.expectedCalls); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
})
}
}
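The cases above all follow the parser's two-phase contract: stream chunks through Add(chunk, false), then call Add("", true) once to drain whatever is still buffered. A minimal sketch of a caller, assuming only the Add signature these tests already exercise (collect is a hypothetical helper, not part of the package):

// collect mirrors what the tests do: it feeds chunks to the parser and
// aggregates the results, finishing with a drain call so buffered tag
// fragments are flushed.
func collect(p *Olmo3Parser, chunks []string) (string, string, []api.ToolCall, error) {
	var content, thinking string
	var calls []api.ToolCall
	for _, chunk := range chunks {
		c, th, tc, err := p.Add(chunk, false)
		if err != nil {
			return "", "", nil, err
		}
		content += c
		thinking += th
		calls = append(calls, tc...)
	}
	// done=true flushes anything still held back (e.g. a partial "<func").
	c, th, tc, err := p.Add("", true)
	if err != nil {
		return "", "", nil, err
	}
	return content + c, thinking + th, append(calls, tc...), nil
}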

func TestOlmo3Parser_Streaming(t *testing.T) {
tests := []struct {
name string
chunks []string
expectedContent string
expectedCalls []api.ToolCall
}{
{
name: "streaming content",
chunks: []string{"Hello, ", "how ", "can I help?"},
expectedContent: "Hello, how can I help?",
},
{
name: "streaming tool call",
chunks: []string{"<function_", "calls>get_weather", "(location=\"SF\")", "</function_calls>"},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "SF"},
},
},
},
},
{
name: "streaming content then tool call",
chunks: []string{"Let me check.", "<function_calls>", "get_weather(location=\"NYC\")", "</function_calls>"},
expectedContent: "Let me check.",
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "NYC"},
},
},
},
},
{
name: "tool call tag split across chunks",
chunks: []string{"<func", "tion_calls>test()</function_calls>"},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test",
Arguments: map[string]any{},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := &Olmo3Parser{}
p.Init(nil, nil, nil)
var allContent string
var allCalls []api.ToolCall
for _, chunk := range tt.chunks {
content, _, calls, err := p.Add(chunk, false)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
allContent += content
allCalls = append(allCalls, calls...)
}
// Drain
content, _, calls, err := p.Add("", true)
if err != nil {
t.Fatalf("unexpected error on done: %v", err)
}
allContent += content
allCalls = append(allCalls, calls...)
if diff := cmp.Diff(allContent, tt.expectedContent); diff != "" {
t.Errorf("content mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(allCalls, tt.expectedCalls); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
})
}
}
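The last case is the delicate one: a tag split across chunks means the parser must hold back any suffix of its buffer that could still grow into <function_calls> instead of emitting it as content. One way to compute that hold-back, sketched independently of the actual parser internals (holdback is a hypothetical name; a strings import is assumed):

// holdback returns the length of the longest suffix of buf that is a
// prefix of tag, i.e. the bytes that must stay buffered because they may
// be the start of a tag continued in the next chunk.
func holdback(buf, tag string) int {
	max := len(tag)
	if len(buf) < max {
		max = len(buf)
	}
	for n := max; n > 0; n-- {
		if strings.HasSuffix(buf, tag[:n]) {
			return n
		}
	}
	return 0
}

With buf = "Let me check.<func" and tag = "<function_calls>", holdback returns 5: "Let me check." can stream out while "<func" waits for the next chunk.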

func TestOlmo3Parser_HasToolSupport(t *testing.T) {
p := &Olmo3Parser{}
if !p.HasToolSupport() {
t.Error("expected HasToolSupport to return true")
}
}

func TestOlmo3Parser_HasThinkingSupport(t *testing.T) {
p := &Olmo3Parser{}
if p.HasThinkingSupport() {
t.Error("expected HasThinkingSupport to return false")
}
}

func TestParseOlmo3FunctionCalls(t *testing.T) {
tests := []struct {
name string
input string
expected []api.ToolCall
wantErr bool
}{
{
name: "simple call",
input: `get_weather(location="SF")`,
expected: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "SF"},
},
},
},
},
{
name: "multiple args",
input: `send_email(to="user@example.com", subject="Hello", body="Test message")`,
expected: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "send_email",
Arguments: map[string]any{
"to": "user@example.com",
"subject": "Hello",
"body": "Test message",
},
},
},
},
},
{
name: "multiple calls with newlines",
input: `get_weather(location="SF")
get_time(timezone="PST")`,
expected: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "SF"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_time",
Arguments: map[string]any{"timezone": "PST"},
},
},
},
},
{
name: "empty input",
input: "",
expected: nil,
},
{
name: "whitespace only",
input: " \n ",
expected: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
calls, err := parseOlmo3FunctionCalls(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("parseOlmo3FunctionCalls() error = %v, wantErr %v", err, tt.wantErr)
return
}
if diff := cmp.Diff(calls, tt.expected); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
})
}
}
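parseOlmo3FunctionCalls itself is defined in the parser source, which this diff does not show. From the cases above it must split the tag body one call per line, then separate the name from a comma-separated argument list whose commas may also appear inside quotes, brackets, or braces. A hedged sketch of that splitting step only (callLineRe and splitArgs are hypothetical; regexp and strings imports are assumed):

var callLineRe = regexp.MustCompile(`^([A-Za-z_][A-Za-z0-9_]*)\((.*)\)$`)

// splitArgs breaks `a=1, b="x, y", c=[1, 2]` into top-level key=value
// pairs, ignoring commas inside quotes and inside nested brackets.
func splitArgs(s string) []string {
	var parts []string
	depth, start := 0, 0
	var quote byte
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == '\\' {
				i++ // skip the escaped character
			} else if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '[' || c == '{' || c == '(':
			depth++
		case c == ']' || c == '}' || c == ')':
			depth--
		case c == ',' && depth == 0:
			parts = append(parts, strings.TrimSpace(s[start:i]))
			start = i + 1
		}
	}
	if rest := strings.TrimSpace(s[start:]); rest != "" {
		parts = append(parts, rest)
	}
	return parts
}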

func TestParseOlmo3Value(t *testing.T) {
tests := []struct {
name string
input string
expected any
}{
{"string double quotes", `"hello"`, "hello"},
{"string single quotes", `'hello'`, "hello"},
{"integer", "42", int64(42)},
{"negative integer", "-10", int64(-10)},
{"float", "3.14", 3.14},
{"boolean true", "true", true},
{"boolean True", "True", true},
{"boolean false", "false", false},
{"null", "null", nil},
{"None", "None", nil},
{"empty array", "[]", []any{}},
{"array with strings", `["a", "b"]`, []any{"a", "b"}},
{"array with numbers", "[1, 2, 3]", []any{int64(1), int64(2), int64(3)}},
{"empty object", "{}", map[string]any{}},
{"simple object", `{"name": "John"}`, map[string]any{"name": "John"}},
{"object with number", `{"age": 30}`, map[string]any{"age": int64(30)}},
{"object with multiple keys", `{"a": 1, "b": 2}`, map[string]any{"a": int64(1), "b": int64(2)}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := parseOlmo3Value(tt.input)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if diff := cmp.Diff(result, tt.expected); diff != "" {
t.Errorf("value mismatch (-got +want):\n%s", diff)
}
})
}
}
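This table pins down the value grammar: JSON, plus the Python spellings True/False/None and single-quoted strings, with whole numbers surfacing as int64 rather than float64. Plain encoding/json decodes every number to float64, so an implementation needs json.Number somewhere. A hedged sketch of that technique (parseValueSketch and normalizeNumbers are hypothetical names; encoding/json and strings imports are assumed):

// parseValueSketch mirrors the behavior the table above pins down. It is a
// stand-in for parseOlmo3Value, not the shipped implementation.
func parseValueSketch(s string) (any, error) {
	s = strings.TrimSpace(s)
	switch s {
	case "True":
		return true, nil
	case "False":
		return false, nil
	case "None":
		return nil, nil
	}
	// Single-quoted strings are not valid JSON; convert the common case.
	// (Naive: does not handle embedded escape sequences.)
	if len(s) >= 2 && s[0] == '\'' && s[len(s)-1] == '\'' {
		s = `"` + strings.ReplaceAll(s[1:len(s)-1], `"`, `\"`) + `"`
	}
	dec := json.NewDecoder(strings.NewReader(s))
	dec.UseNumber() // keep 42 recoverable as int64, not float64
	var v any
	if err := dec.Decode(&v); err != nil {
		return nil, err
	}
	return normalizeNumbers(v), nil
}

// normalizeNumbers turns json.Number into int64 when it fits, else float64,
// recursing into arrays and objects so nested values match the table.
func normalizeNumbers(v any) any {
	switch t := v.(type) {
	case json.Number:
		if i, err := t.Int64(); err == nil {
			return i
		}
		f, _ := t.Float64()
		return f
	case []any:
		for i := range t {
			t[i] = normalizeNumbers(t[i])
		}
	case map[string]any:
		for k := range t {
			t[k] = normalizeNumbers(t[k])
		}
	}
	return v
}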

View File

@@ -54,8 +54,6 @@ func ParserForName(name string) Parser {
return harmony.NewHarmonyMessageHandler()
case "cogito":
return &CogitoParser{}
case "olmo3":
return &Olmo3Parser{}
default:
return nil
}

View File

@@ -1,147 +0,0 @@
package renderers

import (
"encoding/json"
"fmt"
"sort"
"strings"
"github.com/ollama/ollama/api"
)

const (
olmo3DefaultSystemMessage = "You are a helpful function-calling AI assistant. "
olmo3NoFunctionsMessage = "You do not currently have access to any functions. "
olmo3WithFunctionsMessage = "You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions."
)

type Olmo3Renderer struct{}

func (r *Olmo3Renderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
var systemMessage *api.Message
filteredMessages := make([]api.Message, 0, len(messages))
for i, message := range messages {
if message.Role == "system" {
if systemMessage == nil {
systemMessage = &messages[i]
}
continue
}
filteredMessages = append(filteredMessages, message)
}
// Render system message
if systemMessage != nil {
// Custom system message - single newline after "system"
sb.WriteString("<|im_start|>system\n")
sb.WriteString(systemMessage.Content)
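// Note: a user-supplied system message keeps its own wording; the
// <functions> block is appended as-is, and olmo3WithFunctionsMessage is
// only added on the default-system path in the else branch below.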
if len(tools) > 0 {
functionsJSON, err := marshalWithSpaces(tools)
if err != nil {
return "", err
}
sb.WriteString("<functions>")
sb.WriteString(string(functionsJSON))
sb.WriteString("</functions>")
}
sb.WriteString("<|im_end|>\n")
} else {
// Default system message - single newline after "system"
sb.WriteString("<|im_start|>system\n")
sb.WriteString(olmo3DefaultSystemMessage)
if len(tools) > 0 {
functionsJSON, err := marshalWithSpaces(tools)
if err != nil {
return "", err
}
sb.WriteString(olmo3WithFunctionsMessage)
sb.WriteString("<functions>")
sb.WriteString(string(functionsJSON))
sb.WriteString("</functions>")
} else {
sb.WriteString(olmo3NoFunctionsMessage)
sb.WriteString("<functions></functions>")
}
sb.WriteString("<|im_end|>\n")
}
for i, message := range filteredMessages {
lastMessage := i == len(filteredMessages)-1
switch message.Role {
case "user":
sb.WriteString("<|im_start|>user\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
case "assistant":
sb.WriteString("<|im_start|>assistant\n")
if message.Content != "" {
sb.WriteString(message.Content)
}
if len(message.ToolCalls) > 0 {
sb.WriteString("<function_calls>")
for j, tc := range message.ToolCalls {
// Format as function_name(arg1="value1", arg2="value2")
sb.WriteString(tc.Function.Name)
sb.WriteString("(")
// Get sorted keys for deterministic output
keys := make([]string, 0, len(tc.Function.Arguments))
for k := range tc.Function.Arguments {
keys = append(keys, k)
}
sort.Strings(keys)
for k, key := range keys {
if k > 0 {
sb.WriteString(", ")
}
value, err := json.Marshal(tc.Function.Arguments[key])
if err != nil {
return "", err
}
sb.WriteString(fmt.Sprintf("%s=%s", key, string(value)))
}
sb.WriteString(")")
if j < len(message.ToolCalls)-1 {
sb.WriteString("\n")
}
}
sb.WriteString("</function_calls>")
}
// Add end tag unless it's the last message with content only (prefill)
if !lastMessage || len(message.ToolCalls) > 0 {
sb.WriteString("<|im_end|>\n")
}
case "tool":
sb.WriteString("<|im_start|>environment\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
}
}
// Add generation prompt if needed
needsGenerationPrompt := true
if len(filteredMessages) > 0 {
lastMsg := filteredMessages[len(filteredMessages)-1]
if lastMsg.Role == "assistant" && len(lastMsg.ToolCalls) == 0 && lastMsg.Content != "" {
needsGenerationPrompt = false
}
}
if needsGenerationPrompt {
sb.WriteString("<|im_start|>assistant\n")
}
return sb.String(), nil
}
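Render leans on marshalWithSpaces, whose definition is outside this hunk. Judging from the expected strings in the renderer tests below, it produces JSON with a space after every ':' and ',', in the style of Python's json.dumps. A sketch that would satisfy those expectations by tracking string state so separators inside values stay untouched (marshalWithSpacesSketch is hypothetical; bytes and encoding/json imports are assumed):

// marshalWithSpacesSketch marshals v and inserts a space after each ':'
// and ',' that sits outside a string literal.
func marshalWithSpacesSketch(v any) ([]byte, error) {
	b, err := json.Marshal(v)
	if err != nil {
		return nil, err
	}
	var out bytes.Buffer
	inString, escaped := false, false
	for _, c := range b {
		out.WriteByte(c)
		if inString {
			switch {
			case escaped:
				escaped = false
			case c == '\\':
				escaped = true
			case c == '"':
				inString = false
			}
			continue
		}
		switch c {
		case '"':
			inString = true
		case ':', ',':
			out.WriteByte(' ')
		}
	}
	return out.Bytes(), nil
}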

View File

@@ -1,290 +0,0 @@
package renderers

import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)

func TestOlmo3Renderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
tools []api.Tool
expected string
}{
{
name: "basic without system - adds default system",
msgs: []api.Message{
{Role: "user", Content: "Hello!"},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. You do not currently have access to any functions. <functions></functions><|im_end|>\n" +
"<|im_start|>user\n" +
"Hello!<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "with system message no tools",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello!"},
},
expected: "<|im_start|>system\n" +
"You are a helpful assistant.<|im_end|>\n" +
"<|im_start|>user\n" +
"Hello!<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "with system message and tools",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "What is the weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}, Description: "The city"},
},
},
},
},
},
expected: "<|im_start|>system\n" +
`You are a helpful assistant.<functions>[{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather", "parameters": {"type": "object", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"What is the weather?<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "default system with tools - includes function instruction",
msgs: []api.Message{
{Role: "user", Content: "What is the weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}, Description: "The city"},
},
},
},
},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. " +
"You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions." +
`<functions>[{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather", "parameters": {"type": "object", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"What is the weather?<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "assistant with tool calls - function call syntax",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "What is the weather in SF?"},
{
Role: "assistant",
Content: "Let me check the weather.",
ToolCalls: []api.ToolCall{
{
ID: "call_1",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "San Francisco",
},
},
},
},
},
{Role: "tool", Content: `{"temperature": 68}`, ToolName: "get_weather"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}, Description: "The city"},
},
},
},
},
},
expected: "<|im_start|>system\n" +
`You are a helpful assistant.<functions>[{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather", "parameters": {"type": "object", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"What is the weather in SF?<|im_end|>\n" +
"<|im_start|>assistant\n" +
`Let me check the weather.<function_calls>get_weather(location="San Francisco")</function_calls><|im_end|>` + "\n" +
"<|im_start|>environment\n" +
`{"temperature": 68}<|im_end|>` + "\n" +
"<|im_start|>assistant\n\n",
},
{
name: "multi-turn conversation",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: "Hi there!"},
{Role: "user", Content: "How are you?"},
},
expected: "<|im_start|>system\n" +
"You are a helpful assistant.<|im_end|>\n" +
"<|im_start|>user\n" +
"Hello<|im_end|>\n" +
"<|im_start|>assistant\n" +
"Hi there!<|im_end|>\n" +
"<|im_start|>user\n" +
"How are you?<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "parallel tool calls - newline separated",
msgs: []api.Message{
{Role: "user", Content: "Get weather in SF and NYC"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_1",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "San Francisco"},
},
},
{
ID: "call_2",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "New York"},
},
},
},
},
{Role: "tool", Content: `{"temperature": 68}`, ToolName: "get_weather"},
{Role: "tool", Content: `{"temperature": 55}`, ToolName: "get_weather"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
},
},
},
},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. " +
"You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions." +
`<functions>[{"type": "function", "function": {"name": "get_weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"Get weather in SF and NYC<|im_end|>\n" +
"<|im_start|>assistant\n" +
`<function_calls>get_weather(location="San Francisco")` + "\n" +
`get_weather(location="New York")</function_calls><|im_end|>` + "\n" +
"<|im_start|>environment\n" +
`{"temperature": 68}<|im_end|>` + "\n" +
"<|im_start|>environment\n" +
`{"temperature": 55}<|im_end|>` + "\n" +
"<|im_start|>assistant\n\n",
},
{
name: "tool call with multiple arguments",
msgs: []api.Message{
{Role: "user", Content: "Book a flight"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_1",
Function: api.ToolCallFunction{
Name: "book_flight",
Arguments: map[string]any{
"from": "SFO",
"to": "NYC",
},
},
},
},
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "book_flight",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"from": {Type: api.PropertyType{"string"}},
"to": {Type: api.PropertyType{"string"}},
},
},
},
},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. " +
"You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions." +
`<functions>[{"type": "function", "function": {"name": "book_flight", "parameters": {"type": "object", "properties": {"from": {"type": "string"}, "to": {"type": "string"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"Book a flight<|im_end|>\n" +
"<|im_start|>assistant\n" +
`<function_calls>book_flight(from="SFO", to="NYC")</function_calls><|im_end|>` + "\n" +
"<|im_start|>assistant\n\n",
},
{
name: "assistant prefill - no generation prompt",
msgs: []api.Message{
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: "Hi there!"},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. You do not currently have access to any functions. <functions></functions><|im_end|>\n" +
"<|im_start|>user\n" +
"Hello<|im_end|>\n" +
"<|im_start|>assistant\n" +
"Hi there!",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := (&Olmo3Renderer{}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}
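One behavior worth spelling out from the final case: when the last message is an assistant turn with content and no tool calls, the renderer neither closes the turn nor appends a fresh generation prompt, so decoding continues the partial reply. A usage sketch under that assumption:

msgs := []api.Message{
	{Role: "user", Content: "Finish this sentence"},
	{Role: "assistant", Content: "The quick brown fox"},
}
out, _ := (&Olmo3Renderer{}).Render(msgs, nil, nil)
// out ends with "<|im_start|>assistant\nThe quick brown fox" with no
// <|im_end|> and no extra "<|im_start|>assistant\n", so the model picks
// up mid-turn.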

View File

@@ -59,9 +59,6 @@ func rendererForName(name string) Renderer {
case "cogito":
renderer := &CogitoRenderer{isThinking: true}
return renderer
case "olmo3":
renderer := &Olmo3Renderer{}
return renderer
default:
return nil
}

View File

@@ -110,7 +110,6 @@ func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.Thi
if err != nil {
return "", err
}
slog.Debug("rendered prompt", "renderer", m.Config.Renderer, "prompt", rendered)
return rendered, nil
}