Compare commits


8 Commits

Author SHA1 Message Date
ParthSareen
92af238208 wip 2025-12-02 12:17:36 -08:00
ParthSareen
7461faf651 script to render templates 2025-12-01 18:03:04 -08:00
Daniel Hiltgen
554172759c win: warn if ggml-base detected in PATH (#13289)
If the user has somehow installed another GGML-based app that places a
ggml-base lib somewhere in their PATH, we can experience runtime problems
due to incompatibilities. This change adds a warning message if we detect
a ggml-base outside of our install location, to aid in troubleshooting.
2025-12-01 15:36:47 -08:00
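
A minimal sketch of the detection this commit describes (the full function appears in the discover diff further down); `installDir` stands in for Ollama's own library path, `ml.LibOllamaPath` in the real change:

```go
package main

import (
	"log/slog"
	"os/exec"
	"runtime"
	"strings"
)

// warnOnForeignGGML warns when a ggml-base.dll reachable via PATH lives
// outside our install location (Windows-only, mirroring the diff below).
func warnOnForeignGGML(installDir string) {
	if runtime.GOOS != "windows" {
		return
	}
	basePath, err := exec.LookPath("ggml-base.dll")
	if err != nil || basePath == "" {
		return // no ggml-base.dll on PATH: nothing to warn about
	}
	if !strings.HasPrefix(basePath, installDir) {
		slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
	}
}

func main() {
	warnOnForeignGGML(`C:\Program Files\Ollama\lib\ollama`) // example install path
}
```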
Bruce MacDonald
5b6a8e6001 api/client: handle non-json streaming errors (#13007)
While processing the response stream during a chat or generation, any error encountered is parsed and returned to the user. The problem with the existing code was that it assumed the response body would be valid JSON, which is not a safe assumption, and parsing failures produced cryptic error messages such as:
`invalid character 'i' looking for beginning of value`

This change updates the stream function to return the raw error string if it can't be parsed as JSON. This should help with debugging by making sure the actual error reaches the user.
2025-12-01 15:10:16 -08:00
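
On the caller side, the raw body is now reachable through the returned error; a sketch using the client API, assuming the `StatusError` fields shown in the `api/client.go` diff below (the model name is a placeholder):

```go
package main

import (
	"context"
	"errors"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	req := &api.ChatRequest{
		Model:    "llama3", // placeholder model name
		Messages: []api.Message{{Role: "user", Content: "Hello!"}},
	}
	err = client.Chat(context.Background(), req, func(r api.ChatResponse) error { return nil })
	// Before this change a non-JSON body surfaced as a JSON parsing error;
	// now the body itself is preserved on the error.
	var statusErr api.StatusError
	if errors.As(err, &statusErr) {
		log.Printf("server returned %d: %s", statusErr.StatusCode, statusErr.ErrorMessage)
	} else if err != nil {
		log.Print(err)
	}
}
```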
Daniel Hiltgen
467bbc0dd5 jetpack: require exact match or skip cuda_jetpack* (#13288)
The cuda_jetpack libs will enumerate discrete GPUs on SBSA systems,
which leads to runtime failures from missing kernels. This fix
requires an exact match to enable jetpacks instead of relying on
enumeration to filter out supported libraries.
2025-12-01 12:48:16 -08:00
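
The rule reduces to two checks per runner library directory (a sketch; `jetpack` is the detected JetPack version, empty when none was found, and the paths are examples):

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// usable mirrors the filtering in the discover diff below: with a JetPack
// detected, only the exactly matching cuda_<version> dir is enabled; with
// none detected, cuda_jetpack* dirs are skipped rather than enumerated.
func usable(dir, jetpack string) bool {
	base := filepath.Base(dir)
	if jetpack != "" {
		// Jetson: require an exact match on the detected JetPack version.
		return base == "cuda_"+jetpack
	}
	// No JetPack detected: never enable the jetpack builds.
	return !strings.Contains(base, "cuda_jetpack")
}

func main() {
	for _, dir := range []string{"/usr/lib/ollama/cuda_jetpack6", "/usr/lib/ollama/cuda_v13"} {
		fmt.Println(dir, usable(dir, "")) // no JetPack detected: jetpack dirs are skipped
	}
}
```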
Jeffrey Morgan
6d9f9323c5 .gitattributes: add app/webview to linguist-vendored (#13274) 2025-11-29 23:46:10 -05:00
Ondrej Kokes
0c2489605d docs: fix output formatting in faq.mdx (#13231)
There were a few Markdown typos in one FAQ answer. It now renders as a proper ASCII table.
2025-11-28 19:19:21 -05:00
EntropyYue
8b1b89a984 docs: remove deprecated parameters (#13237) 2025-11-26 11:03:09 +09:00
21 changed files with 728 additions and 2061 deletions

.gitattributes

@@ -19,6 +19,8 @@ ml/backend/**/*.comp linguist-vendored
 ml/backend/**/*.glsl linguist-vendored
 ml/backend/**/CMakeLists.txt linguist-vendored
+app/webview linguist-vendored
 llama/build-info.cpp linguist-generated
 ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

api/client.go

@@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		bts := scanner.Bytes()
 		if err := json.Unmarshal(bts, &errorResponse); err != nil {
-			return fmt.Errorf("unmarshal: %w", err)
+			if response.StatusCode >= http.StatusBadRequest {
+				return StatusError{
+					StatusCode:   response.StatusCode,
+					Status:       response.Status,
+					ErrorMessage: string(bts),
+				}
+			}
+			return errors.New(string(bts))
 		}

 		if response.StatusCode == http.StatusUnauthorized {

api/client_test.go

@@ -55,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
 type testError struct {
 	message    string
 	statusCode int
+	raw        bool // if true, write message as-is instead of JSON encoding
 }

 func (e testError) Error() string {
@@ -111,6 +112,20 @@ func TestClientStream(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "plain text error response",
+			responses: []any{
+				"internal server error",
+			},
+			wantErr: "internal server error",
+		},
+		{
+			name: "HTML error page",
+			responses: []any{
+				"<html><body>404 Not Found</body></html>",
+			},
+			wantErr: "404 Not Found",
+		},
 	}

 	for _, tc := range testCases {
@@ -135,6 +150,12 @@ func TestClientStream(t *testing.T) {
 				return
 			}

+			if str, ok := resp.(string); ok {
+				fmt.Fprintln(w, str)
+				flusher.Flush()
+				continue
+			}
+
 			if err := json.NewEncoder(w).Encode(resp); err != nil {
 				t.Fatalf("failed to encode response: %v", err)
 			}
@@ -173,9 +194,10 @@ func TestClientStream(t *testing.T) {
 func TestClientDo(t *testing.T) {
 	testCases := []struct {
-		name     string
-		response any
-		wantErr  string
+		name           string
+		response       any
+		wantErr        string
+		wantStatusCode int
 	}{
 		{
 			name: "immediate error response",
@@ -183,7 +205,8 @@ func TestClientDo(t *testing.T) {
 				message:    "test error message",
 				statusCode: http.StatusBadRequest,
 			},
-			wantErr: "test error message",
+			wantErr:        "test error message",
+			wantStatusCode: http.StatusBadRequest,
 		},
 		{
 			name: "server error response",
@@ -191,7 +214,8 @@ func TestClientDo(t *testing.T) {
 				message:    "internal error",
 				statusCode: http.StatusInternalServerError,
 			},
-			wantErr: "internal error",
+			wantErr:        "internal error",
+			wantStatusCode: http.StatusInternalServerError,
 		},
 		{
 			name: "successful response",
@@ -203,6 +227,26 @@ func TestClientDo(t *testing.T) {
 				Success: true,
 			},
 		},
+		{
+			name: "plain text error response",
+			response: testError{
+				message:    "internal server error",
+				statusCode: http.StatusInternalServerError,
+				raw:        true,
+			},
+			wantErr:        "internal server error",
+			wantStatusCode: http.StatusInternalServerError,
+		},
+		{
+			name: "HTML error page",
+			response: testError{
+				message:    "<html><body>404 Not Found</body></html>",
+				statusCode: http.StatusNotFound,
+				raw:        true,
+			},
+			wantErr:        "<html><body>404 Not Found</body></html>",
+			wantStatusCode: http.StatusNotFound,
+		},
 	}

 	for _, tc := range testCases {
@@ -210,11 +254,16 @@ func TestClientDo(t *testing.T) {
 		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			if errResp, ok := tc.response.(testError); ok {
 				w.WriteHeader(errResp.statusCode)
-				err := json.NewEncoder(w).Encode(map[string]string{
-					"error": errResp.message,
-				})
-				if err != nil {
-					t.Fatal("failed to encode error response:", err)
-				}
+				if !errResp.raw {
+					err := json.NewEncoder(w).Encode(map[string]string{
+						"error": errResp.message,
+					})
+					if err != nil {
+						t.Fatal("failed to encode error response:", err)
+					}
+				} else {
+					// Write raw message (simulates non-JSON error responses)
+					fmt.Fprint(w, errResp.message)
+				}
 				return
 			}
@@ -241,6 +290,15 @@ func TestClientDo(t *testing.T) {
 			if err.Error() != tc.wantErr {
 				t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
 			}
+			if tc.wantStatusCode != 0 {
+				if statusErr, ok := err.(StatusError); ok {
+					if statusErr.StatusCode != tc.wantStatusCode {
+						t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
+					}
+				} else {
+					t.Errorf("expected StatusError, got %T", err)
+				}
+			}
 			return
 		}

cmd/chat_template/chat_template.py

@@ -0,0 +1,625 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "transformers>=4.57.0",
# "jinja2",
# "fastapi",
# "uvicorn",
# "pydantic",
# "requests",
# ]
# ///
"""
Chat Template Testing Tool
Test HuggingFace chat templates against Ollama renderers.
Usage:
# Run predefined test cases against a HuggingFace model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3
# Compare HuggingFace output with Ollama renderer
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --ollama-model intellect3
# Start server for manual curl testing
uv run cmd/chat_template/chat_template.py --serve
# Show chat template for a model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --show-template
"""
import argparse
import json
import sys
from typing import Any
from transformers import AutoTokenizer
TEST_CASES = [
{
"name": "basic_user_message",
"messages": [{"role": "user", "content": "Hello!"}],
"tools": None,
},
{
"name": "with_system_message",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
],
"tools": None,
},
{
"name": "multi_turn_conversation",
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"},
],
"tools": None,
},
{
"name": "with_tools",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the weather?"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "tool_call_and_response",
"messages": [
{"role": "user", "content": "What is the weather in SF?"},
{
"role": "assistant",
"content": "Let me check the weather.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "parallel_tool_calls",
"messages": [
{"role": "user", "content": "Get weather in SF and NYC"},
{
"role": "assistant",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
},
{
"id": "call_2",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "New York"},
},
},
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
{"role": "tool", "content": '{"temperature": 55}', "tool_call_id": "call_2"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
# Thinking tests
{
"name": "assistant_with_thinking",
"messages": [
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": "The answer is 4.",
"thinking": "Let me calculate: 2 + 2 = 4. This is basic arithmetic.",
},
{"role": "user", "content": "And 3+3?"},
],
"tools": None,
},
{
"name": "thinking_with_tool_call",
"messages": [
{"role": "user", "content": "What's the weather in Paris?"},
{
"role": "assistant",
"content": "I'll check the weather for you.",
"thinking": "The user wants to know the weather in Paris. I should call the get_weather function.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "Paris"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 18, "condition": "cloudy"}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
{
"name": "thinking_only_no_content",
"messages": [
{"role": "user", "content": "Think about this silently."},
{
"role": "assistant",
"content": "", # HuggingFace requires content field
"thinking": "I'm thinking about this but won't respond with visible content.",
},
{"role": "user", "content": "What did you think?"},
],
"tools": None,
},
]
# Cache for tokenizers
_tokenizer_cache: dict[str, Any] = {}
def get_tokenizer(model_name: str):
"""Get or create tokenizer for the given model."""
if model_name not in _tokenizer_cache:
print(f"Loading tokenizer for {model_name}...", file=sys.stderr)
_tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(model_name)
return _tokenizer_cache[model_name]
def apply_template(
model: str,
messages: list[dict],
tools: list[dict] | None = None,
) -> str:
"""Apply HuggingFace chat template to messages."""
tokenizer = get_tokenizer(model)
if tools:
return tokenizer.apply_chat_template(
messages,
tools=tools,
tokenize=False,
add_generation_prompt=True,
)
else:
return tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
def get_ollama_prompt(
ollama_model: str,
messages: list[dict],
tools: list[dict] | None = None,
ollama_host: str = "http://localhost:11434",
) -> str | None:
"""Get rendered prompt from Ollama using debug_render_only."""
import requests
# Convert messages to Ollama format
ollama_messages = []
for msg in messages:
ollama_msg = {"role": msg["role"]}
if "content" in msg:
ollama_msg["content"] = msg["content"]
if "thinking" in msg:
ollama_msg["thinking"] = msg["thinking"]
if "tool_calls" in msg:
# Convert tool_calls to Ollama format
tool_calls = []
for tc in msg["tool_calls"]:
tool_call = {
"function": {
"name": tc["function"]["name"],
"arguments": tc["function"]["arguments"],
}
}
if "id" in tc:
tool_call["id"] = tc["id"]
tool_calls.append(tool_call)
ollama_msg["tool_calls"] = tool_calls
if "tool_call_id" in msg:
ollama_msg["tool_call_id"] = msg["tool_call_id"]
ollama_messages.append(ollama_msg)
payload = {
"model": ollama_model,
"messages": ollama_messages,
"stream": False,
"_debug_render_only": True,
}
if tools:
payload["tools"] = tools
try:
resp = requests.post(f"{ollama_host}/api/chat", json=payload, timeout=30)
resp.raise_for_status()
data = resp.json()
# Field name is _debug_info with underscore prefix
if "_debug_info" in data and "rendered_template" in data["_debug_info"]:
return data["_debug_info"]["rendered_template"]
return None
except requests.exceptions.ConnectionError:
print(f" [ERROR] Cannot connect to Ollama at {ollama_host}", file=sys.stderr)
return None
except Exception as e:
print(f" [ERROR] Ollama request failed: {e}", file=sys.stderr)
return None
def compute_diff(hf_prompt: str, ollama_prompt: str) -> str:
"""Compute a unified diff between HuggingFace and Ollama prompts."""
import difflib
hf_lines = hf_prompt.splitlines(keepends=True)
ollama_lines = ollama_prompt.splitlines(keepends=True)
diff = difflib.unified_diff(
ollama_lines,
hf_lines,
fromfile="Ollama",
tofile="HuggingFace",
lineterm="",
)
return "".join(diff)
def print_test_output(
name: str,
messages: list[dict],
tools: list[dict] | None,
hf_prompt: str,
ollama_prompt: str | None = None,
as_repr: bool = False,
):
"""Print test output in a format suitable for Go test creation and LLM diffing."""
print(f"\n{'='*60}")
print(f"Test: {name}")
print("=" * 60)
print("\n--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print("\n--- Tools ---")
print(json.dumps(tools, indent=2))
if ollama_prompt is not None:
# Comparison mode
if hf_prompt == ollama_prompt:
print("\n--- Result: MATCH ---")
print("\n--- Prompt (both identical) ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
else:
print("\n--- Result: MISMATCH ---")
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("\n--- Ollama Prompt ---")
if as_repr:
print(repr(ollama_prompt))
else:
print(ollama_prompt)
print("\n--- Diff (Ollama -> HuggingFace) ---")
diff = compute_diff(hf_prompt, ollama_prompt)
if diff:
print(diff)
else:
print("(no line-level diff, check whitespace)")
else:
# HuggingFace only mode
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("=" * 60)
def run_tests(
model: str,
as_repr: bool = False,
test_filter: str | None = None,
ollama_model: str | None = None,
ollama_host: str = "http://localhost:11434",
):
"""Run all predefined test cases against a model."""
if ollama_model:
print(f"\nComparing HuggingFace ({model}) vs Ollama ({ollama_model})\n")
else:
print(f"\nRunning tests against: {model}\n")
matches = 0
mismatches = 0
errors = 0
for test_case in TEST_CASES:
name = test_case["name"]
messages = test_case["messages"]
tools = test_case["tools"]
# Filter tests if specified
if test_filter and test_filter.lower() not in name.lower():
continue
try:
hf_prompt = apply_template(model, messages, tools)
ollama_prompt = None
if ollama_model:
ollama_prompt = get_ollama_prompt(
ollama_model, messages, tools, ollama_host
)
if ollama_prompt is None:
errors += 1
elif hf_prompt == ollama_prompt:
matches += 1
else:
mismatches += 1
print_test_output(
name, messages, tools, hf_prompt, ollama_prompt, as_repr=as_repr
)
except Exception as e:
errors += 1
print(f"\n{'='*60}")
print(f"Test: {name} - FAILED")
print(f"--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print(f"--- Tools ---")
print(json.dumps(tools, indent=2))
print(f"--- Error ---")
print(f"{e}")
print("=" * 60)
# Print summary if comparing
if ollama_model:
total = matches + mismatches + errors
print(f"\n{'='*60}")
print("SUMMARY")
print("=" * 60)
print(f" Total: {total}")
print(f" Matches: {matches}")
print(f" Mismatches: {mismatches}")
print(f" Errors: {errors}")
print("=" * 60)
def show_template(model: str):
"""Show the chat template for a model."""
tokenizer = get_tokenizer(model)
print(f"\nChat template for {model}:\n")
print("-" * 60)
print(tokenizer.chat_template)
print("-" * 60)
def start_server(host: str = "0.0.0.0", port: int = 8000):
"""Start the FastAPI server for manual testing."""
from typing import Optional, List, Dict, Any as TypingAny
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
class Message(BaseModel):
role: str
content: Optional[str] = None
tool_calls: Optional[List[Dict[str, TypingAny]]] = None
tool_call_id: Optional[str] = None
class GeneratePromptRequest(BaseModel):
messages: List[Message]
model: str = "PrimeIntellect/INTELLECT-3"
tools: Optional[List[Dict[str, TypingAny]]] = None
inject_tools_as_functions: bool = False
class GeneratePromptResponse(BaseModel):
prompt: str
model: str
app = FastAPI(title="HuggingFace Prompt Generator", version="1.0.0")
@app.post("/generate-prompt", response_model=GeneratePromptResponse)
async def generate_prompt(request: GeneratePromptRequest):
try:
messages = []
for msg in request.messages:
message_dict = {"role": msg.role}
if msg.content is not None:
message_dict["content"] = msg.content
if msg.tool_calls is not None:
tool_calls = []
for tc in msg.tool_calls:
tc_copy = tc.copy()
if "function" in tc_copy and "arguments" in tc_copy["function"]:
args = tc_copy["function"]["arguments"]
if isinstance(args, str):
try:
tc_copy["function"]["arguments"] = json.loads(args)
except json.JSONDecodeError:
pass
tool_calls.append(tc_copy)
message_dict["tool_calls"] = tool_calls
if msg.tool_call_id is not None:
message_dict["tool_call_id"] = msg.tool_call_id
messages.append(message_dict)
prompt = apply_template(request.model, messages, request.tools)
return GeneratePromptResponse(prompt=prompt, model=request.model)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "healthy"}
print(f"Starting server on http://{host}:{port}")
print("Endpoints:")
print(" POST /generate-prompt - Generate prompt from messages")
print(" GET /health - Health check")
uvicorn.run(app, host=host, port=port)
def main():
parser = argparse.ArgumentParser(
description="HuggingFace Prompt Testing Tool",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--model",
"-m",
type=str,
help="HuggingFace model name (e.g., PrimeIntellect/INTELLECT-3)",
)
parser.add_argument(
"--ollama-model",
"-o",
type=str,
help="Ollama model name to compare against (e.g., qwen3-coder)",
)
parser.add_argument(
"--ollama-host",
type=str,
default="http://localhost:11434",
help="Ollama server URL (default: http://localhost:11434)",
)
parser.add_argument(
"--serve",
"-s",
action="store_true",
help="Start FastAPI server for manual curl testing",
)
parser.add_argument(
"--port",
"-p",
type=int,
default=8000,
help="Server port (default: 8000)",
)
parser.add_argument(
"--show-template",
"-t",
action="store_true",
help="Show the chat template for the model",
)
parser.add_argument(
"--repr",
"-r",
action="store_true",
help="Output prompts as Python repr (shows escape sequences)",
)
parser.add_argument(
"--filter",
"-f",
type=str,
help="Filter tests by name (substring match)",
)
args = parser.parse_args()
if args.serve:
start_server(port=args.port)
elif args.model:
if args.show_template:
show_template(args.model)
else:
run_tests(
args.model,
as_repr=args.repr,
test_filter=args.filter,
ollama_model=args.ollama_model,
ollama_host=args.ollama_host,
)
else:
parser.print_help()
print("\nExample usage:")
print(" uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3")
print(" uv run cmd/chat_template/chat_template.py --model Qwen/Qwen3-Coder-480B-A35B-Instruct --ollama-model qwen3-coder")
print(" uv run cmd/chat_template/chat_template.py --serve")
sys.exit(1)
if __name__ == "__main__":
main()
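
The script's Ollama comparison relies on the `_debug_render_only` request flag; the same request can be issued directly, e.g. from Go (a sketch, assuming a local server and the field names used in the script above; the model name is a placeholder):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	payload := map[string]any{
		"model":              "intellect3", // placeholder: any locally available model
		"messages":           []map[string]string{{"role": "user", "content": "Hello!"}},
		"stream":             false,
		"_debug_render_only": true,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		log.Fatal(err)
	}
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Per the script above, the rendered prompt comes back under
	// _debug_info.rendered_template instead of a generation.
	var out struct {
		DebugInfo struct {
			RenderedTemplate string `json:"rendered_template"`
		} `json:"_debug_info"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.DebugInfo.RenderedTemplate)
}
```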


@@ -1,148 +0,0 @@
package main
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
_ "github.com/ollama/ollama/model/models" // Register all models
"github.com/ollama/ollama/model/renderers"
"github.com/ollama/ollama/sample"
)
func main() {
modelPath := "/Users/parth/.ollama/models/blobs/sha256-a87e10578f328b087f888ac7bd1018555e26028a1130980f20312b4de3a10d70"
fmt.Println("Loading OLMo model...")
m, err := model.New(modelPath, ml.BackendParams{AllocMemory: true})
if err != nil {
log.Fatal(err)
}
if err := m.Backend().Load(context.Background(), func(f float32) {}); err != nil {
log.Fatal(err)
}
fmt.Println("✅ Model loaded successfully!")
// Initialize the cache
cache := m.Config().Cache
if cache != nil {
// Initialize with reasonable defaults:
// - dtype: F16
// - maxSequences: 1 (single sequence)
// - capacity: 2048 (context length)
// - maxBatch: 512
cache.Init(m.Backend(), ml.DTypeF16, 1, 2048, 512)
fmt.Printf("✅ Cache initialized (type: %T)\n", cache)
}
// Use the olmo3 renderer to format the prompt properly
messages := []api.Message{
{Role: "user", Content: "wagwan"},
}
// prompt := "Question: What is machine learning? Answer:"
prompt, err := renderers.RenderWithRenderer("olmo3", messages, nil, nil)
if err != nil {
log.Fatal(err)
}
// prompt = prompt[:len(prompt)]
// prompt := "Question: What is machine learning? Answer:"
fmt.Printf("\nRendered prompt:\n%s\n", prompt)
tp := m.(model.TextProcessor)
tokens, err := tp.Encode(prompt, false)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Tokens: %v (count: %d)\n", tokens, len(tokens))
// Generate 20 tokens
maxTokens := 20
generated := make([]int32, 0, maxTokens)
// Create sampler (temperature=0 for greedy sampling)
sampler := sample.NewSampler(0, 0, 0, 0, -1, nil)
for i := 0; i < maxTokens; i++ {
// Create a new context for each generation step to avoid memory buildup
ctx := m.Backend().NewContext()
var inputTokens []int32
var positions []int32
if i == 0 {
// First iteration: process all prompt tokens
inputTokens = tokens
positions = make([]int32, len(tokens))
for j := range positions {
positions[j] = int32(j)
}
} else {
// Subsequent iterations: only process the newly generated token
// The last token is at position len(tokens)-1 (its index in the sequence)
inputTokens = []int32{tokens[len(tokens)-1]}
positions = []int32{int32(len(tokens) - 1)}
}
sequences := make([]int, len(inputTokens))
// All tokens belong to sequence 0
inputsTensor := ctx.Input().FromInts(inputTokens, len(inputTokens))
outputs := ctx.Input().FromInts([]int32{int32(len(inputTokens) - 1)}, 1)
batch := input.Batch{
Inputs: inputsTensor,
Positions: positions,
Sequences: sequences,
Outputs: outputs,
}
// Forward pass (model.Forward handles cache.StartForward internally)
logits, err := model.Forward(ctx, m, batch)
if err != nil {
ctx.Close()
log.Fatal(err)
}
logits = logits.Contiguous(ctx)
ctx.Forward(logits).Compute(logits)
logitValues := logits.Floats()
// Sample next token
nextToken, err := sampler.Sample(logitValues)
if err != nil {
ctx.Close()
log.Fatal(err)
}
// Close context before moving to next iteration
ctx.Close()
generated = append(generated, nextToken)
tokens = append(tokens, nextToken)
// Decode and print
decoded, _ := tp.Decode([]int32{nextToken})
fmt.Print(decoded)
// Stop on EOS or <|im_end|>
if nextToken == 2 || nextToken == 1 { // Common EOS tokens
break
}
// Check if we generated <|im_end|> (stop token for chat)
if decoded == "<|im_end|>" {
break
}
}
fmt.Println("\n\n✅ Generation completed!")
fullText, _ := tp.Decode(generated)
fmt.Printf("Generated: %s\n", fullText)
}

convert/convert.go

@@ -200,8 +200,6 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
 		conv = &qwen25VLModel{}
 	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
 		conv = &qwen3VLModel{}
-	case "OLMo2ForCausalLM", "Olmo2ForCausalLM", "OLMo3ForCausalLM", "Olmo3ForCausalLM":
-		conv = &olmoModel{}
 	case "BertModel":
 		conv = &bertModel{}
 	case "CohereForCausalLM":


@@ -1,94 +0,0 @@
package convert
import (
"cmp"
"github.com/ollama/ollama/fs/ggml"
)
type olmoModel struct {
ModelParameters
HiddenSize uint32 `json:"hidden_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
RMSNormEPS float32 `json:"rms_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
ClampKQV float32 `json:"f_clamp_kqv"`
SlidingWindow uint32 `json:"sliding_window"`
LayerTypes []string `json:"layer_types"`
}
var _ ModelConverter = (*olmoModel)(nil)
func (p *olmoModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "olmo"
kv["olmo.block_count"] = p.NumHiddenLayers
kv["olmo.context_length"] = p.MaxPositionEmbeddings
kv["olmo.embedding_length"] = p.HiddenSize
kv["olmo.feed_forward_length"] = p.IntermediateSize
kv["olmo.attention.head_count"] = p.NumAttentionHeads
kv["olmo.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
if p.RopeTheta > 0 {
kv["olmo.rope.freq_base"] = p.RopeTheta
} else {
kv["olmo.rope.freq_base"] = float32(10000.0)
}
if p.RMSNormEPS > 0 {
kv["olmo.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
}
if p.ClampKQV > 0 {
kv["olmo.attention.clamp_kqv"] = p.ClampKQV
}
if p.SlidingWindow > 0 {
kv["olmo.attention.sliding_window"] = p.SlidingWindow
}
if len(p.LayerTypes) > 0 {
kv["olmo.attention.layer_types"] = p.LayerTypes
}
return kv
}
func (p *olmoModel) Tensors(ts []Tensor) []*ggml.Tensor {
out := make([]*ggml.Tensor, 0, len(ts))
for _, t := range ts {
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *olmoModel) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.layers", "blk",
"model.norm", "output_norm",
"self_attn.q_proj", "attn_q",
"self_attn.k_proj", "attn_k",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_norm", "attn_k_norm",
"post_attention_layernorm", "post_attention_norm",
"post_feedforward_layernorm", "post_ffw_norm",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
}
}


@@ -65,6 +65,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 	}
 	slog.Info("discovering available GPUs...")
+	detectIncompatibleLibraries()

 	// Warn if any user-overrides are set which could lead to incorrect GPU discovery
 	overrideWarnings()

@@ -98,6 +99,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
 			continue
 		} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
 			continue
+		} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
+			slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
+			continue
 		} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
 			slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
 			continue

@@ -484,3 +488,16 @@ func overrideWarnings() {
 		slog.Warn("if GPUs are not correctly discovered, unset and try again")
 	}
 }
+
+func detectIncompatibleLibraries() {
+	if runtime.GOOS != "windows" {
+		return
+	}
+	basePath, err := exec.LookPath("ggml-base.dll")
+	if err != nil || basePath == "" {
+		return
+	}
+	if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
+		slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
+	}
+}

docs/faq.mdx

@@ -57,8 +57,13 @@ ollama ps
 ```

 <Info>
-**Output**: ``` NAME ID SIZE PROCESSOR UNTIL llama3:70b bcfb190ca3a7 42 GB
-100% GPU 4 minutes from now ```
+**Output**:
+
+```
+NAME          ID            SIZE   PROCESSOR  UNTIL
+llama3:70b    bcfb190ca3a7  42 GB  100% GPU   4 minutes from now
+```
 </Info>

 The `Processor` column will show which memory the model was loaded in to:

@@ -385,4 +390,4 @@ Ollama for Windows and macOS register as a login item during installation. You
 - In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`

 **MacOS**
-- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.
\ No newline at end of file
+- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.


@@ -149,9 +149,6 @@ PARAMETER <parameter> <parametervalue>
 | Parameter | Description | Value Type | Example Usage |
 | --------- | ----------- | ---------- | ------------- |
-| mirostat | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) | int | mirostat 0 |
-| mirostat_eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1) | float | mirostat_eta 0.1 |
-| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
 | num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
 | repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
 | repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |


@@ -252,7 +252,6 @@ func (kv KV) OllamaEngineRequired() bool {
 		"deepseekocr",
 		"deepseek2",
 		"nomic-bert",
-		"olmo2",
 	}, kv.Architecture())
 }


@@ -13,7 +13,6 @@ import (
 	_ "github.com/ollama/ollama/model/models/mistral3"
 	_ "github.com/ollama/ollama/model/models/mllama"
 	_ "github.com/ollama/ollama/model/models/nomicbert"
-	_ "github.com/ollama/ollama/model/models/olmo"
 	_ "github.com/ollama/ollama/model/models/qwen2"
 	_ "github.com/ollama/ollama/model/models/qwen25vl"
 	_ "github.com/ollama/ollama/model/models/qwen3"


@@ -1,271 +0,0 @@
package olmo
import (
"cmp"
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/fast"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
type Options struct {
hiddenSize, numHeads, numKVHeads int
headDim, ropeDim int
eps, ropeBase, ropeScale float32
clampKQV float32
originalContextLength int
attnFactor float32
slidingWindow int32
slidingWindowPattern []bool // per-layer SWA pattern (true = SWA, false = full attention)
}
type Model struct {
model.Base
model.TextProcessor
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Layers []Layer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
Options
}
func New(c fs.Config) (model.Model, error) {
vocabulary := model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
}
if c.String("tokenizer.ggml.model") != "gpt2" {
return nil, model.ErrUnsupportedTokenizer
}
var pretokenizers []string
if c.String("tokenizer.ggml.pre") != "default" {
pretokenizers = []string{
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
}
}
processor := model.NewBytePairEncoding(&vocabulary, pretokenizers...)
slidingWindow := int32(c.Uint("attention.sliding_window"))
slidingWindowPattern := c.Bools("attention.sliding_window_pattern")
m := Model{
TextProcessor: processor,
Layers: make([]Layer, c.Uint("block_count")),
Options: Options{
hiddenSize: int(c.Uint("embedding_length")),
numHeads: int(c.Uint("attention.head_count")),
numKVHeads: int(c.Uint("attention.head_count_kv")),
headDim: int(c.Uint("attention.key_length")),
ropeDim: int(c.Uint("rope.dimension_count")),
eps: c.Float("attention.layer_norm_rms_epsilon"),
ropeBase: c.Float("rope.freq_base", 1e4),
ropeScale: c.Float("rope.scaling.factor", 1),
clampKQV: c.Float("attention.clamp_kqv", 0),
originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
attnFactor: c.Float("rope.scaling.attn_factor", 1),
slidingWindow: slidingWindow,
slidingWindowPattern: slidingWindowPattern,
},
}
// OLMo3 uses interleaved sliding window attention (every 4th layer is full attention)
m.Cache = kvcache.NewWrapperCache(
kvcache.NewSWACache(slidingWindow, m.Shift),
kvcache.NewCausalCache(m.Shift),
)
return &m, nil
}
type SelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output"`
QNorm *nn.RMSNorm `gguf:"attn_q_norm"`
KNorm *nn.RMSNorm `gguf:"attn_k_norm"`
RopeFactors ml.Tensor `gguf:"rope_freqs.weight"`
}
func (o *Options) ropeOptions(factors ml.Tensor, isSWA bool) []func(*rope.Options) {
opts := []func(*rope.Options){
rope.WithFactors(factors),
}
if o.originalContextLength > 0 {
if isSWA {
// For SWA layers, use regular rope with no YaRN scaling
// ext_factor=0.0, attn_factor=1.0 per llama.cpp
opts = append(opts,
rope.WithOriginalContextLength(o.originalContextLength),
rope.WithExtrapolationFactor(0.),
rope.WithAttentionFactor(1.),
)
} else {
// For full attention layers, use YaRN scaling
opts = append(opts,
rope.WithOriginalContextLength(o.originalContextLength),
rope.WithExtrapolationFactor(1.),
rope.WithAttentionFactor(o.attnFactor),
)
}
}
return opts
}
func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positions ml.Tensor, cache kvcache.Cache, opts *Options, isSWA bool) ml.Tensor {
batchSize := hiddenState.Dim(1)
headDim := cmp.Or(opts.headDim, opts.hiddenSize/opts.numHeads)
ropeDim := cmp.Or(opts.ropeDim, headDim)
query := sa.Query.Forward(ctx, hiddenState)
if sa.QNorm != nil {
query = sa.QNorm.Forward(ctx, query, opts.eps)
}
query = query.Reshape(ctx, headDim, opts.numHeads, batchSize)
key := sa.Key.Forward(ctx, hiddenState)
if sa.KNorm != nil {
key = sa.KNorm.Forward(ctx, key, opts.eps)
}
key = key.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
value := sa.Value.Forward(ctx, hiddenState)
value = value.Reshape(ctx, headDim, opts.numKVHeads, batchSize)
freqScale := float32(1.0)
if !isSWA {
freqScale = 1. / opts.ropeScale
}
ropeOpts := opts.ropeOptions(sa.RopeFactors, isSWA)
query = fast.RoPE(ctx, query, positions, ropeDim, opts.ropeBase, freqScale, ropeOpts...)
key = fast.RoPE(ctx, key, positions, ropeDim, opts.ropeBase, freqScale, ropeOpts...)
attention := nn.Attention(ctx, query, key, value, 1.0/math.Sqrt(float64(headDim)), cache)
attention = attention.Reshape(ctx, headDim*opts.numHeads, batchSize)
return sa.Output.Forward(ctx, attention)
}
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
ropeDim := cmp.Or(m.ropeDim, m.hiddenSize/m.numHeads)
isSWA := m.isSWALayer(layer)
freqScale := float32(1.0)
if !isSWA {
freqScale = 1. / m.ropeScale
}
ropeOpts := m.Options.ropeOptions(m.Layers[layer].SelfAttention.RopeFactors, isSWA)
return fast.RoPE(ctx, key, shift, ropeDim, m.ropeBase, freqScale, ropeOpts...), nil
}
type MLP struct {
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
Gate *nn.Linear `gguf:"ffn_gate"`
}
func (mlp *MLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
hiddenState = mlp.Gate.Forward(ctx, hiddenState).SILU(ctx, mlp.Up.Forward(ctx, hiddenState))
return mlp.Down.Forward(ctx, hiddenState)
}
type Layer struct {
SelfAttention *SelfAttention
PostAttentionNorm *nn.RMSNorm `gguf:"post_attention_norm"`
MLP *MLP
PostFFWNorm *nn.RMSNorm `gguf:"post_ffw_norm"`
}
func (l *Layer) Forward(ctx ml.Context, hiddenState, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options, isSWA bool) ml.Tensor {
residual := hiddenState
hiddenState = l.SelfAttention.Forward(ctx, hiddenState, positions, cache, opts, isSWA)
if outputs != nil {
hiddenState = hiddenState.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
}
if l.PostAttentionNorm != nil {
hiddenState = l.PostAttentionNorm.Forward(ctx, hiddenState, opts.eps)
}
ffnInput := hiddenState.Add(ctx, residual)
hiddenState = l.MLP.Forward(ctx, ffnInput, opts)
if l.PostFFWNorm != nil {
hiddenState = l.PostFFWNorm.Forward(ctx, hiddenState, opts.eps)
}
return hiddenState.Add(ctx, ffnInput)
}
// isSWALayer returns true if the layer uses sliding window attention.
// Uses the sliding_window_pattern from the model config if available,
// otherwise falls back to the default OLMo3 pattern (every 4th layer is full attention).
func (m *Model) isSWALayer(layerIdx int) bool {
if len(m.slidingWindowPattern) > layerIdx {
return m.slidingWindowPattern[layerIdx]
}
// Fallback: OLMo3 pattern where every 4th layer (indices 3, 7, 11, ...) uses full attention
return (layerIdx+1)%4 != 0
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
for i, layer := range m.Layers {
m.Cache.SetLayer(i)
isSWA := m.isSWALayer(i)
// Set cache type for interleaved SWA (OLMo3)
if wc, ok := m.Cache.(*kvcache.WrapperCache); ok {
if isSWA {
wc.SetLayerType(0) // SWA cache
} else {
wc.SetLayerType(1) // Causal cache
}
}
var outputs ml.Tensor
if i == len(m.Layers)-1 {
outputs = batch.Outputs
}
hiddenState = layer.Forward(ctx, hiddenState, positions, outputs, m.Cache, &m.Options, isSWA)
}
hiddenState = m.OutputNorm.Forward(ctx, hiddenState, m.eps)
return m.Output.Forward(ctx, hiddenState), nil
}
func init() {
model.Register("olmo2", New)
}


@@ -1,132 +0,0 @@
package olmo
import (
"context"
"fmt"
"log"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
"github.com/ollama/ollama/sample"
)
func main() {
modelPath := "/Users/nicole/models/Olmo-3-7B-Think/olmo-3-7b-think-q8_0.gguf"
fmt.Println("Loading OLMo model...")
m, err := model.New(modelPath, ml.BackendParams{AllocMemory: true})
if err != nil {
log.Fatal(err)
}
if err := m.Backend().Load(context.Background(), func(f float32) {}); err != nil {
log.Fatal(err)
}
fmt.Println("✅ Model loaded successfully!")
// Initialize the cache
cache := m.Config().Cache
if cache != nil {
// Initialize with reasonable defaults:
// - dtype: F16
// - maxSequences: 1 (single sequence)
// - capacity: 2048 (context length)
// - maxBatch: 512
cache.Init(m.Backend(), ml.DTypeF16, 1, 2048, 512)
fmt.Printf("✅ Cache initialized (type: %T)\n", cache)
}
// Test generation
prompt := "Question: What is machine learning? Answer:"
fmt.Printf("\nPrompt: %s\n", prompt)
tp := m.(model.TextProcessor)
tokens, err := tp.Encode(prompt, true)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Tokens: %v (count: %d)\n", tokens, len(tokens))
// Generate 20 tokens
maxTokens := 20
generated := make([]int32, 0, maxTokens)
// Create sampler (temperature=0 for greedy sampling)
sampler := sample.NewSampler(0, 0, 0, 0, -1, nil)
for i := 0; i < maxTokens; i++ {
// Create a new context for each generation step to avoid memory buildup
ctx := m.Backend().NewContext()
var inputTokens []int32
var positions []int32
if i == 0 {
// First iteration: process all prompt tokens
inputTokens = tokens
positions = make([]int32, len(tokens))
for j := range positions {
positions[j] = int32(j)
}
} else {
// Subsequent iterations: only process the newly generated token
// The last token is at position len(tokens)-1 (its index in the sequence)
inputTokens = []int32{tokens[len(tokens)-1]}
positions = []int32{int32(len(tokens) - 1)}
}
sequences := make([]int, len(inputTokens))
// All tokens belong to sequence 0
inputsTensor := ctx.Input().FromInts(inputTokens, len(inputTokens))
outputs := ctx.Input().FromInts([]int32{int32(len(inputTokens) - 1)}, 1)
batch := input.Batch{
Inputs: inputsTensor,
Positions: positions,
Sequences: sequences,
Outputs: outputs,
}
// Forward pass (model.Forward handles cache.StartForward internally)
logits, err := model.Forward(ctx, m, batch)
if err != nil {
ctx.Close()
log.Fatal(err)
}
logits = logits.Contiguous(ctx)
ctx.Forward(logits).Compute(logits)
logitValues := logits.Floats()
// Sample next token
nextToken, err := sampler.Sample(logitValues)
if err != nil {
ctx.Close()
log.Fatal(err)
}
// Close context before moving to next iteration
ctx.Close()
generated = append(generated, nextToken)
tokens = append(tokens, nextToken)
// Decode and print
decoded, _ := tp.Decode([]int32{nextToken})
fmt.Print(decoded)
// Stop on EOS
if nextToken == 2 || nextToken == 1 { // Common EOS tokens
break
}
}
fmt.Println("\n\n✅ Generation completed!")
fullText, _ := tp.Decode(generated)
fmt.Printf("Generated: %s\n", fullText)
}


@@ -1,469 +0,0 @@
package parsers
import (
"context"
"fmt"
"log/slog"
"regexp"
"strconv"
"strings"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
)
type olmo3ParserState int
const (
olmo3StateContent olmo3ParserState = iota
olmo3StateToolCalls
olmo3StateToolCallsDone
)
const (
olmo3FuncCallsOpenTag = "<function_calls>"
olmo3FuncCallsCloseTag = "</function_calls>"
)
type Olmo3Parser struct {
state olmo3ParserState
buffer strings.Builder
}
func (p *Olmo3Parser) HasToolSupport() bool {
return true
}
func (p *Olmo3Parser) HasThinkingSupport() bool {
return false
}
func (p *Olmo3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.state = olmo3StateContent
return tools
}
type olmo3ParserEvent interface {
isOlmo3ParserEvent()
}
type olmo3ParserEventContent struct {
content string
}
type olmo3ParserEventToolCalls struct {
calls []api.ToolCall
}
func (olmo3ParserEventContent) isOlmo3ParserEvent() {}
func (olmo3ParserEventToolCalls) isOlmo3ParserEvent() {}
func (p *Olmo3Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
p.buffer.WriteString(s)
if done {
// Drain any remaining content
bufStr := p.buffer.String()
p.buffer.Reset()
if p.state == olmo3StateContent && len(bufStr) > 0 {
return bufStr, "", nil, nil
}
return "", "", nil, nil
}
events := p.parseEvents()
var contentSb strings.Builder
var allCalls []api.ToolCall
for _, event := range events {
switch event := event.(type) {
case olmo3ParserEventContent:
contentSb.WriteString(event.content)
case olmo3ParserEventToolCalls:
allCalls = append(allCalls, event.calls...)
}
}
return contentSb.String(), "", allCalls, nil
}
func (p *Olmo3Parser) parseEvents() []olmo3ParserEvent {
var all []olmo3ParserEvent
keepLooping := true
for keepLooping {
var events []olmo3ParserEvent
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
if len(all) > 0 {
slog.Log(context.TODO(), logutil.LevelTrace, "olmo3 events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
}
return all
}
func (p *Olmo3Parser) eat() ([]olmo3ParserEvent, bool) {
var events []olmo3ParserEvent
bufStr := p.buffer.String()
if bufStr == "" {
return events, false
}
switch p.state {
case olmo3StateContent:
if strings.Contains(bufStr, olmo3FuncCallsOpenTag) {
// Found <function_calls> tag
split := strings.SplitN(bufStr, olmo3FuncCallsOpenTag, 2)
content := split[0]
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = olmo3StateToolCalls
if len(content) > 0 {
events = append(events, olmo3ParserEventContent{content: content})
}
return events, true
} else if overlapLen := overlap(bufStr, olmo3FuncCallsOpenTag); overlapLen > 0 {
// Partial <function_calls> tag - withhold ambiguous content
unambiguous := bufStr[:len(bufStr)-overlapLen]
ambiguous := bufStr[len(bufStr)-overlapLen:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, olmo3ParserEventContent{content: unambiguous})
}
return events, false
} else {
// Regular content - emit all
p.buffer.Reset()
if len(bufStr) > 0 {
events = append(events, olmo3ParserEventContent{content: bufStr})
}
return events, false
}
case olmo3StateToolCalls:
if strings.Contains(bufStr, olmo3FuncCallsCloseTag) {
// Found </function_calls> tag
split := strings.SplitN(bufStr, olmo3FuncCallsCloseTag, 2)
toolCallsStr := split[0]
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
p.state = olmo3StateToolCallsDone
// Parse the function calls
calls, err := parseOlmo3FunctionCalls(toolCallsStr)
if err != nil {
slog.Log(context.TODO(), logutil.LevelTrace, "failed to parse olmo3 function calls", "error", err, "content", toolCallsStr)
} else if len(calls) > 0 {
events = append(events, olmo3ParserEventToolCalls{calls: calls})
}
return events, true
} else if overlapLen := overlap(bufStr, olmo3FuncCallsCloseTag); overlapLen > 0 {
// Partial </function_calls> tag - wait for more
return events, false
}
// Still collecting tool calls, wait for close tag
return events, false
case olmo3StateToolCallsDone:
// After tool calls, emit remaining content
p.buffer.Reset()
p.state = olmo3StateContent
if len(bufStr) > 0 {
events = append(events, olmo3ParserEventContent{content: bufStr})
}
return events, false
}
return events, false
}
// parseOlmo3FunctionCalls parses function calls in Python-esque format:
// func_name(arg1="value1", arg2=123)
// Multiple calls are separated by newlines
func parseOlmo3FunctionCalls(s string) ([]api.ToolCall, error) {
var calls []api.ToolCall
s = strings.TrimSpace(s)
if s == "" {
return calls, nil
}
// Split by newlines for multiple function calls
lines := strings.Split(s, "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if line == "" {
continue
}
call, err := parseOlmo3SingleFunctionCall(line)
if err != nil {
return nil, fmt.Errorf("failed to parse function call %q: %w", line, err)
}
calls = append(calls, call)
}
return calls, nil
}
// Regex to match function call: func_name(args)
var funcCallRegex = regexp.MustCompile(`^(\w+)\((.*)\)$`)
// Regex to match a single argument: key=value
// Value can be: "string", 'string', number, true, false, null, or nested structures
var argRegex = regexp.MustCompile(`^(\w+)=(.+)$`)
func parseOlmo3SingleFunctionCall(s string) (api.ToolCall, error) {
matches := funcCallRegex.FindStringSubmatch(s)
if matches == nil {
return api.ToolCall{}, fmt.Errorf("invalid function call format")
}
funcName := matches[1]
argsStr := matches[2]
args, err := parseOlmo3Arguments(argsStr)
if err != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse arguments: %w", err)
}
return api.ToolCall{
Function: api.ToolCallFunction{
Name: funcName,
Arguments: args,
},
}, nil
}
// parseOlmo3Arguments parses comma-separated key=value pairs
// Handles nested parentheses, brackets, braces, and quoted strings
func parseOlmo3Arguments(s string) (map[string]any, error) {
args := make(map[string]any)
s = strings.TrimSpace(s)
if s == "" {
return args, nil
}
// Split by commas, but respect nested structures and quotes
parts := splitArguments(s)
for _, part := range parts {
part = strings.TrimSpace(part)
if part == "" {
continue
}
// Find the first = sign
eqIdx := strings.Index(part, "=")
if eqIdx == -1 {
return nil, fmt.Errorf("invalid argument format: %s", part)
}
key := strings.TrimSpace(part[:eqIdx])
valueStr := strings.TrimSpace(part[eqIdx+1:])
value, err := parseOlmo3Value(valueStr)
if err != nil {
return nil, fmt.Errorf("failed to parse value for %s: %w", key, err)
}
args[key] = value
}
return args, nil
}
// splitArguments splits arguments by commas, respecting quotes and nested structures
func splitArguments(s string) []string {
var parts []string
var current strings.Builder
depth := 0
inString := false
stringChar := byte(0)
escaped := false
for i := 0; i < len(s); i++ {
c := s[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' && inString {
current.WriteByte(c)
escaped = true
continue
}
if (c == '"' || c == '\'') && !inString {
inString = true
stringChar = c
current.WriteByte(c)
continue
}
if c == stringChar && inString {
inString = false
stringChar = 0
current.WriteByte(c)
continue
}
if !inString {
switch c {
case '(', '[', '{':
depth++
current.WriteByte(c)
case ')', ']', '}':
depth--
current.WriteByte(c)
case ',':
if depth == 0 {
parts = append(parts, current.String())
current.Reset()
continue
}
current.WriteByte(c)
default:
current.WriteByte(c)
}
} else {
current.WriteByte(c)
}
}
if current.Len() > 0 {
parts = append(parts, current.String())
}
return parts
}
// parseOlmo3Value parses a value which can be a string, number, boolean, null, array, or object
func parseOlmo3Value(s string) (any, error) {
s = strings.TrimSpace(s)
// Check for quoted string
if (strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`)) ||
(strings.HasPrefix(s, `'`) && strings.HasSuffix(s, `'`)) {
// Remove quotes and unescape
inner := s[1 : len(s)-1]
return unescapeString(inner), nil
}
// Check for boolean
if s == "true" || s == "True" {
return true, nil
}
if s == "false" || s == "False" {
return false, nil
}
// Check for null/None
if s == "null" || s == "None" || s == "nil" {
return nil, nil
}
// Check for number
if i, err := strconv.ParseInt(s, 10, 64); err == nil {
return i, nil
}
if f, err := strconv.ParseFloat(s, 64); err == nil {
return f, nil
}
// Check for array [...]
if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") {
return parseOlmo3Array(s[1 : len(s)-1])
}
// Check for object {...}
if strings.HasPrefix(s, "{") && strings.HasSuffix(s, "}") {
return parseOlmo3Object(s[1 : len(s)-1])
}
// Default to string without quotes
return s, nil
}
func parseOlmo3Array(s string) ([]any, error) {
s = strings.TrimSpace(s)
if s == "" {
return []any{}, nil
}
parts := splitArguments(s)
var arr []any
for _, part := range parts {
val, err := parseOlmo3Value(part)
if err != nil {
return nil, err
}
arr = append(arr, val)
}
return arr, nil
}
func parseOlmo3Object(s string) (map[string]any, error) {
s = strings.TrimSpace(s)
if s == "" {
return map[string]any{}, nil
}
// Objects use key: value or "key": value format
obj := make(map[string]any)
parts := splitArguments(s)
for _, part := range parts {
part = strings.TrimSpace(part)
if part == "" {
continue
}
// Find colon separator
colonIdx := strings.Index(part, ":")
if colonIdx == -1 {
return nil, fmt.Errorf("invalid object entry: %s", part)
}
keyStr := strings.TrimSpace(part[:colonIdx])
valueStr := strings.TrimSpace(part[colonIdx+1:])
// Remove quotes from key if present
if (strings.HasPrefix(keyStr, `"`) && strings.HasSuffix(keyStr, `"`)) ||
(strings.HasPrefix(keyStr, `'`) && strings.HasSuffix(keyStr, `'`)) {
keyStr = keyStr[1 : len(keyStr)-1]
}
val, err := parseOlmo3Value(valueStr)
if err != nil {
return nil, fmt.Errorf("failed to parse value for key %s: %w", keyStr, err)
}
obj[keyStr] = val
}
return obj, nil
}
func unescapeString(s string) string {
// Handle common escape sequences
s = strings.ReplaceAll(s, `\\`, "\x00") // Placeholder for backslash
s = strings.ReplaceAll(s, `\"`, `"`)
s = strings.ReplaceAll(s, `\'`, `'`)
s = strings.ReplaceAll(s, `\n`, "\n")
s = strings.ReplaceAll(s, `\t`, "\t")
s = strings.ReplaceAll(s, `\r`, "\r")
s = strings.ReplaceAll(s, "\x00", `\`) // Restore backslash
return s
}


@@ -1,483 +0,0 @@
package parsers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
func TestOlmo3Parser(t *testing.T) {
tests := []struct {
name string
input string
expectedContent string
expectedThinking string
expectedCalls []api.ToolCall
}{
{
name: "simple content",
input: "Hello, how can I help you?",
expectedContent: "Hello, how can I help you?",
},
{
name: "simple tool call",
input: `<function_calls>get_weather(location="San Francisco")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "San Francisco"},
},
},
},
},
{
name: "content then tool call",
input: `Let me check the weather.<function_calls>get_weather(location="NYC")</function_calls>`,
expectedContent: "Let me check the weather.",
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "NYC"},
},
},
},
},
{
name: "tool call with multiple arguments",
input: `<function_calls>book_flight(from="SFO", to="NYC", date="2024-01-15")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "book_flight",
Arguments: map[string]any{
"from": "SFO",
"to": "NYC",
"date": "2024-01-15",
},
},
},
},
},
{
name: "multiple tool calls",
input: `<function_calls>get_weather(location="San Francisco")
get_weather(location="New York")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "San Francisco"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "New York"},
},
},
},
},
{
name: "tool call with numeric argument",
input: `<function_calls>set_temperature(value=72)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_temperature",
Arguments: map[string]any{"value": int64(72)},
},
},
},
},
{
name: "tool call with float argument",
input: `<function_calls>set_price(amount=19.99)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_price",
Arguments: map[string]any{"amount": 19.99},
},
},
},
},
{
name: "tool call with boolean argument",
input: `<function_calls>toggle_setting(enabled=true)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "toggle_setting",
Arguments: map[string]any{"enabled": true},
},
},
},
},
{
name: "tool call with null argument",
input: `<function_calls>clear_value(field=null)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "clear_value",
Arguments: map[string]any{"field": nil},
},
},
},
},
{
name: "tool call with array argument",
input: `<function_calls>process_items(items=["apple", "banana", "cherry"])</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "process_items",
Arguments: map[string]any{"items": []any{"apple", "banana", "cherry"}},
},
},
},
},
{
name: "tool call with dict argument",
input: `<function_calls>update_config(settings={"theme": "dark", "fontSize": 14})</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "update_config",
Arguments: map[string]any{
"settings": map[string]any{
"theme": "dark",
"fontSize": int64(14),
},
},
},
},
},
},
{
name: "tool call with nested dict",
input: `<function_calls>create_request(data={"user": {"name": "John", "age": 30}, "active": true})</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create_request",
Arguments: map[string]any{
"data": map[string]any{
"user": map[string]any{
"name": "John",
"age": int64(30),
},
"active": true,
},
},
},
},
},
},
{
name: "tool call with no arguments",
input: `<function_calls>get_current_time()</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_current_time",
Arguments: map[string]any{},
},
},
},
},
{
name: "tool call with single quotes",
input: `<function_calls>search(query='hello world')</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: map[string]any{"query": "hello world"},
},
},
},
},
{
name: "tool call with escaped quotes",
input: `<function_calls>search(query="say \"hello\"")</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: map[string]any{"query": `say "hello"`},
},
},
},
},
{
name: "tool call with mixed argument types",
input: `<function_calls>create_user(name="John", age=30, active=true)</function_calls>`,
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create_user",
Arguments: map[string]any{
"name": "John",
"age": int64(30),
"active": true,
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := &Olmo3Parser{}
p.Init(nil, nil, nil)
content, thinking, calls, err := p.Add(tt.input, false)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Drain remaining content
finalContent, finalThinking, finalCalls, err := p.Add("", true)
if err != nil {
t.Fatalf("unexpected error on done: %v", err)
}
content += finalContent
thinking += finalThinking
calls = append(calls, finalCalls...)
if diff := cmp.Diff(content, tt.expectedContent); diff != "" {
t.Errorf("content mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(thinking, tt.expectedThinking); diff != "" {
t.Errorf("thinking mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(calls, tt.expectedCalls); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
})
}
}
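The cases above all follow the parser's two-phase contract: stream chunks through Add(chunk, false), then call Add("", true) once to drain whatever is still buffered. A minimal sketch of a caller, assuming only the Add signature these tests already exercise (collect is a hypothetical helper, not part of the package):

// collect mirrors what the tests do: it feeds chunks to the parser and
// aggregates the results, finishing with a drain call so buffered tag
// fragments are flushed.
func collect(p *Olmo3Parser, chunks []string) (string, string, []api.ToolCall, error) {
	var content, thinking string
	var calls []api.ToolCall
	for _, chunk := range chunks {
		c, th, tc, err := p.Add(chunk, false)
		if err != nil {
			return "", "", nil, err
		}
		content += c
		thinking += th
		calls = append(calls, tc...)
	}
	// done=true flushes anything still held back (e.g. a partial "<func").
	c, th, tc, err := p.Add("", true)
	if err != nil {
		return "", "", nil, err
	}
	return content + c, thinking + th, append(calls, tc...), nil
}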

func TestOlmo3Parser_Streaming(t *testing.T) {
tests := []struct {
name string
chunks []string
expectedContent string
expectedCalls []api.ToolCall
}{
{
name: "streaming content",
chunks: []string{"Hello, ", "how ", "can I help?"},
expectedContent: "Hello, how can I help?",
},
{
name: "streaming tool call",
chunks: []string{"<function_", "calls>get_weather", "(location=\"SF\")", "</function_calls>"},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "SF"},
},
},
},
},
{
name: "streaming content then tool call",
chunks: []string{"Let me check.", "<function_calls>", "get_weather(location=\"NYC\")", "</function_calls>"},
expectedContent: "Let me check.",
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "NYC"},
},
},
},
},
{
name: "tool call tag split across chunks",
chunks: []string{"<func", "tion_calls>test()</function_calls>"},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test",
Arguments: map[string]any{},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := &Olmo3Parser{}
p.Init(nil, nil, nil)
var allContent string
var allCalls []api.ToolCall
for _, chunk := range tt.chunks {
content, _, calls, err := p.Add(chunk, false)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
allContent += content
allCalls = append(allCalls, calls...)
}
// Drain
content, _, calls, err := p.Add("", true)
if err != nil {
t.Fatalf("unexpected error on done: %v", err)
}
allContent += content
allCalls = append(allCalls, calls...)
if diff := cmp.Diff(allContent, tt.expectedContent); diff != "" {
t.Errorf("content mismatch (-got +want):\n%s", diff)
}
if diff := cmp.Diff(allCalls, tt.expectedCalls); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
})
}
}
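The last case is the delicate one: a tag split across chunks means the parser must hold back any suffix of its buffer that could still grow into <function_calls> instead of emitting it as content. One way to compute that hold-back, sketched independently of the actual parser internals (holdback is a hypothetical name; a strings import is assumed):

// holdback returns the length of the longest suffix of buf that is a
// prefix of tag, i.e. the bytes that must stay buffered because they may
// be the start of a tag continued in the next chunk.
func holdback(buf, tag string) int {
	max := len(tag)
	if len(buf) < max {
		max = len(buf)
	}
	for n := max; n > 0; n-- {
		if strings.HasSuffix(buf, tag[:n]) {
			return n
		}
	}
	return 0
}

With buf = "Let me check.<func" and tag = "<function_calls>", holdback returns 5: "Let me check." can stream out while "<func" waits for the next chunk.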

func TestOlmo3Parser_HasToolSupport(t *testing.T) {
p := &Olmo3Parser{}
if !p.HasToolSupport() {
t.Error("expected HasToolSupport to return true")
}
}

func TestOlmo3Parser_HasThinkingSupport(t *testing.T) {
p := &Olmo3Parser{}
if p.HasThinkingSupport() {
t.Error("expected HasThinkingSupport to return false")
}
}

func TestParseOlmo3FunctionCalls(t *testing.T) {
tests := []struct {
name string
input string
expected []api.ToolCall
wantErr bool
}{
{
name: "simple call",
input: `get_weather(location="SF")`,
expected: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "SF"},
},
},
},
},
{
name: "multiple args",
input: `send_email(to="user@example.com", subject="Hello", body="Test message")`,
expected: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "send_email",
Arguments: map[string]any{
"to": "user@example.com",
"subject": "Hello",
"body": "Test message",
},
},
},
},
},
{
name: "multiple calls with newlines",
input: `get_weather(location="SF")
get_time(timezone="PST")`,
expected: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "SF"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_time",
Arguments: map[string]any{"timezone": "PST"},
},
},
},
},
{
name: "empty input",
input: "",
expected: nil,
},
{
name: "whitespace only",
input: " \n ",
expected: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
calls, err := parseOlmo3FunctionCalls(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("parseOlmo3FunctionCalls() error = %v, wantErr %v", err, tt.wantErr)
return
}
if diff := cmp.Diff(calls, tt.expected); diff != "" {
t.Errorf("calls mismatch (-got +want):\n%s", diff)
}
})
}
}
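parseOlmo3FunctionCalls itself is defined in the parser source, which this diff does not show. From the cases above it must split the tag body one call per line, then separate the name from a comma-separated argument list whose commas may also appear inside quotes, brackets, or braces. A hedged sketch of that splitting step only (callLineRe and splitArgs are hypothetical; regexp and strings imports are assumed):

var callLineRe = regexp.MustCompile(`^([A-Za-z_][A-Za-z0-9_]*)\((.*)\)$`)

// splitArgs breaks `a=1, b="x, y", c=[1, 2]` into top-level key=value
// pairs, ignoring commas inside quotes and inside nested brackets.
func splitArgs(s string) []string {
	var parts []string
	depth, start := 0, 0
	var quote byte
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == '\\' {
				i++ // skip the escaped character
			} else if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '[' || c == '{' || c == '(':
			depth++
		case c == ']' || c == '}' || c == ')':
			depth--
		case c == ',' && depth == 0:
			parts = append(parts, strings.TrimSpace(s[start:i]))
			start = i + 1
		}
	}
	if rest := strings.TrimSpace(s[start:]); rest != "" {
		parts = append(parts, rest)
	}
	return parts
}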

func TestParseOlmo3Value(t *testing.T) {
tests := []struct {
name string
input string
expected any
}{
{"string double quotes", `"hello"`, "hello"},
{"string single quotes", `'hello'`, "hello"},
{"integer", "42", int64(42)},
{"negative integer", "-10", int64(-10)},
{"float", "3.14", 3.14},
{"boolean true", "true", true},
{"boolean True", "True", true},
{"boolean false", "false", false},
{"null", "null", nil},
{"None", "None", nil},
{"empty array", "[]", []any{}},
{"array with strings", `["a", "b"]`, []any{"a", "b"}},
{"array with numbers", "[1, 2, 3]", []any{int64(1), int64(2), int64(3)}},
{"empty object", "{}", map[string]any{}},
{"simple object", `{"name": "John"}`, map[string]any{"name": "John"}},
{"object with number", `{"age": 30}`, map[string]any{"age": int64(30)}},
{"object with multiple keys", `{"a": 1, "b": 2}`, map[string]any{"a": int64(1), "b": int64(2)}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := parseOlmo3Value(tt.input)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if diff := cmp.Diff(result, tt.expected); diff != "" {
t.Errorf("value mismatch (-got +want):\n%s", diff)
}
})
}
}
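This table pins down the value grammar: JSON, plus the Python spellings True/False/None and single-quoted strings, with whole numbers surfacing as int64 rather than float64. Plain encoding/json decodes every number to float64, so an implementation needs json.Number somewhere. A hedged sketch of that technique (parseValueSketch and normalizeNumbers are hypothetical names; encoding/json and strings imports are assumed):

// parseValueSketch mirrors the behavior the table above pins down. It is a
// stand-in for parseOlmo3Value, not the shipped implementation.
func parseValueSketch(s string) (any, error) {
	s = strings.TrimSpace(s)
	switch s {
	case "True":
		return true, nil
	case "False":
		return false, nil
	case "None":
		return nil, nil
	}
	// Single-quoted strings are not valid JSON; convert the common case.
	// (Naive: does not handle embedded escape sequences.)
	if len(s) >= 2 && s[0] == '\'' && s[len(s)-1] == '\'' {
		s = `"` + strings.ReplaceAll(s[1:len(s)-1], `"`, `\"`) + `"`
	}
	dec := json.NewDecoder(strings.NewReader(s))
	dec.UseNumber() // keep 42 recoverable as int64, not float64
	var v any
	if err := dec.Decode(&v); err != nil {
		return nil, err
	}
	return normalizeNumbers(v), nil
}

// normalizeNumbers turns json.Number into int64 when it fits, else float64,
// recursing into arrays and objects so nested values match the table.
func normalizeNumbers(v any) any {
	switch t := v.(type) {
	case json.Number:
		if i, err := t.Int64(); err == nil {
			return i
		}
		f, _ := t.Float64()
		return f
	case []any:
		for i := range t {
			t[i] = normalizeNumbers(t[i])
		}
	case map[string]any:
		for k := range t {
			t[k] = normalizeNumbers(t[k])
		}
	}
	return v
}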

View File

@@ -54,8 +54,6 @@ func ParserForName(name string) Parser {
return harmony.NewHarmonyMessageHandler()
case "cogito":
return &CogitoParser{}
case "olmo3":
return &Olmo3Parser{}
default:
return nil
}

View File

@@ -1,147 +0,0 @@
package renderers

import (
"encoding/json"
"fmt"
"sort"
"strings"
"github.com/ollama/ollama/api"
)

const (
olmo3DefaultSystemMessage = "You are a helpful function-calling AI assistant. "
olmo3NoFunctionsMessage = "You do not currently have access to any functions. "
olmo3WithFunctionsMessage = "You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions."
)

type Olmo3Renderer struct{}

func (r *Olmo3Renderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
var systemMessage *api.Message
filteredMessages := make([]api.Message, 0, len(messages))
for i, message := range messages {
if message.Role == "system" {
if systemMessage == nil {
systemMessage = &messages[i]
}
continue
}
filteredMessages = append(filteredMessages, message)
}
// Render system message
if systemMessage != nil {
// Custom system message - single newline after "system"
sb.WriteString("<|im_start|>system\n")
sb.WriteString(systemMessage.Content)
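// Note: a user-supplied system message keeps its own wording; the
// <functions> block is appended as-is, and olmo3WithFunctionsMessage is
// only added on the default-system path in the else branch below.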
if len(tools) > 0 {
functionsJSON, err := marshalWithSpaces(tools)
if err != nil {
return "", err
}
sb.WriteString("<functions>")
sb.WriteString(string(functionsJSON))
sb.WriteString("</functions>")
}
sb.WriteString("<|im_end|>\n")
} else {
// Default system message - single newline after "system"
sb.WriteString("<|im_start|>system\n")
sb.WriteString(olmo3DefaultSystemMessage)
if len(tools) > 0 {
functionsJSON, err := marshalWithSpaces(tools)
if err != nil {
return "", err
}
sb.WriteString(olmo3WithFunctionsMessage)
sb.WriteString("<functions>")
sb.WriteString(string(functionsJSON))
sb.WriteString("</functions>")
} else {
sb.WriteString(olmo3NoFunctionsMessage)
sb.WriteString("<functions></functions>")
}
sb.WriteString("<|im_end|>\n")
}
for i, message := range filteredMessages {
lastMessage := i == len(filteredMessages)-1
switch message.Role {
case "user":
sb.WriteString("<|im_start|>user\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
case "assistant":
sb.WriteString("<|im_start|>assistant\n")
if message.Content != "" {
sb.WriteString(message.Content)
}
if len(message.ToolCalls) > 0 {
sb.WriteString("<function_calls>")
for j, tc := range message.ToolCalls {
// Format as function_name(arg1="value1", arg2="value2")
sb.WriteString(tc.Function.Name)
sb.WriteString("(")
// Get sorted keys for deterministic output
keys := make([]string, 0, len(tc.Function.Arguments))
for k := range tc.Function.Arguments {
keys = append(keys, k)
}
sort.Strings(keys)
for k, key := range keys {
if k > 0 {
sb.WriteString(", ")
}
value, err := json.Marshal(tc.Function.Arguments[key])
if err != nil {
return "", err
}
sb.WriteString(fmt.Sprintf("%s=%s", key, string(value)))
}
sb.WriteString(")")
if j < len(message.ToolCalls)-1 {
sb.WriteString("\n")
}
}
sb.WriteString("</function_calls>")
}
// Add end tag unless it's the last message with content only (prefill)
if !lastMessage || len(message.ToolCalls) > 0 {
sb.WriteString("<|im_end|>\n")
}
case "tool":
sb.WriteString("<|im_start|>environment\n")
sb.WriteString(message.Content)
sb.WriteString("<|im_end|>\n")
}
}
// Add generation prompt if needed
needsGenerationPrompt := true
if len(filteredMessages) > 0 {
lastMsg := filteredMessages[len(filteredMessages)-1]
if lastMsg.Role == "assistant" && len(lastMsg.ToolCalls) == 0 && lastMsg.Content != "" {
needsGenerationPrompt = false
}
}
if needsGenerationPrompt {
sb.WriteString("<|im_start|>assistant\n")
}
return sb.String(), nil
}
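Render leans on marshalWithSpaces, whose definition is outside this hunk. Judging from the expected strings in the renderer tests below, it produces JSON with a space after every ':' and ',', in the style of Python's json.dumps. A sketch that would satisfy those expectations by tracking string state so separators inside values stay untouched (marshalWithSpacesSketch is hypothetical; bytes and encoding/json imports are assumed):

// marshalWithSpacesSketch marshals v and inserts a space after each ':'
// and ',' that sits outside a string literal.
func marshalWithSpacesSketch(v any) ([]byte, error) {
	b, err := json.Marshal(v)
	if err != nil {
		return nil, err
	}
	var out bytes.Buffer
	inString, escaped := false, false
	for _, c := range b {
		out.WriteByte(c)
		if inString {
			switch {
			case escaped:
				escaped = false
			case c == '\\':
				escaped = true
			case c == '"':
				inString = false
			}
			continue
		}
		switch c {
		case '"':
			inString = true
		case ':', ',':
			out.WriteByte(' ')
		}
	}
	return out.Bytes(), nil
}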

View File

@@ -1,290 +0,0 @@
package renderers

import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)

func TestOlmo3Renderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
tools []api.Tool
expected string
}{
{
name: "basic without system - adds default system",
msgs: []api.Message{
{Role: "user", Content: "Hello!"},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. You do not currently have access to any functions. <functions></functions><|im_end|>\n" +
"<|im_start|>user\n" +
"Hello!<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "with system message no tools",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello!"},
},
expected: "<|im_start|>system\n" +
"You are a helpful assistant.<|im_end|>\n" +
"<|im_start|>user\n" +
"Hello!<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "with system message and tools",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "What is the weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}, Description: "The city"},
},
},
},
},
},
expected: "<|im_start|>system\n" +
`You are a helpful assistant.<functions>[{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather", "parameters": {"type": "object", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"What is the weather?<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "default system with tools - includes function instruction",
msgs: []api.Message{
{Role: "user", Content: "What is the weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}, Description: "The city"},
},
},
},
},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. " +
"You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions." +
`<functions>[{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather", "parameters": {"type": "object", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"What is the weather?<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "assistant with tool calls - function call syntax",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "What is the weather in SF?"},
{
Role: "assistant",
Content: "Let me check the weather.",
ToolCalls: []api.ToolCall{
{
ID: "call_1",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "San Francisco",
},
},
},
},
},
{Role: "tool", Content: `{"temperature": 68}`, ToolName: "get_weather"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}, Description: "The city"},
},
},
},
},
},
expected: "<|im_start|>system\n" +
`You are a helpful assistant.<functions>[{"type": "function", "function": {"name": "get_weather", "description": "Get the current weather", "parameters": {"type": "object", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"What is the weather in SF?<|im_end|>\n" +
"<|im_start|>assistant\n" +
`Let me check the weather.<function_calls>get_weather(location="San Francisco")</function_calls><|im_end|>` + "\n" +
"<|im_start|>environment\n" +
`{"temperature": 68}<|im_end|>` + "\n" +
"<|im_start|>assistant\n\n",
},
{
name: "multi-turn conversation",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: "Hi there!"},
{Role: "user", Content: "How are you?"},
},
expected: "<|im_start|>system\n" +
"You are a helpful assistant.<|im_end|>\n" +
"<|im_start|>user\n" +
"Hello<|im_end|>\n" +
"<|im_start|>assistant\n" +
"Hi there!<|im_end|>\n" +
"<|im_start|>user\n" +
"How are you?<|im_end|>\n" +
"<|im_start|>assistant\n\n",
},
{
name: "parallel tool calls - newline separated",
msgs: []api.Message{
{Role: "user", Content: "Get weather in SF and NYC"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_1",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "San Francisco"},
},
},
{
ID: "call_2",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "New York"},
},
},
},
},
{Role: "tool", Content: `{"temperature": 68}`, ToolName: "get_weather"},
{Role: "tool", Content: `{"temperature": 55}`, ToolName: "get_weather"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
},
},
},
},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. " +
"You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions." +
`<functions>[{"type": "function", "function": {"name": "get_weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"Get weather in SF and NYC<|im_end|>\n" +
"<|im_start|>assistant\n" +
`<function_calls>get_weather(location="San Francisco")` + "\n" +
`get_weather(location="New York")</function_calls><|im_end|>` + "\n" +
"<|im_start|>environment\n" +
`{"temperature": 68}<|im_end|>` + "\n" +
"<|im_start|>environment\n" +
`{"temperature": 55}<|im_end|>` + "\n" +
"<|im_start|>assistant\n\n",
},
{
name: "tool call with multiple arguments",
msgs: []api.Message{
{Role: "user", Content: "Book a flight"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_1",
Function: api.ToolCallFunction{
Name: "book_flight",
Arguments: map[string]any{
"from": "SFO",
"to": "NYC",
},
},
},
},
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "book_flight",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"from": {Type: api.PropertyType{"string"}},
"to": {Type: api.PropertyType{"string"}},
},
},
},
},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. " +
"You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions." +
`<functions>[{"type": "function", "function": {"name": "book_flight", "parameters": {"type": "object", "properties": {"from": {"type": "string"}, "to": {"type": "string"}}}}}]</functions><|im_end|>` + "\n" +
"<|im_start|>user\n" +
"Book a flight<|im_end|>\n" +
"<|im_start|>assistant\n" +
`<function_calls>book_flight(from="SFO", to="NYC")</function_calls><|im_end|>` + "\n" +
"<|im_start|>assistant\n\n",
},
{
name: "assistant prefill - no generation prompt",
msgs: []api.Message{
{Role: "user", Content: "Hello"},
{Role: "assistant", Content: "Hi there!"},
},
expected: "<|im_start|>system\n" +
"You are a helpful function-calling AI assistant. You do not currently have access to any functions. <functions></functions><|im_end|>\n" +
"<|im_start|>user\n" +
"Hello<|im_end|>\n" +
"<|im_start|>assistant\n" +
"Hi there!",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := (&Olmo3Renderer{}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}
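One behavior worth spelling out from the final case: when the last message is an assistant turn with content and no tool calls, the renderer neither closes the turn nor appends a fresh generation prompt, so decoding continues the partial reply. A usage sketch under that assumption:

msgs := []api.Message{
	{Role: "user", Content: "Finish this sentence"},
	{Role: "assistant", Content: "The quick brown fox"},
}
out, _ := (&Olmo3Renderer{}).Render(msgs, nil, nil)
// out ends with "<|im_start|>assistant\nThe quick brown fox" with no
// <|im_end|> and no extra "<|im_start|>assistant\n", so the model picks
// up mid-turn.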

View File

@@ -59,9 +59,6 @@ func rendererForName(name string) Renderer {
case "cogito":
renderer := &CogitoRenderer{isThinking: true}
return renderer
case "olmo3":
renderer := &Olmo3Renderer{}
return renderer
default:
return nil
}

View File

@@ -110,7 +110,6 @@ func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.Thi
if err != nil {
return "", err
}
slog.Debug("rendered prompt", "renderer", m.Config.Renderer, "prompt", rendered)
return rendered, nil
}