Compare commits


7 Commits

Author SHA1 Message Date
ParthSareen
92af238208 wip 2025-12-02 12:17:36 -08:00
ParthSareen
7461faf651 script to render templates 2025-12-01 18:03:04 -08:00
Daniel Hiltgen
554172759c win: warn if ggml-base detected in PATH (#13289)
If the user has somehow installed another GGML-based app which places a
ggml-base lib somewhere in their PATH, we can experience runtime problems
due to incompatibilities. This change adds a warning message if we detect
a ggml-base outside of our install location to aid in troubleshooting.
2025-12-01 15:36:47 -08:00
Bruce MacDonald
5b6a8e6001 api/client: handle non-json streaming errors (#13007)
While processing the response stream during a chat or generation, if an error occurs it is parsed and returned to the user. The issue with the existing code is that it assumed the response would be valid JSON, which is not a safe assumption and caused cryptic error messages to be displayed due to parsing failures:
`invalid character 'i' looking for beginning of value`

This change updates the stream function to return the raw error string if it can't be parsed as JSON. This should help with debugging by making sure the actual error reaches the user.
2025-12-01 15:10:16 -08:00
Daniel Hiltgen
467bbc0dd5 jetpack: require exact match or skip cuda_jetpack* (#13288)
The cuda_jetpack libs will enumerate discrete GPUs on SBSA systems,
which leads to runtime failures from missing kernels. This fix
requires an exact match to enable jetpacks instead of relying on
enumeration to filter out supported libraries.
2025-12-01 12:48:16 -08:00
Jeffrey Morgan
6d9f9323c5 .gitattributes: add app/webview to linguist-vendored (#13274) 2025-11-29 23:46:10 -05:00
Ondrej Kokes
0c2489605d docs: fix output formatting in faq.mdx (#13231)
There were a few Markdown typos in one FAQ answer. It now renders as a proper ASCII table.
2025-11-28 19:19:21 -05:00
15 changed files with 728 additions and 1361 deletions

.gitattributes vendored
View File

@@ -19,6 +19,8 @@ ml/backend/**/*.comp linguist-vendored
ml/backend/**/*.glsl linguist-vendored
ml/backend/**/CMakeLists.txt linguist-vendored
app/webview linguist-vendored
llama/build-info.cpp linguist-generated
ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.s linguist-generated

View File

@@ -226,7 +226,14 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
bts := scanner.Bytes()
if err := json.Unmarshal(bts, &errorResponse); err != nil {
- return fmt.Errorf("unmarshal: %w", err)
+ if response.StatusCode >= http.StatusBadRequest {
+     return StatusError{
+         StatusCode:   response.StatusCode,
+         Status:       response.Status,
+         ErrorMessage: string(bts),
+     }
+ }
+ return errors.New(string(bts))
}
if response.StatusCode == http.StatusUnauthorized {
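
For context (not part of this diff): after this change a caller can distinguish HTTP-level failures from other stream errors, and a non-JSON body such as an HTML error page from a proxy reaches the caller verbatim. A minimal sketch of what that might look like on the caller side; the model name and prompt are placeholders:

```go
package main

import (
	"context"
	"errors"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3", // placeholder model name
		Messages: []api.Message{{Role: "user", Content: "Hello!"}},
	}

	err = client.Chat(context.Background(), req, func(r api.ChatResponse) error {
		log.Print(r.Message.Content)
		return nil
	})

	// With the change above, a non-JSON error body is returned verbatim instead
	// of a cryptic JSON parsing error, and HTTP-level failures can be inspected
	// via api.StatusError.
	var statusErr api.StatusError
	if errors.As(err, &statusErr) {
		log.Printf("server returned %d: %s", statusErr.StatusCode, statusErr.ErrorMessage)
	} else if err != nil {
		log.Printf("chat failed: %v", err)
	}
}
```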

View File

@@ -55,6 +55,7 @@ func TestClientFromEnvironment(t *testing.T) {
type testError struct {
message string
statusCode int
raw bool // if true, write message as-is instead of JSON encoding
}
func (e testError) Error() string {
@@ -111,6 +112,20 @@ func TestClientStream(t *testing.T) {
},
},
},
{
name: "plain text error response",
responses: []any{
"internal server error",
},
wantErr: "internal server error",
},
{
name: "HTML error page",
responses: []any{
"<html><body>404 Not Found</body></html>",
},
wantErr: "404 Not Found",
},
}
for _, tc := range testCases {
@@ -135,6 +150,12 @@ func TestClientStream(t *testing.T) {
return
}
if str, ok := resp.(string); ok {
fmt.Fprintln(w, str)
flusher.Flush()
continue
}
if err := json.NewEncoder(w).Encode(resp); err != nil {
t.Fatalf("failed to encode response: %v", err)
}
@@ -173,9 +194,10 @@ func TestClientStream(t *testing.T) {
func TestClientDo(t *testing.T) {
testCases := []struct {
- name     string
- response any
- wantErr  string
+ name           string
+ response       any
+ wantErr        string
+ wantStatusCode int
}{
{
name: "immediate error response",
@@ -183,7 +205,8 @@ func TestClientDo(t *testing.T) {
message: "test error message",
statusCode: http.StatusBadRequest,
},
- wantErr: "test error message",
+ wantErr:        "test error message",
+ wantStatusCode: http.StatusBadRequest,
},
{
name: "server error response",
@@ -191,7 +214,8 @@ func TestClientDo(t *testing.T) {
message: "internal error",
statusCode: http.StatusInternalServerError,
},
- wantErr: "internal error",
+ wantErr:        "internal error",
+ wantStatusCode: http.StatusInternalServerError,
},
{
name: "successful response",
@@ -203,6 +227,26 @@ func TestClientDo(t *testing.T) {
Success: true,
},
},
{
name: "plain text error response",
response: testError{
message: "internal server error",
statusCode: http.StatusInternalServerError,
raw: true,
},
wantErr: "internal server error",
wantStatusCode: http.StatusInternalServerError,
},
{
name: "HTML error page",
response: testError{
message: "<html><body>404 Not Found</body></html>",
statusCode: http.StatusNotFound,
raw: true,
},
wantErr: "<html><body>404 Not Found</body></html>",
wantStatusCode: http.StatusNotFound,
},
}
for _, tc := range testCases {
@@ -210,11 +254,16 @@ func TestClientDo(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if errResp, ok := tc.response.(testError); ok {
w.WriteHeader(errResp.statusCode)
- err := json.NewEncoder(w).Encode(map[string]string{
-     "error": errResp.message,
- })
- if err != nil {
-     t.Fatal("failed to encode error response:", err)
+ if !errResp.raw {
+     err := json.NewEncoder(w).Encode(map[string]string{
+         "error": errResp.message,
+     })
+     if err != nil {
+         t.Fatal("failed to encode error response:", err)
+     }
+ } else {
+     // Write raw message (simulates non-JSON error responses)
+     fmt.Fprint(w, errResp.message)
}
return
}
@@ -241,6 +290,15 @@ func TestClientDo(t *testing.T) {
if err.Error() != tc.wantErr {
t.Errorf("error message mismatch: got %q, want %q", err.Error(), tc.wantErr)
}
if tc.wantStatusCode != 0 {
if statusErr, ok := err.(StatusError); ok {
if statusErr.StatusCode != tc.wantStatusCode {
t.Errorf("status code mismatch: got %d, want %d", statusErr.StatusCode, tc.wantStatusCode)
}
} else {
t.Errorf("expected StatusError, got %T", err)
}
}
return
}

View File

@@ -0,0 +1,625 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "transformers>=4.57.0",
# "jinja2",
# "fastapi",
# "uvicorn",
# "pydantic",
# "requests",
# ]
# ///
"""
Chat Template Testing Tool
Test HuggingFace chat templates against Ollama renderers.
Usage:
# Run predefined test cases against a HuggingFace model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3
# Compare HuggingFace output with Ollama renderer
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --ollama-model intellect3
# Start server for manual curl testing
uv run cmd/chat_template/chat_template.py --serve
# Show chat template for a model
uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3 --show-template
"""
import argparse
import json
import sys
from typing import Any
from transformers import AutoTokenizer
TEST_CASES = [
{
"name": "basic_user_message",
"messages": [{"role": "user", "content": "Hello!"}],
"tools": None,
},
{
"name": "with_system_message",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
],
"tools": None,
},
{
"name": "multi_turn_conversation",
"messages": [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"},
],
"tools": None,
},
{
"name": "with_tools",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the weather?"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "tool_call_and_response",
"messages": [
{"role": "user", "content": "What is the weather in SF?"},
{
"role": "assistant",
"content": "Let me check the weather.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"required": ["location"],
"properties": {
"location": {"type": "string", "description": "The city"}
},
},
},
}
],
},
{
"name": "parallel_tool_calls",
"messages": [
{"role": "user", "content": "Get weather in SF and NYC"},
{
"role": "assistant",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "San Francisco"},
},
},
{
"id": "call_2",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "New York"},
},
},
],
},
{"role": "tool", "content": '{"temperature": 68}', "tool_call_id": "call_1"},
{"role": "tool", "content": '{"temperature": 55}', "tool_call_id": "call_2"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
# Thinking tests
{
"name": "assistant_with_thinking",
"messages": [
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": "The answer is 4.",
"thinking": "Let me calculate: 2 + 2 = 4. This is basic arithmetic.",
},
{"role": "user", "content": "And 3+3?"},
],
"tools": None,
},
{
"name": "thinking_with_tool_call",
"messages": [
{"role": "user", "content": "What's the weather in Paris?"},
{
"role": "assistant",
"content": "I'll check the weather for you.",
"thinking": "The user wants to know the weather in Paris. I should call the get_weather function.",
"tool_calls": [
{
"id": "call_1",
"type": "function",
"function": {
"name": "get_weather",
"arguments": {"location": "Paris"},
},
}
],
},
{"role": "tool", "content": '{"temperature": 18, "condition": "cloudy"}', "tool_call_id": "call_1"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
}
],
},
{
"name": "thinking_only_no_content",
"messages": [
{"role": "user", "content": "Think about this silently."},
{
"role": "assistant",
"content": "", # HuggingFace requires content field
"thinking": "I'm thinking about this but won't respond with visible content.",
},
{"role": "user", "content": "What did you think?"},
],
"tools": None,
},
]
# Cache for tokenizers
_tokenizer_cache: dict[str, Any] = {}
def get_tokenizer(model_name: str):
"""Get or create tokenizer for the given model."""
if model_name not in _tokenizer_cache:
print(f"Loading tokenizer for {model_name}...", file=sys.stderr)
_tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(model_name)
return _tokenizer_cache[model_name]
def apply_template(
model: str,
messages: list[dict],
tools: list[dict] | None = None,
) -> str:
"""Apply HuggingFace chat template to messages."""
tokenizer = get_tokenizer(model)
if tools:
return tokenizer.apply_chat_template(
messages,
tools=tools,
tokenize=False,
add_generation_prompt=True,
)
else:
return tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
def get_ollama_prompt(
ollama_model: str,
messages: list[dict],
tools: list[dict] | None = None,
ollama_host: str = "http://localhost:11434",
) -> str | None:
"""Get rendered prompt from Ollama using debug_render_only."""
import requests
# Convert messages to Ollama format
ollama_messages = []
for msg in messages:
ollama_msg = {"role": msg["role"]}
if "content" in msg:
ollama_msg["content"] = msg["content"]
if "thinking" in msg:
ollama_msg["thinking"] = msg["thinking"]
if "tool_calls" in msg:
# Convert tool_calls to Ollama format
tool_calls = []
for tc in msg["tool_calls"]:
tool_call = {
"function": {
"name": tc["function"]["name"],
"arguments": tc["function"]["arguments"],
}
}
if "id" in tc:
tool_call["id"] = tc["id"]
tool_calls.append(tool_call)
ollama_msg["tool_calls"] = tool_calls
if "tool_call_id" in msg:
ollama_msg["tool_call_id"] = msg["tool_call_id"]
ollama_messages.append(ollama_msg)
payload = {
"model": ollama_model,
"messages": ollama_messages,
"stream": False,
"_debug_render_only": True,
}
if tools:
payload["tools"] = tools
try:
resp = requests.post(f"{ollama_host}/api/chat", json=payload, timeout=30)
resp.raise_for_status()
data = resp.json()
# Field name is _debug_info with underscore prefix
if "_debug_info" in data and "rendered_template" in data["_debug_info"]:
return data["_debug_info"]["rendered_template"]
return None
except requests.exceptions.ConnectionError:
print(f" [ERROR] Cannot connect to Ollama at {ollama_host}", file=sys.stderr)
return None
except Exception as e:
print(f" [ERROR] Ollama request failed: {e}", file=sys.stderr)
return None
def compute_diff(hf_prompt: str, ollama_prompt: str) -> str:
"""Compute a unified diff between HuggingFace and Ollama prompts."""
import difflib
hf_lines = hf_prompt.splitlines(keepends=True)
ollama_lines = ollama_prompt.splitlines(keepends=True)
diff = difflib.unified_diff(
ollama_lines,
hf_lines,
fromfile="Ollama",
tofile="HuggingFace",
lineterm="",
)
return "".join(diff)
def print_test_output(
name: str,
messages: list[dict],
tools: list[dict] | None,
hf_prompt: str,
ollama_prompt: str | None = None,
as_repr: bool = False,
):
"""Print test output in a format suitable for Go test creation and LLM diffing."""
print(f"\n{'='*60}")
print(f"Test: {name}")
print("=" * 60)
print("\n--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print("\n--- Tools ---")
print(json.dumps(tools, indent=2))
if ollama_prompt is not None:
# Comparison mode
if hf_prompt == ollama_prompt:
print("\n--- Result: MATCH ---")
print("\n--- Prompt (both identical) ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
else:
print("\n--- Result: MISMATCH ---")
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("\n--- Ollama Prompt ---")
if as_repr:
print(repr(ollama_prompt))
else:
print(ollama_prompt)
print("\n--- Diff (Ollama -> HuggingFace) ---")
diff = compute_diff(hf_prompt, ollama_prompt)
if diff:
print(diff)
else:
print("(no line-level diff, check whitespace)")
else:
# HuggingFace only mode
print("\n--- HuggingFace Prompt ---")
if as_repr:
print(repr(hf_prompt))
else:
print(hf_prompt)
print("=" * 60)
def run_tests(
model: str,
as_repr: bool = False,
test_filter: str | None = None,
ollama_model: str | None = None,
ollama_host: str = "http://localhost:11434",
):
"""Run all predefined test cases against a model."""
if ollama_model:
print(f"\nComparing HuggingFace ({model}) vs Ollama ({ollama_model})\n")
else:
print(f"\nRunning tests against: {model}\n")
matches = 0
mismatches = 0
errors = 0
for test_case in TEST_CASES:
name = test_case["name"]
messages = test_case["messages"]
tools = test_case["tools"]
# Filter tests if specified
if test_filter and test_filter.lower() not in name.lower():
continue
try:
hf_prompt = apply_template(model, messages, tools)
ollama_prompt = None
if ollama_model:
ollama_prompt = get_ollama_prompt(
ollama_model, messages, tools, ollama_host
)
if ollama_prompt is None:
errors += 1
elif hf_prompt == ollama_prompt:
matches += 1
else:
mismatches += 1
print_test_output(
name, messages, tools, hf_prompt, ollama_prompt, as_repr=as_repr
)
except Exception as e:
errors += 1
print(f"\n{'='*60}")
print(f"Test: {name} - FAILED")
print(f"--- Input Messages ---")
print(json.dumps(messages, indent=2))
if tools:
print(f"--- Tools ---")
print(json.dumps(tools, indent=2))
print(f"--- Error ---")
print(f"{e}")
print("=" * 60)
# Print summary if comparing
if ollama_model:
total = matches + mismatches + errors
print(f"\n{'='*60}")
print("SUMMARY")
print("=" * 60)
print(f" Total: {total}")
print(f" Matches: {matches}")
print(f" Mismatches: {mismatches}")
print(f" Errors: {errors}")
print("=" * 60)
def show_template(model: str):
"""Show the chat template for a model."""
tokenizer = get_tokenizer(model)
print(f"\nChat template for {model}:\n")
print("-" * 60)
print(tokenizer.chat_template)
print("-" * 60)
def start_server(host: str = "0.0.0.0", port: int = 8000):
"""Start the FastAPI server for manual testing."""
from typing import Optional, List, Dict, Any as TypingAny
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
class Message(BaseModel):
role: str
content: Optional[str] = None
tool_calls: Optional[List[Dict[str, TypingAny]]] = None
tool_call_id: Optional[str] = None
class GeneratePromptRequest(BaseModel):
messages: List[Message]
model: str = "PrimeIntellect/INTELLECT-3"
tools: Optional[List[Dict[str, TypingAny]]] = None
inject_tools_as_functions: bool = False
class GeneratePromptResponse(BaseModel):
prompt: str
model: str
app = FastAPI(title="HuggingFace Prompt Generator", version="1.0.0")
@app.post("/generate-prompt", response_model=GeneratePromptResponse)
async def generate_prompt(request: GeneratePromptRequest):
try:
messages = []
for msg in request.messages:
message_dict = {"role": msg.role}
if msg.content is not None:
message_dict["content"] = msg.content
if msg.tool_calls is not None:
tool_calls = []
for tc in msg.tool_calls:
tc_copy = tc.copy()
if "function" in tc_copy and "arguments" in tc_copy["function"]:
args = tc_copy["function"]["arguments"]
if isinstance(args, str):
try:
tc_copy["function"]["arguments"] = json.loads(args)
except json.JSONDecodeError:
pass
tool_calls.append(tc_copy)
message_dict["tool_calls"] = tool_calls
if msg.tool_call_id is not None:
message_dict["tool_call_id"] = msg.tool_call_id
messages.append(message_dict)
prompt = apply_template(request.model, messages, request.tools)
return GeneratePromptResponse(prompt=prompt, model=request.model)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "healthy"}
print(f"Starting server on http://{host}:{port}")
print("Endpoints:")
print(" POST /generate-prompt - Generate prompt from messages")
print(" GET /health - Health check")
uvicorn.run(app, host=host, port=port)
def main():
parser = argparse.ArgumentParser(
description="HuggingFace Prompt Testing Tool",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--model",
"-m",
type=str,
help="HuggingFace model name (e.g., PrimeIntellect/INTELLECT-3)",
)
parser.add_argument(
"--ollama-model",
"-o",
type=str,
help="Ollama model name to compare against (e.g., qwen3-coder)",
)
parser.add_argument(
"--ollama-host",
type=str,
default="http://localhost:11434",
help="Ollama server URL (default: http://localhost:11434)",
)
parser.add_argument(
"--serve",
"-s",
action="store_true",
help="Start FastAPI server for manual curl testing",
)
parser.add_argument(
"--port",
"-p",
type=int,
default=8000,
help="Server port (default: 8000)",
)
parser.add_argument(
"--show-template",
"-t",
action="store_true",
help="Show the chat template for the model",
)
parser.add_argument(
"--repr",
"-r",
action="store_true",
help="Output prompts as Python repr (shows escape sequences)",
)
parser.add_argument(
"--filter",
"-f",
type=str,
help="Filter tests by name (substring match)",
)
args = parser.parse_args()
if args.serve:
start_server(port=args.port)
elif args.model:
if args.show_template:
show_template(args.model)
else:
run_tests(
args.model,
as_repr=args.repr,
test_filter=args.filter,
ollama_model=args.ollama_model,
ollama_host=args.ollama_host,
)
else:
parser.print_help()
print("\nExample usage:")
print(" uv run cmd/chat_template/chat_template.py --model PrimeIntellect/INTELLECT-3")
print(" uv run cmd/chat_template/chat_template.py --model Qwen/Qwen3-Coder-480B-A35B-Instruct --ollama-model qwen3-coder")
print(" uv run cmd/chat_template/chat_template.py --serve")
sys.exit(1)
if __name__ == "__main__":
main()
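
For reference, the Ollama side of the comparison above is driven by the `_debug_render_only` request flag and the `_debug_info.rendered_template` response field used in `get_ollama_prompt`. A rough Go sketch of that same request (the field names come from this work-in-progress script and may change; the model name is a placeholder):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Same request the script sends: ask /api/chat to only render the prompt.
	// "_debug_render_only" and "_debug_info" are the names used by the script
	// above and may change while this work is in progress.
	payload := map[string]any{
		"model":               "intellect3", // placeholder model name
		"stream":              false,
		"_debug_render_only":  true,
		"messages": []map[string]any{
			{"role": "user", "content": "Hello!"},
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var data struct {
		DebugInfo struct {
			RenderedTemplate string `json:"rendered_template"`
		} `json:"_debug_info"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
		log.Fatal(err)
	}
	fmt.Println(data.DebugInfo.RenderedTemplate)
}
```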

View File

@@ -1,50 +0,0 @@
# eval
Evaluation tool for testing Ollama models.
## Usage
Run all tests:
```bash
go run . -model llama3.2:latest
```
Run specific suite:
```bash
go run . -model llama3.2:latest -suite tool-calling-basic -v
```
List available suites:
```bash
go run . -list
```
## Adding Tests
Edit `suites.go` to add new test suites. Each test needs:
- `Name`: test identifier
- `Prompt`: what to send to the model
- `Check`: function to validate the response
Example:
```go
{
Name: "my-test",
Prompt: "What is 2+2?",
Check: Contains("4"),
}
```
Available check functions:
- `HasResponse()` - response is non-empty
- `Contains(s)` - response contains substring
- `CallsTool(name)` - model called specific tool
- `NoTools()` - model called no tools
- `MinTools(n)` - model called at least n tools
- `All(checks...)` - all checks pass

View File

@@ -1,151 +0,0 @@
package main
import (
"context"
"strings"
"time"
"github.com/ollama/ollama/api"
)
// Test is a single evaluation test
type Test struct {
Name string
Prompt string
System string
Tools []api.Tool
Think bool
Options map[string]any
Check func(response string, tools []api.ToolCall) bool
}
// Suite is a collection of tests
type Suite struct {
Name string
Tests []Test
}
// Result holds test execution results
type Result struct {
Name string
Passed bool
Error error
Duration time.Duration
Response string
Tools []string
ToolCalls []api.ToolCall
Thinking bool
}
// Run executes a test against a model
func Run(ctx context.Context, client *api.Client, model string, test Test) Result {
result := Result{Name: test.Name}
req := &api.ChatRequest{
Model: model,
Messages: []api.Message{
{Role: "user", Content: test.Prompt},
},
Options: test.Options,
}
if test.System != "" {
req.Messages = append([]api.Message{
{Role: "system", Content: test.System},
}, req.Messages...)
}
if len(test.Tools) > 0 {
req.Tools = test.Tools
}
if test.Think {
req.Think = &api.ThinkValue{Value: true}
}
var resp strings.Builder
var toolCalls []api.ToolCall
start := time.Now()
err := client.Chat(ctx, req, func(r api.ChatResponse) error {
resp.WriteString(r.Message.Content)
if r.Message.Thinking != "" {
result.Thinking = true
}
toolCalls = append(toolCalls, r.Message.ToolCalls...)
return nil
})
result.Duration = time.Since(start)
if err != nil {
result.Error = err
return result
}
result.Response = resp.String()
result.Tools = uniqueToolNames(toolCalls)
result.ToolCalls = toolCalls
result.Passed = test.Check(result.Response, toolCalls)
return result
}
func uniqueToolNames(calls []api.ToolCall) []string {
seen := make(map[string]bool)
var names []string
for _, c := range calls {
if !seen[c.Function.Name] {
seen[c.Function.Name] = true
names = append(names, c.Function.Name)
}
}
return names
}
// Check functions for common test patterns
func HasResponse() func(string, []api.ToolCall) bool {
return func(resp string, _ []api.ToolCall) bool {
return strings.TrimSpace(resp) != ""
}
}
func Contains(s string) func(string, []api.ToolCall) bool {
return func(resp string, _ []api.ToolCall) bool {
return strings.Contains(strings.ToLower(resp), strings.ToLower(s))
}
}
func CallsTool(name string) func(string, []api.ToolCall) bool {
return func(_ string, tools []api.ToolCall) bool {
for _, t := range tools {
if t.Function.Name == name {
return true
}
}
return false
}
}
func NoTools() func(string, []api.ToolCall) bool {
return func(_ string, tools []api.ToolCall) bool {
return len(tools) == 0
}
}
func MinTools(n int) func(string, []api.ToolCall) bool {
return func(_ string, tools []api.ToolCall) bool {
return len(tools) >= n
}
}
func All(checks ...func(string, []api.ToolCall) bool) func(string, []api.ToolCall) bool {
return func(resp string, tools []api.ToolCall) bool {
for _, check := range checks {
if !check(resp, tools) {
return false
}
}
return true
}
}

View File

@@ -1,217 +0,0 @@
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
"strings"
"time"
"github.com/ollama/ollama/api"
)
func main() {
model := flag.String("model", "", "model to evaluate")
suite := flag.String("suite", "", "comma-separated list of suites to run (empty runs all)")
list := flag.Bool("list", false, "list available suites")
verbose := flag.Bool("v", false, "verbose output")
timeout := flag.Int("timeout", 60, "timeout per test in seconds")
export := flag.String("export", "eval-results.json", "export results to file")
flag.Parse()
if *list {
for _, s := range suites {
fmt.Printf("%s (%d tests)\n", s.Name, len(s.Tests))
}
return
}
if *model == "" {
fmt.Fprintf(os.Stderr, "error: -model parameter is required\n")
os.Exit(1)
}
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
os.Exit(1)
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
if err := client.Heartbeat(ctx); err != nil {
cancel()
fmt.Fprintf(os.Stderr, "error: cannot connect to ollama\n")
os.Exit(1)
}
cancel()
selected := suites
if *suite != "" {
suiteNames := strings.Split(*suite, ",")
selected = []Suite{}
var notFound []string
for _, name := range suiteNames {
name = strings.TrimSpace(name)
if name == "" {
continue
}
found := false
for _, s := range suites {
if s.Name == name {
selected = append(selected, s)
found = true
break
}
}
if !found {
notFound = append(notFound, name)
}
}
if len(notFound) > 0 {
fmt.Fprintf(os.Stderr, "error: suite(s) not found: %s\n", strings.Join(notFound, ", "))
os.Exit(1)
}
}
var results []Result
for _, s := range selected {
if *verbose {
fmt.Printf("\n%s (%d tests)\n", s.Name, len(s.Tests))
}
for i, test := range s.Tests {
if test.Options == nil {
test.Options = map[string]any{"temperature": 0.1}
}
if test.Check == nil {
test.Check = HasResponse()
}
if *verbose {
fmt.Printf(" [%d/%d] %s... ", i+1, len(s.Tests), test.Name)
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*timeout)*time.Second)
result := Run(ctx, client, *model, test)
cancel()
results = append(results, result)
if *verbose {
if result.Error != nil {
fmt.Printf("ERROR: %v\n", result.Error)
} else if result.Passed {
fmt.Printf("PASS (%.2fs)", result.Duration.Seconds())
if len(result.Tools) > 0 || result.Thinking {
fmt.Printf(" [")
if len(result.Tools) > 0 {
fmt.Printf("tools: %s", strings.Join(result.Tools, ","))
}
if result.Thinking {
if len(result.Tools) > 0 {
fmt.Printf(", ")
}
fmt.Printf("thinking")
}
fmt.Printf("]")
}
fmt.Println()
// Print tool calls with details
if len(result.ToolCalls) > 0 {
fmt.Printf(" Tool Calls:\n")
for _, tc := range result.ToolCalls {
argsJSON, _ := json.Marshal(tc.Function.Arguments)
fmt.Printf(" - %s: %s\n", tc.Function.Name, string(argsJSON))
}
}
// Print response if there is one
if result.Response != "" {
fmt.Printf(" Response: %s\n", result.Response)
}
} else {
fmt.Printf("FAIL (%.2fs)\n", result.Duration.Seconds())
// Print tool calls with details even on failure
if len(result.ToolCalls) > 0 {
fmt.Printf(" Tool Calls:\n")
for _, tc := range result.ToolCalls {
argsJSON, _ := json.Marshal(tc.Function.Arguments)
fmt.Printf(" - %s: %s\n", tc.Function.Name, string(argsJSON))
}
}
// Print response even on failure
if result.Response != "" {
fmt.Printf(" Response: %s\n", result.Response)
}
}
}
}
}
printSummary(results)
if *export != "" {
if err := writeJSON(*export, results); err != nil {
fmt.Fprintf(os.Stderr, "warning: export failed: %v\n", err)
} else if *verbose {
fmt.Printf("\nResults: %s\n", *export)
}
}
if anyFailed(results) {
os.Exit(1)
}
}
func printSummary(results []Result) {
var passed, failed, errors int
for _, r := range results {
if r.Error != nil {
errors++
} else if r.Passed {
passed++
} else {
failed++
}
}
total := len(results)
rate := 0.0
if total > 0 {
rate = float64(passed) / float64(total) * 100
}
fmt.Printf("\n%d/%d passed (%.1f%%)", passed, total, rate)
if errors > 0 {
fmt.Printf(", %d errors", errors)
}
fmt.Println()
}
func anyFailed(results []Result) bool {
for _, r := range results {
if !r.Passed || r.Error != nil {
return true
}
}
return false
}
func writeJSON(path string, results []Result) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
enc := json.NewEncoder(f)
enc.SetIndent("", " ")
return enc.Encode(results)
}

View File

@@ -1,178 +0,0 @@
package main
import "github.com/ollama/ollama/api"
var suites = []Suite{
{
Name: "basic-qa",
Tests: []Test{
{
Name: "simple-math",
Prompt: "What is 2+2? Reply with just the number.",
Check: Contains("4"),
},
{
Name: "capital-city",
Prompt: "What is the capital of France? Reply with just the city name.",
Check: Contains("Paris"),
},
{
Name: "greeting",
Prompt: "Say hello",
Check: HasResponse(),
},
},
},
{
Name: "reasoning",
Tests: []Test{
{
Name: "logic-puzzle",
Prompt: "If all roses are flowers and some flowers fade quickly, can we conclude that some roses fade quickly? Answer yes or no.",
Check: Contains("no"),
},
{
Name: "counting",
Prompt: "How many letters are in the word 'HELLO'?",
Check: Contains("5"),
},
},
},
{
Name: "instruction-following",
Tests: []Test{
{
Name: "json-output",
Prompt: "Reply with a JSON object containing a 'status' field set to 'ok'.",
Check: All(Contains("status"), Contains("ok")),
},
{
Name: "system-prompt",
Prompt: "What is your name?",
System: "You are a helpful assistant named TestBot. When asked your name, always respond with 'TestBot'.",
Check: Contains("TestBot"),
},
},
},
{
Name: "tool-calling-basic",
Tests: []Test{
{
Name: "single-tool",
Prompt: "What's the weather like in San Francisco?",
Tools: []api.Tool{weatherTool},
Check: CallsTool("get_weather"),
},
{
Name: "tool-selection",
Prompt: "What time is it in Tokyo?",
Tools: []api.Tool{weatherTool, timeTool},
Check: CallsTool("get_time"),
},
{
Name: "no-tool-needed",
Prompt: "What is 2+2?",
Tools: []api.Tool{weatherTool, timeTool},
Check: NoTools(),
},
},
},
{
Name: "tool-calling-advanced",
Tests: []Test{
{
Name: "parallel-calls",
Prompt: "Get the weather in both New York and Los Angeles.",
Tools: []api.Tool{weatherTool},
Check: All(CallsTool("get_weather"), MinTools(2)),
},
{
Name: "multi-param",
Prompt: "Search for Italian restaurants with prices between $20 and $40.",
Tools: []api.Tool{restaurantTool},
Check: CallsTool("search_restaurants"),
},
},
},
{
Name: "tool-calling-thinking",
Tests: []Test{
{
Name: "thinking-before-tool",
Prompt: "I need to know the weather in Paris before I decide what to pack.",
Tools: []api.Tool{weatherTool},
Think: true,
Check: CallsTool("get_weather"),
},
{
Name: "thinking-multi-tool",
Prompt: "I'm planning a trip to London. I need to know what time it is there and what the weather is like.",
Tools: []api.Tool{weatherTool, timeTool},
Think: true,
Check: MinTools(1),
},
},
},
}
var weatherTool = api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get the current weather in a given location",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "The city and state",
},
},
},
},
}
var timeTool = api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "get_time",
Description: "Get the current time in a timezone",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"timezone"},
Properties: map[string]api.ToolProperty{
"timezone": {
Type: api.PropertyType{"string"},
Description: "The timezone name",
},
},
},
},
}
var restaurantTool = api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: "search_restaurants",
Description: "Search for restaurants",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"cuisine"},
Properties: map[string]api.ToolProperty{
"cuisine": {
Type: api.PropertyType{"string"},
Description: "Type of cuisine",
},
"min_price": {
Type: api.PropertyType{"number"},
Description: "Minimum price",
},
"max_price": {
Type: api.PropertyType{"number"},
Description: "Maximum price",
},
},
},
},
}

View File

@@ -65,6 +65,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
}
slog.Info("discovering available GPUs...")
detectIncompatibleLibraries()
// Warn if any user-overrides are set which could lead to incorrect GPU discovery
overrideWarnings()
@@ -98,6 +99,9 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
continue
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
continue
} else if jetpack == "" && strings.Contains(filepath.Base(dir), "cuda_jetpack") {
slog.Debug("jetpack not detected (set JETSON_JETPACK or OLLAMA_LLM_LIBRARY to override), skipping", "libDir", dir)
continue
} else if !envconfig.EnableVulkan() && strings.Contains(filepath.Base(dir), "vulkan") {
slog.Info("experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1")
continue
@@ -484,3 +488,16 @@ func overrideWarnings() {
slog.Warn("if GPUs are not correctly discovered, unset and try again")
}
}
func detectIncompatibleLibraries() {
if runtime.GOOS != "windows" {
return
}
basePath, err := exec.LookPath("ggml-base.dll")
if err != nil || basePath == "" {
return
}
if !strings.HasPrefix(basePath, ml.LibOllamaPath) {
slog.Warn("potentially incompatible library detected in PATH", "location", basePath)
}
}

View File

@@ -57,8 +57,13 @@ ollama ps
```
<Info>
- **Output**: ``` NAME ID SIZE PROCESSOR UNTIL llama3:70b bcfb190ca3a7 42 GB
- 100% GPU 4 minutes from now ```
+ **Output**:
+ ```
+ NAME          ID              SIZE     PROCESSOR    UNTIL
+ llama3:70b    bcfb190ca3a7    42 GB    100% GPU     4 minutes from now
+ ```
</Info>
The `Processor` column will show which memory the model was loaded in to:
@@ -385,4 +390,4 @@ Ollama for Windows and macOS register as a login item during installation. You
- In `Task Manager` go to the `Startup apps` tab, search for `ollama` then click `Disable`
**MacOS**
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.
- Open `Settings` and search for "Login Items", find the `Ollama` entry under "Allow in the Background`, then click the slider to disable.

View File

@@ -1,44 +0,0 @@
package parsers
import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/thinking"
)
// Intellect3Parser combines thinking support using
// the built-in thinking parser, with tool call support
// via qwen3-coder's parser.
type Intellect3Parser struct {
thinkingParser thinking.Parser
toolParser Qwen3CoderParser
}
func (p *Intellect3Parser) HasToolSupport() bool {
return true
}
func (p *Intellect3Parser) HasThinkingSupport() bool {
return true
}
func (p *Intellect3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.thinkingParser = thinking.Parser{
OpeningTag: "<think>",
ClosingTag: "</think>",
}
p.toolParser = Qwen3CoderParser{}
return p.toolParser.Init(tools, lastMessage, thinkValue)
}
func (p *Intellect3Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
// First extract thinking content
thinkingContent, remainingContent := p.thinkingParser.AddContent(s)
// Then process the remaining content for tool calls
toolContent, _, toolCalls, err := p.toolParser.Add(remainingContent, done)
if err != nil {
return "", thinkingContent, nil, err
}
return toolContent, thinkingContent, toolCalls, nil
}

View File

@@ -1,542 +0,0 @@
package parsers
import (
"reflect"
"testing"
"github.com/ollama/ollama/api"
)
func TestIntellect3ParserThinkingOnly(t *testing.T) {
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
}{
{
desc: "simple thinking content",
chunks: []string{"<think>I need to analyze this</think>Here is my response"},
wantText: "Here is my response",
wantThink: "I need to analyze this",
},
{
desc: "thinking with whitespace",
chunks: []string{"<think>\n Some thoughts \n</think>\n\nContent"},
wantText: "Content",
wantThink: "Some thoughts \n", // Thinking parser preserves internal whitespace
},
{
desc: "thinking only",
chunks: []string{"<think>Just thinking</think>"},
wantText: "",
wantThink: "Just thinking",
},
{
desc: "no thinking tags",
chunks: []string{"Just regular content"},
wantText: "Just regular content",
wantThink: "",
},
{
desc: "streaming thinking content",
chunks: []string{"<think>Fir", "st part", " second part</think>Content"},
wantText: "Content",
wantThink: "First part second part",
},
{
desc: "partial opening tag",
chunks: []string{"<thi", "nk>Thinking</think>Content"},
wantText: "Content",
wantThink: "Thinking",
},
{
desc: "partial closing tag",
chunks: []string{"<think>Thinking</thi", "nk>Content"},
wantText: "Content",
wantThink: "Thinking",
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(nil, nil, nil)
var gotText, gotThink string
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
if len(calls) > 0 {
t.Fatalf("expected no tool calls, got %v", calls)
}
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
})
}
}
func TestIntellect3ParserToolCallsOnly(t *testing.T) {
tools := []api.Tool{
tool("get_weather", map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
"unit": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantCalls []api.ToolCall
}{
{
desc: "simple tool call",
chunks: []string{
"Let me check the weather<tool_call><function=get_weather>\n<parameter=location>\nSan Francisco\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
},
wantText: "Let me check the weather",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "San Francisco",
"unit": "celsius",
},
},
},
},
},
{
desc: "tool call streaming",
chunks: []string{
"Checking<tool_call><function=get_wea",
"ther>\n<parameter=location>\nNew York\n</param", // nolint:all
"eter>\n<parameter=unit>\nfahrenheit\n</parameter>\n</function></tool_call>Done",
},
wantText: "CheckingDone",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "New York",
"unit": "fahrenheit",
},
},
},
},
},
{
desc: "multiple tool calls",
chunks: []string{
"<tool_call><function=get_weather>\n<parameter=location>\nBoston\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
"<tool_call><function=get_weather>\n<parameter=location>\nSeattle\n</parameter>\n<parameter=unit>\nfahrenheit\n</parameter>\n</function></tool_call>",
},
wantText: "",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Boston",
"unit": "celsius",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Seattle",
"unit": "fahrenheit",
},
},
},
},
},
{
desc: "no tool calls",
chunks: []string{"Just regular content"},
wantText: "Just regular content",
wantCalls: nil,
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText string
var gotCalls []api.ToolCall
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotCalls = append(gotCalls, calls...)
if think != "" {
t.Fatalf("expected no thinking, got %q", think)
}
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if !reflect.DeepEqual(gotCalls, tc.wantCalls) {
t.Errorf("tool calls: got %#v, want %#v", gotCalls, tc.wantCalls)
}
})
}
}
func TestIntellect3ParserCombined(t *testing.T) {
tools := []api.Tool{
tool("get_weather", map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
"unit": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
wantCalls []api.ToolCall
}{
{
desc: "thinking then tool call",
chunks: []string{
"<think>Need to get weather data</think>Let me check<tool_call><function=get_weather>\n<parameter=location>\nParis\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
},
wantText: "Let me check",
wantThink: "Need to get weather data",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Paris",
"unit": "celsius",
},
},
},
},
},
{
desc: "thinking, tool call, and final content",
chunks: []string{
"<think>User wants weather info</think>Checking weather<tool_call><function=get_weather>\n<parameter=location>\nTokyo\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>Done!",
},
wantText: "Checking weatherDone!",
wantThink: "User wants weather info",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Tokyo",
"unit": "celsius",
},
},
},
},
},
{
desc: "streaming combined content",
chunks: []string{
"<think>Analyzing",
" the request</think>",
"Let me help<tool_call>",
"<function=get_weather>\n<parameter=location>\nLondon",
"\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function>",
"</tool_call>There you go!",
},
wantText: "Let me helpThere you go!",
wantThink: "Analyzing the request",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "London",
"unit": "celsius",
},
},
},
},
},
{
desc: "multiple tool calls with thinking",
chunks: []string{
"<think>Need multiple locations</think>",
"<tool_call><function=get_weather>\n<parameter=location>\nBoston\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
"and<tool_call><function=get_weather>\n<parameter=location>\nBerlin\n</parameter>\n<parameter=unit>\ncelsius\n</parameter>\n</function></tool_call>",
},
wantText: "and",
wantThink: "Need multiple locations",
wantCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Boston",
"unit": "celsius",
},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{
"location": "Berlin",
"unit": "celsius",
},
},
},
},
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText, gotThink string
var gotCalls []api.ToolCall
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
gotCalls = append(gotCalls, calls...)
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
if !reflect.DeepEqual(gotCalls, tc.wantCalls) {
t.Errorf("tool calls: got %#v, want %#v", gotCalls, tc.wantCalls)
}
})
}
}
func TestIntellect3ParserEdgeCases(t *testing.T) {
tools := []api.Tool{
tool("test_func", map[string]api.ToolProperty{
"param": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
wantCalls int
}{
{
desc: "empty input",
chunks: []string{""},
wantText: "",
wantThink: "",
wantCalls: 0,
},
{
desc: "only whitespace",
chunks: []string{" \n \t "},
wantText: "",
wantThink: "",
wantCalls: 0,
},
{
desc: "unclosed thinking tag",
chunks: []string{"<think>Never closes"},
wantText: "",
wantThink: "Never closes",
wantCalls: 0,
},
{
desc: "unclosed tool call tag",
chunks: []string{"<tool_call><function=test_func>\n<parameter=param>\nvalue\n</parameter>\n</function>"},
wantText: "", // Qwen3CoderParser waits for closing tag, doesn't emit partial tool calls
wantThink: "",
wantCalls: 0, // Won't be parsed until </tool_call> is seen
},
{
desc: "unicode in thinking",
chunks: []string{"<think>思考中 🤔</think>答案是 42"},
wantText: "答案是 42",
wantThink: "思考中 🤔",
wantCalls: 0,
},
{
desc: "fake thinking tag",
chunks: []string{"<thinking>This is not the right tag</thinking>Content"},
wantText: "<thinking>This is not the right tag</thinking>Content",
wantThink: "",
wantCalls: 0,
},
{
desc: "fake tool call tag",
chunks: []string{"<tool>Not a tool call</tool>"},
wantText: "<tool>Not a tool call</tool>",
wantThink: "",
wantCalls: 0,
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText, gotThink string
var gotCalls []api.ToolCall
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, calls, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
gotCalls = append(gotCalls, calls...)
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
if len(gotCalls) != tc.wantCalls {
t.Errorf("tool calls count: got %d, want %d", len(gotCalls), tc.wantCalls)
}
})
}
}
func TestIntellect3ParserCapabilities(t *testing.T) {
parser := Intellect3Parser{}
if !parser.HasToolSupport() {
t.Error("Intellect3Parser should have tool support")
}
if !parser.HasThinkingSupport() {
t.Error("Intellect3Parser should have thinking support")
}
}
func TestIntellect3ParserInit(t *testing.T) {
parser := Intellect3Parser{}
tools := []api.Tool{
tool("test", map[string]api.ToolProperty{
"param": {Type: api.PropertyType{"string"}},
}),
}
returnedTools := parser.Init(tools, nil, nil)
// Should return tools unchanged (delegated to Qwen3CoderParser)
if !reflect.DeepEqual(returnedTools, tools) {
t.Errorf("Init should return tools unchanged")
}
}
func TestIntellect3ParserWhitespaceHandling(t *testing.T) {
tools := []api.Tool{
tool("test", map[string]api.ToolProperty{
"param": {Type: api.PropertyType{"string"}},
}),
}
cases := []struct {
desc string
chunks []string
wantText string
wantThink string
}{
{
desc: "whitespace between thinking and content",
chunks: []string{"<think>Thinking</think>\n\n\nContent"},
wantText: "Content",
wantThink: "Thinking",
},
{
desc: "whitespace inside thinking tags",
chunks: []string{"<think> \n Thinking \n </think>Content"},
wantText: "Content",
wantThink: "Thinking \n ", // Thinking parser preserves internal whitespace
},
{
desc: "leading whitespace before thinking",
chunks: []string{" <think>Thinking</think>Content"},
wantText: "Content",
wantThink: "Thinking",
},
{
desc: "whitespace before tool call",
chunks: []string{"Text <tool_call><function=test>\n<parameter=param>\nvalue\n</parameter>\n</function></tool_call>"},
wantText: "Text",
wantThink: "",
},
{
desc: "whitespace after tool call",
chunks: []string{"<tool_call><function=test>\n<parameter=param>\nvalue\n</parameter>\n</function></tool_call> Text"},
wantText: "Text",
wantThink: "",
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
parser := Intellect3Parser{}
parser.Init(tools, nil, nil)
var gotText, gotThink string
for i, chunk := range tc.chunks {
isLast := i == len(tc.chunks)-1
text, think, _, err := parser.Add(chunk, isLast)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
gotText += text
gotThink += think
}
if gotText != tc.wantText {
t.Errorf("content: got %q, want %q", gotText, tc.wantText)
}
if gotThink != tc.wantThink {
t.Errorf("thinking: got %q, want %q", gotThink, tc.wantThink)
}
})
}
}

View File

@@ -54,8 +54,6 @@ func ParserForName(name string) Parser {
return harmony.NewHarmonyMessageHandler()
case "cogito":
return &CogitoParser{}
- case "intellect-3":
-     return &Intellect3Parser{}
default:
return nil
}

View File

@@ -1,160 +0,0 @@
package renderers
import (
"strings"
"github.com/ollama/ollama/api"
)
type Intellect3Renderer struct{}
func (r *Intellect3Renderer) Render(messages []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
var sb strings.Builder
// filter out system messages and choose the first (if any) to win
var systemMessage string
var filteredMessages []api.Message
for _, message := range messages {
if message.Role != "system" {
filteredMessages = append(filteredMessages, message)
continue
}
if systemMessage == "" {
systemMessage = message.Content
}
}
if systemMessage != "" || len(tools) > 0 {
sb.WriteString(imStartTag + "system\n")
sb.WriteString(systemMessage)
if len(tools) > 0 {
sb.WriteString("\n\n# Tools\n\nYou have access to the following functions:\n\n")
sb.WriteString("<tools>")
for _, tool := range tools {
sb.WriteString("\n")
sb.WriteString("<function>\n")
sb.WriteString("<name>" + tool.Function.Name + "</name>")
if tool.Function.Description != "" {
sb.WriteString("\n<description>" + tool.Function.Description + "</description>")
}
sb.WriteString("\n<parameters>")
for name, prop := range tool.Function.Parameters.Properties {
sb.WriteString("\n<parameter>")
sb.WriteString("\n<name>" + name + "</name>")
if len(prop.Type) > 0 {
sb.WriteString("\n<type>" + formatToolDefinitionType(prop.Type) + "</type>")
}
if prop.Description != "" {
sb.WriteString("\n<description>" + prop.Description + "</description>")
}
// Render any additional keys not already handled
handledKeys := map[string]bool{
"type": true,
"description": true,
}
sb.WriteString(renderAdditionalKeys(prop, handledKeys))
sb.WriteString("\n</parameter>")
}
// Render extra keys for parameters (everything except 'type' and 'properties')
paramHandledKeys := map[string]bool{
"type": true,
"properties": true,
}
sb.WriteString(renderAdditionalKeys(tool.Function.Parameters, paramHandledKeys))
sb.WriteString("\n</parameters>")
sb.WriteString("\n</function>")
}
sb.WriteString("\n</tools>")
sb.WriteString("\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>")
}
sb.WriteString(imEndTag + "\n")
}
for i, message := range filteredMessages {
lastMessage := i == len(filteredMessages)-1
prefill := lastMessage && message.Role == "assistant"
switch message.Role {
case "assistant":
if len(message.ToolCalls) > 0 {
sb.WriteString(imStartTag + "assistant")
// Add thinking tags if present
if message.Thinking != "" {
sb.WriteString("\n<think>" + strings.TrimSpace(message.Thinking) + "</think>")
}
if message.Content != "" {
sb.WriteString("\n" + strings.TrimSpace(message.Content) + "\n")
}
for _, toolCall := range message.ToolCalls {
sb.WriteString("\n<tool_call>\n<function=" + toolCall.Function.Name + ">")
for name, value := range toolCall.Function.Arguments {
valueStr := formatToolCallArgument(value)
sb.WriteString("\n<parameter=" + name + ">\n" + valueStr + "\n</parameter>")
}
sb.WriteString("\n</function>\n</tool_call>")
}
sb.WriteString("<|im_end|>\n")
} else {
sb.WriteString(imStartTag + "assistant")
// Add thinking tags if present
if message.Thinking != "" {
sb.WriteString("\n<think>" + strings.TrimSpace(message.Thinking) + "</think>")
}
// Add content if present
if message.Content != "" {
if message.Thinking != "" {
sb.WriteString("\n" + strings.TrimSpace(message.Content))
} else {
sb.WriteString("\n" + message.Content)
}
}
if !prefill {
sb.WriteString(imEndTag + "\n")
}
}
case "tool":
// consecutive tool responses should share a single `<im_start>user`, but
// have their own <tool_response> tags
// only start a new user block if this is the first tool response
if i == 0 || filteredMessages[i-1].Role != "tool" {
sb.WriteString(imStartTag + "user\n")
}
sb.WriteString("<tool_response>\n")
sb.WriteString(message.Content)
sb.WriteString("\n</tool_response>\n")
// close the user block only if this is the last tool response
if i == len(filteredMessages)-1 || filteredMessages[i+1].Role != "tool" {
sb.WriteString(imEndTag + "\n")
}
default:
sb.WriteString(imStartTag + message.Role + "\n")
sb.WriteString(message.Content)
sb.WriteString(imEndTag + "\n")
}
if lastMessage && !prefill {
sb.WriteString(imStartTag + "assistant\n<think>")
}
}
return sb.String(), nil
}

View File

@@ -59,9 +59,6 @@ func rendererForName(name string) Renderer {
case "cogito":
renderer := &CogitoRenderer{isThinking: true}
return renderer
- case "intellect-3":
-     renderer := &Intellect3Renderer{}
-     return renderer
default:
return nil
}