Compare commits


9 Commits

Author SHA1 Message Date
ParthSareen
cb9427b6d7 anthropic: add unit and integration tests
- Unit tests for transformation functions (FromMessagesRequest, ToMessagesResponse)
- Unit tests for error handling and edge cases
- Middleware integration tests with httptest
- Fix lint issues (gofmt)
- Fix unused struct fields in StreamConverter
- Add fallback for crypto/rand errors
2026-01-02 01:55:04 -05:00
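As a rough sketch of the httptest pattern the middleware integration tests rely on (the `/v1/messages` route is real, but the stub handler and test body here are illustrative, not the actual test code):

```go
package middleware_test

import (
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/gin-gonic/gin"
)

// stubHandler stands in for the real chat handler behind the middleware.
func stubHandler(c *gin.Context) { c.Status(http.StatusOK) }

func TestMessagesEndpointSketch(t *testing.T) {
	gin.SetMode(gin.TestMode)
	r := gin.New()
	// The real tests mount the Anthropic middleware in front of the chat
	// handler; this sketch only exercises the route and request shape.
	r.POST("/v1/messages", stubHandler)

	body := `{"model":"m","max_tokens":16,"messages":[{"role":"user","content":"hi"}]}`
	req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body))
	req.Header.Set("Content-Type", "application/json")

	w := httptest.NewRecorder()
	r.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Fatalf("expected 200, got %d", w.Code)
	}
}
```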
ParthSareen
331bee0b8f api: add Anthropic Messages API compatibility layer
Add middleware to support the Anthropic Messages API format at /v1/messages.
This enables tools like Claude Code to work with Ollama models through the
Anthropic API interface.

Features:
- Request/response transformation between Anthropic and internal formats
- Streaming support with SSE events (message_start, content_block_delta, etc.)
- Tool calling support (tool_use and tool_result content blocks)
- Thinking/extended thinking block support
- Image content block support (base64)
- System prompt handling
- Multi-turn conversation support
- Proper stop_reason mapping (end_turn, max_tokens, tool_use)
- Error responses in Anthropic format

New files:
- anthropic/anthropic.go: Types and transformation functions
- middleware/anthropic.go: Request/response middleware
2026-01-02 01:09:46 -05:00
Vallabh Mahajan
18fdcc94e5 docs: fix broken .md links and render issues (#13550) 2025-12-23 12:44:55 -05:00
Daniel Hiltgen
7ad036992f amd: use GTT on iGPUs on linux (#13196)
On Linux, look at the GTT memory information for iGPUs.
2025-12-23 09:30:05 -08:00
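For context, the amdgpu driver reports GTT (GPU-addressable system memory) through sysfs; a minimal sketch of reading those counters (the `mem_info_gtt_*` filenames come from the amdgpu driver, while `card0` and the surrounding code are illustrative, not Ollama's implementation):

```go
package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
)

// readSysfsUint parses a single integer value from a sysfs file.
func readSysfsUint(path string) (uint64, error) {
	b, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}
	return strconv.ParseUint(strings.TrimSpace(string(b)), 10, 64)
}

func main() {
	// On iGPUs, GTT is a better measure of usable memory than VRAM.
	// "card0" is illustrative; real code would enumerate /sys/class/drm.
	base := "/sys/class/drm/card0/device/"
	total, err := readSysfsUint(base + "mem_info_gtt_total")
	if err != nil {
		fmt.Println("no GTT info:", err)
		return
	}
	used, _ := readSysfsUint(base + "mem_info_gtt_used")
	fmt.Printf("GTT: %d of %d bytes used\n", used, total)
}
```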
Jesse Gross
172b5924af llm: Avoid integer underflow on llama engine memory layout
On the llama engine, when we compute the memory layout, we reserve
a buffer to allow some flexibility in case of incorrect estimates.
This buffer is subtracted from the GPU's free memory, and on GPUs
with limited memory the subtraction may underflow.

Fixes #13494
2025-12-19 15:48:15 -08:00
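The usual fix is to clamp the subtraction so an unsigned value cannot wrap; a minimal sketch of the pattern (names illustrative, not the actual engine code):

```go
// subtractReserve removes a reserve buffer from free GPU memory.
// With unsigned arithmetic, free - reserve wraps to a huge value
// when reserve > free, so the subtraction is clamped at zero.
func subtractReserve(free, reserve uint64) uint64 {
	if reserve > free {
		return 0
	}
	return free - reserve
}
```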
Jeffrey Morgan
8852220f59 add REQUIRES command to Modelfile (#13361) 2025-12-18 13:21:29 -08:00
Parth Sareen
7325791599 parsers/renderers: functiongemma (#13521) 2025-12-18 07:55:37 -08:00
Grace
522c11a763 Revert "Omit args and params in tool function def and calls (#13516)" (#13518)
This reverts commit 0fadeffaee.
2025-12-17 19:06:56 -08:00
Grace
0fadeffaee Omit args and params in tool function def and calls (#13516) 2025-12-17 18:42:21 -08:00
41 changed files with 4182 additions and 415 deletions

anthropic/anthropic.go
View File

@@ -0,0 +1,779 @@
// Package anthropic provides core transformation logic for compatibility with the Anthropic Messages API
package anthropic
import (
"crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
"time"
"github.com/ollama/ollama/api"
)
// Error types matching Anthropic API
type Error struct {
Type string `json:"type"`
Message string `json:"message"`
}
type ErrorResponse struct {
Type string `json:"type"` // always "error"
Error Error `json:"error"`
RequestID string `json:"request_id,omitempty"`
}
// NewError creates a new ErrorResponse with the appropriate error type based on HTTP status code
func NewError(code int, message string) ErrorResponse {
var etype string
switch code {
case http.StatusBadRequest:
etype = "invalid_request_error"
case http.StatusUnauthorized:
etype = "authentication_error"
case http.StatusForbidden:
etype = "permission_error"
case http.StatusNotFound:
etype = "not_found_error"
case http.StatusTooManyRequests:
etype = "rate_limit_error"
case http.StatusServiceUnavailable, 529:
etype = "overloaded_error"
default:
etype = "api_error"
}
return ErrorResponse{
Type: "error",
Error: Error{Type: etype, Message: message},
RequestID: generateID("req"),
}
}
// Request types
// MessagesRequest represents an Anthropic Messages API request
type MessagesRequest struct {
Model string `json:"model"`
MaxTokens int `json:"max_tokens"`
Messages []MessageParam `json:"messages"`
System any `json:"system,omitempty"` // string or []ContentBlock
Stream bool `json:"stream,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
TopK *int `json:"top_k,omitempty"`
StopSequences []string `json:"stop_sequences,omitempty"`
Tools []Tool `json:"tools,omitempty"`
ToolChoice *ToolChoice `json:"tool_choice,omitempty"`
Thinking *ThinkingConfig `json:"thinking,omitempty"`
Metadata *Metadata `json:"metadata,omitempty"`
}
// MessageParam represents a message in the request
type MessageParam struct {
Role string `json:"role"` // "user" or "assistant"
Content any `json:"content"` // string or []ContentBlock
}
// ContentBlock represents a content block in a message
type ContentBlock struct {
Type string `json:"type"` // text, image, tool_use, tool_result, thinking
// For text blocks
Text string `json:"text,omitempty"`
// For image blocks
Source *ImageSource `json:"source,omitempty"`
// For tool_use blocks
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Input any `json:"input,omitempty"`
// For tool_result blocks
ToolUseID string `json:"tool_use_id,omitempty"`
Content any `json:"content,omitempty"` // string or []ContentBlock
IsError bool `json:"is_error,omitempty"`
// For thinking blocks
Thinking string `json:"thinking,omitempty"`
Signature string `json:"signature,omitempty"`
}
// ImageSource represents the source of an image
type ImageSource struct {
Type string `json:"type"` // "base64" or "url"
MediaType string `json:"media_type,omitempty"`
Data string `json:"data,omitempty"`
URL string `json:"url,omitempty"`
}
// Tool represents a tool definition
type Tool struct {
Type string `json:"type,omitempty"` // "custom" for user-defined tools
Name string `json:"name"`
Description string `json:"description,omitempty"`
InputSchema json.RawMessage `json:"input_schema,omitempty"`
}
// ToolChoice controls how the model uses tools
type ToolChoice struct {
Type string `json:"type"` // "auto", "any", "tool", "none"
Name string `json:"name,omitempty"`
DisableParallelToolUse bool `json:"disable_parallel_tool_use,omitempty"`
}
// ThinkingConfig controls extended thinking
type ThinkingConfig struct {
Type string `json:"type"` // "enabled" or "disabled"
BudgetTokens int `json:"budget_tokens,omitempty"`
}
// Metadata for the request
type Metadata struct {
UserID string `json:"user_id,omitempty"`
}
// Response types
// MessagesResponse represents an Anthropic Messages API response
type MessagesResponse struct {
ID string `json:"id"`
Type string `json:"type"` // "message"
Role string `json:"role"` // "assistant"
Model string `json:"model"`
Content []ContentBlock `json:"content"`
StopReason string `json:"stop_reason,omitempty"`
StopSequence string `json:"stop_sequence,omitempty"`
Usage Usage `json:"usage"`
}
// Usage contains token usage information
type Usage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
}
// Streaming event types
// MessageStartEvent is sent at the start of streaming
type MessageStartEvent struct {
Type string `json:"type"` // "message_start"
Message MessagesResponse `json:"message"`
}
// ContentBlockStartEvent signals the start of a content block
type ContentBlockStartEvent struct {
Type string `json:"type"` // "content_block_start"
Index int `json:"index"`
ContentBlock ContentBlock `json:"content_block"`
}
// ContentBlockDeltaEvent contains incremental content updates
type ContentBlockDeltaEvent struct {
Type string `json:"type"` // "content_block_delta"
Index int `json:"index"`
Delta Delta `json:"delta"`
}
// Delta represents an incremental update
type Delta struct {
Type string `json:"type"` // "text_delta", "input_json_delta", "thinking_delta", "signature_delta"
Text string `json:"text,omitempty"`
PartialJSON string `json:"partial_json,omitempty"`
Thinking string `json:"thinking,omitempty"`
Signature string `json:"signature,omitempty"`
}
// ContentBlockStopEvent signals the end of a content block
type ContentBlockStopEvent struct {
Type string `json:"type"` // "content_block_stop"
Index int `json:"index"`
}
// MessageDeltaEvent contains updates to the message
type MessageDeltaEvent struct {
Type string `json:"type"` // "message_delta"
Delta MessageDelta `json:"delta"`
Usage DeltaUsage `json:"usage"`
}
// MessageDelta contains stop information
type MessageDelta struct {
StopReason string `json:"stop_reason,omitempty"`
StopSequence string `json:"stop_sequence,omitempty"`
}
// DeltaUsage contains cumulative token usage
type DeltaUsage struct {
OutputTokens int `json:"output_tokens"`
}
// MessageStopEvent signals the end of the message
type MessageStopEvent struct {
Type string `json:"type"` // "message_stop"
}
// PingEvent is a keepalive event
type PingEvent struct {
Type string `json:"type"` // "ping"
}
// StreamErrorEvent is an error during streaming
type StreamErrorEvent struct {
Type string `json:"type"` // "error"
Error Error `json:"error"`
}
// FromMessagesRequest converts an Anthropic MessagesRequest to an Ollama api.ChatRequest
func FromMessagesRequest(r MessagesRequest) (*api.ChatRequest, error) {
var messages []api.Message
// Handle system prompt
if r.System != nil {
switch sys := r.System.(type) {
case string:
if sys != "" {
messages = append(messages, api.Message{Role: "system", Content: sys})
}
case []any:
// System can be an array of content blocks
var content strings.Builder
for _, block := range sys {
if blockMap, ok := block.(map[string]any); ok {
if blockMap["type"] == "text" {
if text, ok := blockMap["text"].(string); ok {
content.WriteString(text)
}
}
}
}
if content.Len() > 0 {
messages = append(messages, api.Message{Role: "system", Content: content.String()})
}
}
}
// Convert messages
for _, msg := range r.Messages {
converted, err := convertMessage(msg)
if err != nil {
return nil, err
}
messages = append(messages, converted...)
}
// Build options
options := make(map[string]any)
options["num_predict"] = r.MaxTokens
if r.Temperature != nil {
options["temperature"] = *r.Temperature
}
if r.TopP != nil {
options["top_p"] = *r.TopP
}
if r.TopK != nil {
options["top_k"] = *r.TopK
}
if len(r.StopSequences) > 0 {
options["stop"] = r.StopSequences
}
// Convert tools
var tools api.Tools
for _, t := range r.Tools {
tool, err := convertTool(t)
if err != nil {
return nil, err
}
tools = append(tools, tool)
}
// Handle thinking
var think *api.ThinkValue
if r.Thinking != nil && r.Thinking.Type == "enabled" {
think = &api.ThinkValue{Value: true}
}
stream := r.Stream
return &api.ChatRequest{
Model: r.Model,
Messages: messages,
Options: options,
Stream: &stream,
Tools: tools,
Think: think,
}, nil
}
// convertMessage converts an Anthropic MessageParam to Ollama api.Message(s)
func convertMessage(msg MessageParam) ([]api.Message, error) {
var messages []api.Message
role := strings.ToLower(msg.Role)
switch content := msg.Content.(type) {
case string:
messages = append(messages, api.Message{Role: role, Content: content})
case []any:
// Handle array of content blocks
var textContent strings.Builder
var images []api.ImageData
var toolCalls []api.ToolCall
var thinking string
var toolResults []api.Message
for _, block := range content {
blockMap, ok := block.(map[string]any)
if !ok {
return nil, errors.New("invalid content block format")
}
blockType, _ := blockMap["type"].(string)
switch blockType {
case "text":
if text, ok := blockMap["text"].(string); ok {
textContent.WriteString(text)
}
case "image":
source, ok := blockMap["source"].(map[string]any)
if !ok {
return nil, errors.New("invalid image source")
}
sourceType, _ := source["type"].(string)
if sourceType == "base64" {
data, _ := source["data"].(string)
decoded, err := base64.StdEncoding.DecodeString(data)
if err != nil {
return nil, fmt.Errorf("invalid base64 image data: %w", err)
}
images = append(images, decoded)
}
// URL images would need to be fetched - skip for now
case "tool_use":
id, ok := blockMap["id"].(string)
if !ok {
return nil, errors.New("tool_use block missing required 'id' field")
}
name, ok := blockMap["name"].(string)
if !ok {
return nil, errors.New("tool_use block missing required 'name' field")
}
tc := api.ToolCall{
ID: id,
Function: api.ToolCallFunction{
Name: name,
},
}
if input, ok := blockMap["input"].(map[string]any); ok {
tc.Function.Arguments = api.ToolCallFunctionArguments(input)
}
toolCalls = append(toolCalls, tc)
case "tool_result":
toolUseID, _ := blockMap["tool_use_id"].(string)
var resultContent string
switch c := blockMap["content"].(type) {
case string:
resultContent = c
case []any:
// Extract text from content blocks
for _, cb := range c {
if cbMap, ok := cb.(map[string]any); ok {
if cbMap["type"] == "text" {
if text, ok := cbMap["text"].(string); ok {
resultContent += text
}
}
}
}
}
toolResults = append(toolResults, api.Message{
Role: "tool",
Content: resultContent,
ToolCallID: toolUseID,
})
case "thinking":
if t, ok := blockMap["thinking"].(string); ok {
thinking = t
}
}
}
// Build the main message
if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 {
m := api.Message{
Role: role,
Content: textContent.String(),
Images: images,
ToolCalls: toolCalls,
Thinking: thinking,
}
messages = append(messages, m)
}
// Add tool results as separate messages
messages = append(messages, toolResults...)
default:
return nil, fmt.Errorf("invalid message content type: %T", content)
}
return messages, nil
}
// convertTool converts an Anthropic Tool to an Ollama api.Tool
func convertTool(t Tool) (api.Tool, error) {
var params api.ToolFunctionParameters
if len(t.InputSchema) > 0 {
if err := json.Unmarshal(t.InputSchema, &params); err != nil {
return api.Tool{}, fmt.Errorf("invalid input_schema for tool %q: %w", t.Name, err)
}
}
return api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: t.Name,
Description: t.Description,
Parameters: params,
},
}, nil
}
// ToMessagesResponse converts an Ollama api.ChatResponse to an Anthropic MessagesResponse
func ToMessagesResponse(id string, r api.ChatResponse) MessagesResponse {
var content []ContentBlock
// Add thinking block if present
if r.Message.Thinking != "" {
content = append(content, ContentBlock{
Type: "thinking",
Thinking: r.Message.Thinking,
})
}
// Add text content if present
if r.Message.Content != "" {
content = append(content, ContentBlock{
Type: "text",
Text: r.Message.Content,
})
}
// Add tool use blocks
for _, tc := range r.Message.ToolCalls {
content = append(content, ContentBlock{
Type: "tool_use",
ID: tc.ID,
Name: tc.Function.Name,
Input: tc.Function.Arguments,
})
}
// Map stop reason
stopReason := mapStopReason(r.DoneReason, len(r.Message.ToolCalls) > 0)
return MessagesResponse{
ID: id,
Type: "message",
Role: "assistant",
Model: r.Model,
Content: content,
StopReason: stopReason,
Usage: Usage{
InputTokens: r.Metrics.PromptEvalCount,
OutputTokens: r.Metrics.EvalCount,
},
}
}
// mapStopReason converts Ollama done_reason to Anthropic stop_reason
func mapStopReason(reason string, hasToolCalls bool) string {
if hasToolCalls {
return "tool_use"
}
switch reason {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
default:
if reason != "" {
return "stop_sequence"
}
return ""
}
}
// StreamConverter manages state for converting Ollama streaming responses to Anthropic format
type StreamConverter struct {
ID string
Model string
firstWrite bool
contentIndex int
inputTokens int
outputTokens int
thinkingStarted bool
thinkingDone bool
textStarted bool
toolCallsSent map[string]bool
}
// NewStreamConverter creates a new StreamConverter
func NewStreamConverter(id, model string) *StreamConverter {
return &StreamConverter{
ID: id,
Model: model,
firstWrite: true,
toolCallsSent: make(map[string]bool),
}
}
// StreamEvent represents a streaming event to be sent to the client
type StreamEvent struct {
Event string
Data any
}
// Process converts an Ollama ChatResponse to Anthropic streaming events
func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
var events []StreamEvent
// First write: emit message_start
if c.firstWrite {
c.firstWrite = false
c.inputTokens = r.Metrics.PromptEvalCount
events = append(events, StreamEvent{
Event: "message_start",
Data: MessageStartEvent{
Type: "message_start",
Message: MessagesResponse{
ID: c.ID,
Type: "message",
Role: "assistant",
Model: c.Model,
Content: []ContentBlock{},
Usage: Usage{
InputTokens: c.inputTokens,
OutputTokens: 0,
},
},
},
})
}
// Handle thinking content
if r.Message.Thinking != "" && !c.thinkingDone {
if !c.thinkingStarted {
c.thinkingStarted = true
events = append(events, StreamEvent{
Event: "content_block_start",
Data: ContentBlockStartEvent{
Type: "content_block_start",
Index: c.contentIndex,
ContentBlock: ContentBlock{
Type: "thinking",
Thinking: "",
},
},
})
}
events = append(events, StreamEvent{
Event: "content_block_delta",
Data: ContentBlockDeltaEvent{
Type: "content_block_delta",
Index: c.contentIndex,
Delta: Delta{
Type: "thinking_delta",
Thinking: r.Message.Thinking,
},
},
})
}
// Handle text content
if r.Message.Content != "" {
// Close thinking block if it was open
if c.thinkingStarted && !c.thinkingDone {
c.thinkingDone = true
events = append(events, StreamEvent{
Event: "content_block_stop",
Data: ContentBlockStopEvent{
Type: "content_block_stop",
Index: c.contentIndex,
},
})
c.contentIndex++
}
if !c.textStarted {
c.textStarted = true
events = append(events, StreamEvent{
Event: "content_block_start",
Data: ContentBlockStartEvent{
Type: "content_block_start",
Index: c.contentIndex,
ContentBlock: ContentBlock{
Type: "text",
Text: "",
},
},
})
}
events = append(events, StreamEvent{
Event: "content_block_delta",
Data: ContentBlockDeltaEvent{
Type: "content_block_delta",
Index: c.contentIndex,
Delta: Delta{
Type: "text_delta",
Text: r.Message.Content,
},
},
})
}
// Handle tool calls
for _, tc := range r.Message.ToolCalls {
if c.toolCallsSent[tc.ID] {
continue
}
// Close any previous block
if c.textStarted {
events = append(events, StreamEvent{
Event: "content_block_stop",
Data: ContentBlockStopEvent{
Type: "content_block_stop",
Index: c.contentIndex,
},
})
c.contentIndex++
c.textStarted = false
}
// Start tool use block
events = append(events, StreamEvent{
Event: "content_block_start",
Data: ContentBlockStartEvent{
Type: "content_block_start",
Index: c.contentIndex,
ContentBlock: ContentBlock{
Type: "tool_use",
ID: tc.ID,
Name: tc.Function.Name,
Input: map[string]any{},
},
},
})
// Send input as JSON delta
argsJSON, _ := json.Marshal(tc.Function.Arguments)
events = append(events, StreamEvent{
Event: "content_block_delta",
Data: ContentBlockDeltaEvent{
Type: "content_block_delta",
Index: c.contentIndex,
Delta: Delta{
Type: "input_json_delta",
PartialJSON: string(argsJSON),
},
},
})
// Close tool use block
events = append(events, StreamEvent{
Event: "content_block_stop",
Data: ContentBlockStopEvent{
Type: "content_block_stop",
Index: c.contentIndex,
},
})
c.toolCallsSent[tc.ID] = true
c.contentIndex++
}
// Handle done
if r.Done {
// Close any open block
if c.textStarted {
events = append(events, StreamEvent{
Event: "content_block_stop",
Data: ContentBlockStopEvent{
Type: "content_block_stop",
Index: c.contentIndex,
},
})
} else if c.thinkingStarted && !c.thinkingDone {
events = append(events, StreamEvent{
Event: "content_block_stop",
Data: ContentBlockStopEvent{
Type: "content_block_stop",
Index: c.contentIndex,
},
})
}
c.outputTokens = r.Metrics.EvalCount
stopReason := mapStopReason(r.DoneReason, len(c.toolCallsSent) > 0)
events = append(events, StreamEvent{
Event: "message_delta",
Data: MessageDeltaEvent{
Type: "message_delta",
Delta: MessageDelta{
StopReason: stopReason,
},
Usage: DeltaUsage{
OutputTokens: c.outputTokens,
},
},
})
events = append(events, StreamEvent{
Event: "message_stop",
Data: MessageStopEvent{
Type: "message_stop",
},
})
}
return events
}
// generateID generates a unique ID with the given prefix using crypto/rand
func generateID(prefix string) string {
b := make([]byte, 12)
if _, err := rand.Read(b); err != nil {
// Fallback to time-based ID if crypto/rand fails
return fmt.Sprintf("%s_%d", prefix, time.Now().UnixNano())
}
return fmt.Sprintf("%s_%x", prefix, b)
}
// GenerateMessageID generates a unique message ID
func GenerateMessageID() string {
return generateID("msg")
}

anthropic/anthropic_test.go
View File

@@ -0,0 +1,667 @@
package anthropic
import (
"encoding/base64"
"encoding/json"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
const (
testImage = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
)
func TestFromMessagesRequest_Basic(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{
{Role: "user", Content: "Hello"},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got %q", result.Model)
}
if len(result.Messages) != 1 {
t.Fatalf("expected 1 message, got %d", len(result.Messages))
}
if result.Messages[0].Role != "user" || result.Messages[0].Content != "Hello" {
t.Errorf("unexpected message: %+v", result.Messages[0])
}
if numPredict, ok := result.Options["num_predict"].(int); !ok || numPredict != 1024 {
t.Errorf("expected num_predict 1024, got %v", result.Options["num_predict"])
}
}
func TestFromMessagesRequest_WithSystemPrompt(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
System: "You are a helpful assistant.",
Messages: []MessageParam{
{Role: "user", Content: "Hello"},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Messages) != 2 {
t.Fatalf("expected 2 messages, got %d", len(result.Messages))
}
if result.Messages[0].Role != "system" || result.Messages[0].Content != "You are a helpful assistant." {
t.Errorf("unexpected system message: %+v", result.Messages[0])
}
}
func TestFromMessagesRequest_WithSystemPromptArray(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
System: []any{
map[string]any{"type": "text", "text": "You are helpful."},
map[string]any{"type": "text", "text": " Be concise."},
},
Messages: []MessageParam{
{Role: "user", Content: "Hello"},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Messages) != 2 {
t.Fatalf("expected 2 messages, got %d", len(result.Messages))
}
if result.Messages[0].Content != "You are helpful. Be concise." {
t.Errorf("unexpected system message content: %q", result.Messages[0].Content)
}
}
func TestFromMessagesRequest_WithOptions(t *testing.T) {
temp := 0.7
topP := 0.9
topK := 40
req := MessagesRequest{
Model: "test-model",
MaxTokens: 2048,
Messages: []MessageParam{{Role: "user", Content: "Hello"}},
Temperature: &temp,
TopP: &topP,
TopK: &topK,
StopSequences: []string{"\n", "END"},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Options["temperature"] != 0.7 {
t.Errorf("expected temperature 0.7, got %v", result.Options["temperature"])
}
if result.Options["top_p"] != 0.9 {
t.Errorf("expected top_p 0.9, got %v", result.Options["top_p"])
}
if result.Options["top_k"] != 40 {
t.Errorf("expected top_k 40, got %v", result.Options["top_k"])
}
if diff := cmp.Diff([]string{"\n", "END"}, result.Options["stop"]); diff != "" {
t.Errorf("stop sequences mismatch: %s", diff)
}
}
func TestFromMessagesRequest_WithImage(t *testing.T) {
imgData, _ := base64.StdEncoding.DecodeString(testImage)
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{
{
Role: "user",
Content: []any{
map[string]any{"type": "text", "text": "What's in this image?"},
map[string]any{
"type": "image",
"source": map[string]any{
"type": "base64",
"media_type": "image/png",
"data": testImage,
},
},
},
},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Messages) != 1 {
t.Fatalf("expected 1 message, got %d", len(result.Messages))
}
if result.Messages[0].Content != "What's in this image?" {
t.Errorf("expected content 'What's in this image?', got %q", result.Messages[0].Content)
}
if len(result.Messages[0].Images) != 1 {
t.Fatalf("expected 1 image, got %d", len(result.Messages[0].Images))
}
if string(result.Messages[0].Images[0]) != string(imgData) {
t.Error("image data mismatch")
}
}
func TestFromMessagesRequest_WithToolUse(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{
{Role: "user", Content: "What's the weather in Paris?"},
{
Role: "assistant",
Content: []any{
map[string]any{
"type": "tool_use",
"id": "call_123",
"name": "get_weather",
"input": map[string]any{"location": "Paris"},
},
},
},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Messages) != 2 {
t.Fatalf("expected 2 messages, got %d", len(result.Messages))
}
if len(result.Messages[1].ToolCalls) != 1 {
t.Fatalf("expected 1 tool call, got %d", len(result.Messages[1].ToolCalls))
}
tc := result.Messages[1].ToolCalls[0]
if tc.ID != "call_123" {
t.Errorf("expected tool call ID 'call_123', got %q", tc.ID)
}
if tc.Function.Name != "get_weather" {
t.Errorf("expected tool name 'get_weather', got %q", tc.Function.Name)
}
}
func TestFromMessagesRequest_WithToolResult(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{
{
Role: "user",
Content: []any{
map[string]any{
"type": "tool_result",
"tool_use_id": "call_123",
"content": "The weather in Paris is sunny, 22°C",
},
},
},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Messages) != 1 {
t.Fatalf("expected 1 message, got %d", len(result.Messages))
}
msg := result.Messages[0]
if msg.Role != "tool" {
t.Errorf("expected role 'tool', got %q", msg.Role)
}
if msg.ToolCallID != "call_123" {
t.Errorf("expected tool_call_id 'call_123', got %q", msg.ToolCallID)
}
if msg.Content != "The weather in Paris is sunny, 22°C" {
t.Errorf("unexpected content: %q", msg.Content)
}
}
func TestFromMessagesRequest_WithTools(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{{Role: "user", Content: "Hello"}},
Tools: []Tool{
{
Name: "get_weather",
Description: "Get current weather",
InputSchema: json.RawMessage(`{"type":"object","properties":{"location":{"type":"string"}},"required":["location"]}`),
},
},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Tools) != 1 {
t.Fatalf("expected 1 tool, got %d", len(result.Tools))
}
tool := result.Tools[0]
if tool.Type != "function" {
t.Errorf("expected type 'function', got %q", tool.Type)
}
if tool.Function.Name != "get_weather" {
t.Errorf("expected name 'get_weather', got %q", tool.Function.Name)
}
if tool.Function.Description != "Get current weather" {
t.Errorf("expected description 'Get current weather', got %q", tool.Function.Description)
}
}
func TestFromMessagesRequest_WithThinking(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{{Role: "user", Content: "Hello"}},
Thinking: &ThinkingConfig{Type: "enabled", BudgetTokens: 1000},
}
result, err := FromMessagesRequest(req)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Think == nil {
t.Fatal("expected Think to be set")
}
if v, ok := result.Think.Value.(bool); !ok || !v {
t.Errorf("expected Think.Value to be true, got %v", result.Think.Value)
}
}
func TestFromMessagesRequest_ToolUseMissingID(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{
{
Role: "assistant",
Content: []any{
map[string]any{
"type": "tool_use",
"name": "get_weather",
},
},
},
},
}
_, err := FromMessagesRequest(req)
if err == nil {
t.Fatal("expected error for missing tool_use id")
}
if err.Error() != "tool_use block missing required 'id' field" {
t.Errorf("unexpected error message: %v", err)
}
}
func TestFromMessagesRequest_ToolUseMissingName(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{
{
Role: "assistant",
Content: []any{
map[string]any{
"type": "tool_use",
"id": "call_123",
},
},
},
},
}
_, err := FromMessagesRequest(req)
if err == nil {
t.Fatal("expected error for missing tool_use name")
}
if err.Error() != "tool_use block missing required 'name' field" {
t.Errorf("unexpected error message: %v", err)
}
}
func TestFromMessagesRequest_InvalidToolSchema(t *testing.T) {
req := MessagesRequest{
Model: "test-model",
MaxTokens: 1024,
Messages: []MessageParam{{Role: "user", Content: "Hello"}},
Tools: []Tool{
{
Name: "bad_tool",
InputSchema: json.RawMessage(`{invalid json`),
},
},
}
_, err := FromMessagesRequest(req)
if err == nil {
t.Fatal("expected error for invalid tool schema")
}
}
func TestToMessagesResponse_Basic(t *testing.T) {
resp := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
Content: "Hello there!",
},
Done: true,
DoneReason: "stop",
Metrics: api.Metrics{
PromptEvalCount: 10,
EvalCount: 5,
},
}
result := ToMessagesResponse("msg_123", resp)
if result.ID != "msg_123" {
t.Errorf("expected ID 'msg_123', got %q", result.ID)
}
if result.Type != "message" {
t.Errorf("expected type 'message', got %q", result.Type)
}
if result.Role != "assistant" {
t.Errorf("expected role 'assistant', got %q", result.Role)
}
if len(result.Content) != 1 {
t.Fatalf("expected 1 content block, got %d", len(result.Content))
}
if result.Content[0].Type != "text" || result.Content[0].Text != "Hello there!" {
t.Errorf("unexpected content: %+v", result.Content[0])
}
if result.StopReason != "end_turn" {
t.Errorf("expected stop_reason 'end_turn', got %q", result.StopReason)
}
if result.Usage.InputTokens != 10 || result.Usage.OutputTokens != 5 {
t.Errorf("unexpected usage: %+v", result.Usage)
}
}
func TestToMessagesResponse_WithToolCalls(t *testing.T) {
resp := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_123",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "Paris"},
},
},
},
},
Done: true,
DoneReason: "stop",
}
result := ToMessagesResponse("msg_123", resp)
if len(result.Content) != 1 {
t.Fatalf("expected 1 content block, got %d", len(result.Content))
}
if result.Content[0].Type != "tool_use" {
t.Errorf("expected type 'tool_use', got %q", result.Content[0].Type)
}
if result.Content[0].ID != "call_123" {
t.Errorf("expected ID 'call_123', got %q", result.Content[0].ID)
}
if result.Content[0].Name != "get_weather" {
t.Errorf("expected name 'get_weather', got %q", result.Content[0].Name)
}
if result.StopReason != "tool_use" {
t.Errorf("expected stop_reason 'tool_use', got %q", result.StopReason)
}
}
func TestToMessagesResponse_WithThinking(t *testing.T) {
resp := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
Content: "The answer is 42.",
Thinking: "Let me think about this...",
},
Done: true,
DoneReason: "stop",
}
result := ToMessagesResponse("msg_123", resp)
if len(result.Content) != 2 {
t.Fatalf("expected 2 content blocks, got %d", len(result.Content))
}
if result.Content[0].Type != "thinking" {
t.Errorf("expected first block type 'thinking', got %q", result.Content[0].Type)
}
if result.Content[0].Thinking != "Let me think about this..." {
t.Errorf("unexpected thinking content: %q", result.Content[0].Thinking)
}
if result.Content[1].Type != "text" {
t.Errorf("expected second block type 'text', got %q", result.Content[1].Type)
}
}
func TestMapStopReason(t *testing.T) {
tests := []struct {
reason string
hasToolCalls bool
want string
}{
{"stop", false, "end_turn"},
{"length", false, "max_tokens"},
{"stop", true, "tool_use"},
{"other", false, "stop_sequence"},
{"", false, ""},
}
for _, tt := range tests {
got := mapStopReason(tt.reason, tt.hasToolCalls)
if got != tt.want {
t.Errorf("mapStopReason(%q, %v) = %q, want %q", tt.reason, tt.hasToolCalls, got, tt.want)
}
}
}
func TestNewError(t *testing.T) {
tests := []struct {
code int
want string
}{
{400, "invalid_request_error"},
{401, "authentication_error"},
{403, "permission_error"},
{404, "not_found_error"},
{429, "rate_limit_error"},
{500, "api_error"},
{503, "overloaded_error"},
{529, "overloaded_error"},
}
for _, tt := range tests {
result := NewError(tt.code, "test message")
if result.Type != "error" {
t.Errorf("NewError(%d) type = %q, want 'error'", tt.code, result.Type)
}
if result.Error.Type != tt.want {
t.Errorf("NewError(%d) error.type = %q, want %q", tt.code, result.Error.Type, tt.want)
}
if result.Error.Message != "test message" {
t.Errorf("NewError(%d) message = %q, want 'test message'", tt.code, result.Error.Message)
}
if result.RequestID == "" {
t.Errorf("NewError(%d) request_id should not be empty", tt.code)
}
}
}
func TestGenerateMessageID(t *testing.T) {
id1 := GenerateMessageID()
id2 := GenerateMessageID()
if id1 == "" {
t.Error("GenerateMessageID returned empty string")
}
if id1 == id2 {
t.Error("GenerateMessageID returned duplicate IDs")
}
if len(id1) < 10 {
t.Errorf("GenerateMessageID returned short ID: %q", id1)
}
if id1[:4] != "msg_" {
t.Errorf("GenerateMessageID should start with 'msg_', got %q", id1[:4])
}
}
func TestStreamConverter_Basic(t *testing.T) {
conv := NewStreamConverter("msg_123", "test-model")
// First chunk
resp1 := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
Content: "Hello",
},
Metrics: api.Metrics{PromptEvalCount: 10},
}
events1 := conv.Process(resp1)
if len(events1) < 3 {
t.Fatalf("expected at least 3 events for first chunk, got %d", len(events1))
}
// Should have message_start, content_block_start, content_block_delta
if events1[0].Event != "message_start" {
t.Errorf("expected first event 'message_start', got %q", events1[0].Event)
}
if events1[1].Event != "content_block_start" {
t.Errorf("expected second event 'content_block_start', got %q", events1[1].Event)
}
if events1[2].Event != "content_block_delta" {
t.Errorf("expected third event 'content_block_delta', got %q", events1[2].Event)
}
// Final chunk
resp2 := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
Content: " world!",
},
Done: true,
DoneReason: "stop",
Metrics: api.Metrics{EvalCount: 5},
}
events2 := conv.Process(resp2)
// Should have content_block_delta, content_block_stop, message_delta, message_stop
hasStop := false
for _, e := range events2 {
if e.Event == "message_stop" {
hasStop = true
}
}
if !hasStop {
t.Error("expected message_stop event in final chunk")
}
}
func TestStreamConverter_WithToolCalls(t *testing.T) {
conv := NewStreamConverter("msg_123", "test-model")
resp := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_123",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: map[string]any{"location": "Paris"},
},
},
},
},
Done: true,
DoneReason: "stop",
Metrics: api.Metrics{PromptEvalCount: 10, EvalCount: 5},
}
events := conv.Process(resp)
hasToolStart := false
hasToolDelta := false
for _, e := range events {
if e.Event == "content_block_start" {
if start, ok := e.Data.(ContentBlockStartEvent); ok {
if start.ContentBlock.Type == "tool_use" {
hasToolStart = true
}
}
}
if e.Event == "content_block_delta" {
if delta, ok := e.Data.(ContentBlockDeltaEvent); ok {
if delta.Delta.Type == "input_json_delta" {
hasToolDelta = true
}
}
}
}
if !hasToolStart {
t.Error("expected tool_use content_block_start event")
}
if !hasToolDelta {
t.Error("expected input_json_delta event")
}
}

View File

@@ -554,6 +554,9 @@ type CreateRequest struct {
Renderer string `json:"renderer,omitempty"`
Parser string `json:"parser,omitempty"`
+// Requires is the minimum version of Ollama required by the model.
+Requires string `json:"requires,omitempty"`
// Info is a map of additional information for the model
Info map[string]any `json:"info,omitempty"`
@@ -604,6 +607,7 @@ type ShowResponse struct {
Tensors []Tensor `json:"tensors,omitempty"`
Capabilities []model.Capability `json:"capabilities,omitempty"`
ModifiedAt time.Time `json:"modified_at,omitempty"`
+Requires string `json:"requires,omitempty"`
}
// CopyRequest is the request passed to [Client.Copy].

View File

@@ -943,6 +943,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
rows = append(rows, []string{"", "parameters", resp.Details.ParameterSize})
}
rows = append(rows, []string{"", "quantization", resp.Details.QuantizationLevel})
+if resp.Requires != "" {
+	rows = append(rows, []string{"", "requires", resp.Requires})
+}
return
})

View File

@@ -291,6 +291,31 @@ Weigh anchor!
t.Errorf("unexpected output (-want +got):\n%s", diff)
}
})
t.Run("min version", func(t *testing.T) {
var b bytes.Buffer
if err := showInfo(&api.ShowResponse{
Details: api.ModelDetails{
Family: "test",
ParameterSize: "7B",
QuantizationLevel: "FP16",
},
Requires: "0.14.0",
}, false, &b); err != nil {
t.Fatal(err)
}
expect := ` Model
architecture test
parameters 7B
quantization FP16
requires 0.14.0
`
if diff := cmp.Diff(expect, b.String()); diff != "" {
t.Errorf("unexpected output (-want +got):\n%s", diff)
}
})
}
func TestDeleteHandler(t *testing.T) {

View File

@@ -216,8 +216,6 @@ func ConvertModel(fsys fs.FS, f *os.File) error {
conv = &deepseekocr{}
case "DeepseekV3ForCausalLM":
conv = &deepseek2Model{}
case "MistralForCausalLM":
conv = &mistralLarge3Model{}
default:
return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
}

View File

@@ -1,286 +0,0 @@
package convert
import (
"cmp"
"fmt"
"log/slog"
"regexp"
"strconv"
"strings"
"github.com/ollama/ollama/fs/ggml"
)
type mistralLarge3Model struct {
ModelParameters
Dim uint32 `json:"dim"`
NumLayers uint32 `json:"n_layers"`
HeadDim uint32 `json:"head_dim"`
HiddenDim uint32 `json:"hidden_dim"`
NumHeads uint32 `json:"n_heads"`
NumKVHeads uint32 `json:"n_kv_heads"`
RopeTheta float32 `json:"rope_theta"`
NormEps float32 `json:"norm_eps"`
VocabSize uint32 `json:"vocab_size"`
TiedEmbeddings bool `json:"tied_embeddings"`
MaxPosEmbed uint32 `json:"max_position_embeddings"`
MaxSeqLen uint32 `json:"max_seq_len"`
// LoRA attention parameters (DeepSeek-style)
QLoraRank uint32 `json:"q_lora_rank"`
QKRopeHeadDim uint32 `json:"qk_rope_head_dim"`
QKNopeHeadDim uint32 `json:"qk_nope_head_dim"`
KVLoraRank uint32 `json:"kv_lora_rank"`
VHeadDim uint32 `json:"v_head_dim"`
// ROPE scaling configurations
Llama4Scaling struct {
OrigMaxPosEmbed uint32 `json:"original_max_position_embeddings"`
Beta float32 `json:"beta"`
} `json:"llama_4_scaling"`
Yarn struct {
OrigMaxPosEmbed uint32 `json:"original_max_position_embeddings"`
Factor float32 `json:"factor"`
ApplyScale bool `json:"apply_scale"`
Beta float32 `json:"beta"`
Alpha float32 `json:"alpha"`
} `json:"yarn"`
// MOE configuration
MOE struct {
ExpertParallel uint32 `json:"expert_parallel"`
ExpertModelParallel uint32 `json:"expert_model_parallel"`
RouteEveryN uint32 `json:"route_every_n"`
FirstKDenseReplace uint32 `json:"first_k_dense_replace"`
NumExperts uint32 `json:"num_experts"`
NumExpertsPerTok uint32 `json:"num_experts_per_tok"`
NumExpertGroups uint32 `json:"num_expert_groups"`
NumExpertGroupsPerTok uint32 `json:"num_expert_groups_per_tok"`
RoutedScale float32 `json:"routed_scale"`
ExpertHiddenDim uint32 `json:"expert_hidden_dim"`
NumSharedExperts uint32 `json:"num_shared_experts"`
} `json:"moe"`
// Vision encoder configuration
VisionEncoder struct {
ImageTokenID uint32 `json:"image_token_id"`
ImageBreakTokenID uint32 `json:"image_break_token_id"`
ImageEndTokenID uint32 `json:"image_end_token_id"`
IntermediateSize uint32 `json:"intermediate_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
MMProjectorID string `json:"mm_projector_id"`
SpatialMergeSize uint32 `json:"spatial_merge_size"`
HiddenSize uint32 `json:"hidden_size"`
NumChannels uint32 `json:"num_channels"`
ImageSize uint32 `json:"image_size"`
MaxImageSize uint32 `json:"max_image_size"`
PatchSize uint32 `json:"patch_size"`
RopeTheta float32 `json:"rope_theta"`
AddPreMMProjectorLayerNorm bool `json:"add_pre_mm_projector_layer_norm"`
AdapterBias bool `json:"adapter_bias"`
} `json:"vision_encoder"`
}
func (p *mistralLarge3Model) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "deepseek2" // Use deepseek2 architecture for runtime compatibility
kv["general.type"] = "model"
// Basic model parameters (using deepseek2 keys for compatibility)
kv["deepseek2.vocab_size"] = p.VocabSize
kv["deepseek2.block_count"] = p.NumLayers
kv["deepseek2.context_length"] = cmp.Or(p.MaxPosEmbed, p.MaxSeqLen)
kv["deepseek2.embedding_length"] = p.Dim
kv["deepseek2.feed_forward_length"] = p.HiddenDim
// Attention configuration
kv["deepseek2.attention.head_count"] = p.NumHeads
kv["deepseek2.attention.head_count_kv"] = p.NumKVHeads
kv["deepseek2.attention.layer_norm_rms_epsilon"] = p.NormEps
kv["deepseek2.attention.key_length"] = p.QKNopeHeadDim + p.QKRopeHeadDim
kv["deepseek2.attention.value_length"] = p.VHeadDim
// LoRA attention parameters
kv["deepseek2.attention.q_lora_rank"] = p.QLoraRank
kv["deepseek2.attention.kv_lora_rank"] = p.KVLoraRank
// ROPE configuration
kv["deepseek2.rope.dimension_count"] = p.QKRopeHeadDim
kv["deepseek2.rope.freq_base"] = cmp.Or(p.RopeTheta, 10000.0)
// ROPE scaling - map to deepseek2 format
if p.Yarn.OrigMaxPosEmbed > 0 {
kv["deepseek2.rope.scaling.factor"] = p.Yarn.Factor
kv["deepseek2.rope.scaling.original_context_length"] = p.Yarn.OrigMaxPosEmbed
kv["deepseek2.rope.scaling.type"] = "yarn"
kv["deepseek2.rope.scaling.yarn_log_multiplier"] = float32(0.1) // mscale_all_dim * 0.1 as in llama.cpp
}
// MOE configuration
if p.MOE.NumExperts > 0 {
kv["deepseek2.expert_count"] = p.MOE.NumExperts
kv["deepseek2.expert_used_count"] = p.MOE.NumExpertsPerTok
kv["deepseek2.expert_shared_count"] = p.MOE.NumSharedExperts
kv["deepseek2.expert_feed_forward_length"] = p.MOE.ExpertHiddenDim
kv["deepseek2.expert_weights_scale"] = p.MOE.RoutedScale
kv["deepseek2.leading_dense_block_count"] = p.MOE.FirstKDenseReplace
kv["deepseek2.expert_weights_norm"] = true
kv["deepseek2.expert_gating_func"] = uint32(1) // softmax
}
// Vision encoder configuration (if supported by deepseek2 runtime)
if p.VisionEncoder.HiddenSize > 0 {
kv["deepseek2.vision.block_count"] = p.VisionEncoder.NumHiddenLayers
kv["deepseek2.vision.embedding_length"] = p.VisionEncoder.HiddenSize
kv["deepseek2.vision.feed_forward_length"] = p.VisionEncoder.IntermediateSize
kv["deepseek2.vision.attention.head_count"] = p.VisionEncoder.NumAttentionHeads
kv["deepseek2.vision.image_size"] = p.VisionEncoder.ImageSize
kv["deepseek2.vision.patch_size"] = p.VisionEncoder.PatchSize
kv["deepseek2.vision.num_channels"] = p.VisionEncoder.NumChannels
// Multimodal configuration
kv["deepseek2.image_token_id"] = p.VisionEncoder.ImageTokenID
kv["deepseek2.image_break_token_id"] = p.VisionEncoder.ImageBreakTokenID
kv["deepseek2.image_end_token_id"] = p.VisionEncoder.ImageEndTokenID
kv["deepseek2.spatial_merge_size"] = p.VisionEncoder.SpatialMergeSize
}
// Set tokenizer type - use tekken preprocessing (now supported!)
kv["tokenizer.ggml.pre"] = "tekken"
return kv
}
func (p *mistralLarge3Model) specialTokenTypes() []string {
return []string{
"bos", "eos", "unk", "sep", "pad", "cls", "mask",
}
}
func (p *mistralLarge3Model) Replacements() []string {
return []string{
"lm_head", "output",
"tok_embeddings", "token_embd", // Mistral Large uses tok_embeddings instead of model.embed_tokens
"norm", "output_norm",
"language_model.", "",
"layers", "blk", // Mistral 3 Large uses "layers" instead of "model.layers"
"attention_norm", "attn_norm",
// LoRA attention mappings (Mistral 3 Large style)
"attention.wkv_a_with_mqa", "attn_kv_a_mqa",
"attention.kv_a_norm", "attn_kv_a_norm",
"attention.wkv_b", "attn_kv_b",
"attention.wq_a", "attn_q_a",
"attention.q_a_norm", "attn_q_a_norm",
"attention.wq_b", "attn_q_b",
"attention.wo", "attn_output",
"ffn_norm", "ffn_norm", // Keep ffn_norm as is
// MOE mappings for Mistral 3 Large
"shared_experts.w2", "ffn_down_shexp",
"shared_experts.w1", "ffn_gate_shexp",
"shared_experts.w3", "ffn_up_shexp",
"experts.*.w1", "ffn_gate_exps", // Will be merged in Tensors()
"experts.*.w2", "ffn_down_exps", // Will be merged in Tensors()
"experts.*.w3", "ffn_up_exps", // Will be merged in Tensors()
"gate", "ffn_gate_inp",
// Standard feed forward mappings (for non-MOE layers)
"feed_forward.w1", "ffn_gate",
"feed_forward.w2", "ffn_down",
"feed_forward.w3", "ffn_up",
// Mistral-specific tensor renaming
".qscale_act", ".input_scale",
".qscale_weight", ".weight_scale",
// Vision encoder mappings - do we even need this?
"vision_tower", "v",
"ln_pre", "encoder_norm",
"attention.q_proj", "attn_q",
"attention.k_proj", "attn_k",
"attention.v_proj", "attn_v",
"attention.o_proj", "attn_output",
"attention_norm", "attn_norm",
"feed_forward.gate_proj", "ffn_gate",
"feed_forward.down_proj", "ffn_down",
"feed_forward.up_proj", "ffn_up",
"multi_modal_projector", "mm",
"patch_merger.merging_layer", "mm.patch_merger",
"pre_mm_projector_norm", "mm.pre_norm",
"vision_language_adapter.w_in", "mm.w_in",
"vision_language_adapter.w_out", "mm.w_out",
}
}
func (p *mistralLarge3Model) Tensors(s []Tensor) (out []*ggml.Tensor) {
// Create merges for MOE expert tensors
if p.MOE.NumExperts > 0 {
merges := make([]merge, p.NumLayers*3)
for i := range p.NumLayers {
merges[i*3+0] = merge{
fmt.Sprintf("blk.%d.experts.*.w1.weight", i),
fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
}
merges[i*3+1] = merge{
fmt.Sprintf("blk.%d.experts.*.w3.weight", i),
fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
}
merges[i*3+2] = merge{
fmt.Sprintf("blk.%d.experts.*.w2.weight", i),
fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
}
}
out, s = mergeTensors(s, merges...)
}
skipLayer := func(n string, minValue uint32) bool {
re := regexp.MustCompile(`^blk\.(\d+)`)
matches := re.FindStringSubmatch(n)
if matches == nil {
return false
}
blkNum, err := strconv.Atoi(matches[1])
if err != nil {
return false
}
return uint32(blkNum) >= minValue
}
// Function to check if tensor should be skipped (vision components)
skipVisionTensor := func(name string) bool {
return strings.HasPrefix(name, "vision_") ||
strings.HasPrefix(name, "patch_merger.") ||
strings.Contains(name, "mm_projector")
}
for _, t := range s {
name := t.Name()
// Skip vision tensors (handled separately or not needed)
if skipVisionTensor(name) {
slog.Debug("skipping vision tensor", "name", name)
continue
}
// Skip any additional layers beyond expected count
if skipLayer(name, p.NumLayers) {
slog.Debug("skipping extra layer", "name", name)
continue
}
out = append(out, &ggml.Tensor{
Name: name,
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}

View File

@@ -101,8 +101,6 @@ func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error)
t.Pre = "deepseek-coder"
case "1ff7f41064896984db5d1bb6ff64fa4bc29007d08c1b439e505b7392777a319e":
t.Pre = "qwen2"
case "1d64a9a8eaf9f1bd80331984d81fdd514e7feafe8df83a525dd31472f275699a":
t.Pre = "tekken"
case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
// noop, empty pretokenizer
default:

View File

@@ -49,7 +49,8 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
// temporary fix to handle gemma3 broken configs
if slices.Contains([]string{"<end_of_turn>", "<start_of_turn>"}, piece.GetPiece()) {
// TODO(parthsareen): allow reading of tokenizer.json to allow managing special tokens when using spm
if slices.Contains([]string{"<end_of_turn>", "<start_of_turn>", "<start_function_declaration>", "<end_function_declaration>", "<start_function_call>", "<end_function_call>", "<start_function_response>", "<end_function_response>", "<escape>"}, piece.GetPiece()) {
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
}

View File

@@ -14,6 +14,7 @@
* [API Reference](https://docs.ollama.com/api)
* [Modelfile Reference](https://docs.ollama.com/modelfile)
* [OpenAI Compatibility](https://docs.ollama.com/api/openai-compatibility)
+* [Anthropic Compatibility](./api/anthropic-compatibility.mdx)
### Resources

View File

@@ -0,0 +1,339 @@
---
title: Anthropic compatibility
---
Ollama provides compatibility with the [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) to help connect existing applications to Ollama, including tools like Claude Code.
## Usage
### Environment variables
To use Ollama with tools that expect the Anthropic API (like Claude Code), set these environment variables:
```shell
export ANTHROPIC_BASE_URL=http://localhost:11434
export ANTHROPIC_API_KEY=ollama # required but ignored
```
### Simple `/v1/messages` example
<CodeGroup dropdown>
```python basic.py
import anthropic
client = anthropic.Anthropic(
base_url='http://localhost:11434',
api_key='ollama', # required but ignored
)
message = client.messages.create(
model='llama3.2:3b',
max_tokens=1024,
messages=[
{'role': 'user', 'content': 'Hello, how are you?'}
]
)
print(message.content[0].text)
```
```javascript basic.js
import Anthropic from "@anthropic-ai/sdk";
const anthropic = new Anthropic({
baseURL: "http://localhost:11434",
apiKey: "ollama", // required but ignored
});
const message = await anthropic.messages.create({
model: "llama3.2:3b",
max_tokens: 1024,
messages: [{ role: "user", content: "Hello, how are you?" }],
});
console.log(message.content[0].text);
```
```shell basic.sh
curl -X POST http://localhost:11434/v1/messages \
-H "Content-Type: application/json" \
-H "x-api-key: ollama" \
-H "anthropic-version: 2023-06-01" \
-d '{
"model": "llama3.2:3b",
"max_tokens": 1024,
"messages": [{ "role": "user", "content": "Hello, how are you?" }]
}'
```
</CodeGroup>
### Streaming example
<CodeGroup dropdown>
```python streaming.py
import anthropic
client = anthropic.Anthropic(
base_url='http://localhost:11434',
api_key='ollama',
)
with client.messages.stream(
model='llama3.2:3b',
max_tokens=1024,
messages=[{'role': 'user', 'content': 'Count from 1 to 10'}]
) as stream:
for text in stream.text_stream:
print(text, end='', flush=True)
```
```javascript streaming.js
import Anthropic from "@anthropic-ai/sdk";
const anthropic = new Anthropic({
baseURL: "http://localhost:11434",
apiKey: "ollama",
});
const stream = await anthropic.messages.stream({
model: "llama3.2:3b",
max_tokens: 1024,
messages: [{ role: "user", content: "Count from 1 to 10" }],
});
for await (const event of stream) {
if (
event.type === "content_block_delta" &&
event.delta.type === "text_delta"
) {
process.stdout.write(event.delta.text);
}
}
```
```shell streaming.sh
curl -X POST http://localhost:11434/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "llama3.2:3b",
"max_tokens": 1024,
"stream": true,
"messages": [{ "role": "user", "content": "Count from 1 to 10" }]
}'
```
</CodeGroup>
### Tool calling example
<CodeGroup dropdown>
```python tools.py
import anthropic
client = anthropic.Anthropic(
base_url='http://localhost:11434',
api_key='ollama',
)
message = client.messages.create(
model='llama3.2:3b',
max_tokens=1024,
tools=[
{
'name': 'get_weather',
'description': 'Get the current weather in a location',
'input_schema': {
'type': 'object',
'properties': {
'location': {
'type': 'string',
'description': 'The city and state, e.g. San Francisco, CA'
}
},
'required': ['location']
}
}
],
messages=[{'role': 'user', 'content': "What's the weather in San Francisco?"}]
)
for block in message.content:
if block.type == 'tool_use':
print(f'Tool: {block.name}')
print(f'Input: {block.input}')
```
```shell tools.sh
curl -X POST http://localhost:11434/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "llama3.2:3b",
"max_tokens": 1024,
"tools": [
{
"name": "get_weather",
"description": "Get the current weather in a location",
"input_schema": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state"
}
},
"required": ["location"]
}
}
],
"messages": [{ "role": "user", "content": "What is the weather in San Francisco?" }]
}'
```
</CodeGroup>
## Using with Claude Code
[Claude Code](https://docs.anthropic.com/en/docs/claude-code) can be configured to use Ollama as its backend:
```shell
ANTHROPIC_BASE_URL=http://localhost:11434 ANTHROPIC_API_KEY=ollama claude --model llama3.2:3b
```
Or set the environment variables in your shell profile:
```shell
export ANTHROPIC_BASE_URL=http://localhost:11434
export ANTHROPIC_API_KEY=ollama
```
Then run Claude Code with any Ollama model:
```shell
claude --model llama3.2:3b
claude --model qwen3:8b
claude --model deepseek-r1:14b
```
## Endpoints
### `/v1/messages`
#### Supported features
- [x] Messages
- [x] Streaming
- [x] System prompts
- [x] Multi-turn conversations
- [x] Vision (images)
- [x] Tools (function calling)
- [x] Tool results
- [x] Thinking/extended thinking
#### Supported request fields
- [x] `model`
- [x] `max_tokens`
- [x] `messages`
- [x] Text `content`
- [x] Image `content` (base64)
- [x] Array of content blocks
- [x] `tool_use` blocks
- [x] `tool_result` blocks
- [x] `thinking` blocks
- [x] `system` (string or array)
- [x] `stream`
- [x] `temperature`
- [x] `top_p`
- [x] `top_k`
- [x] `stop_sequences`
- [x] `tools`
- [x] `thinking`
- [ ] `tool_choice`
- [ ] `metadata`
#### Supported response fields
- [x] `id`
- [x] `type`
- [x] `role`
- [x] `model`
- [x] `content` (text, tool_use, thinking blocks)
- [x] `stop_reason` (end_turn, max_tokens, tool_use)
- [x] `usage` (input_tokens, output_tokens)
#### Streaming events
- [x] `message_start`
- [x] `content_block_start`
- [x] `content_block_delta` (text_delta, input_json_delta, thinking_delta)
- [x] `content_block_stop`
- [x] `message_delta`
- [x] `message_stop`
- [x] `ping`
- [x] `error`
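A short streamed reply arrives on the wire as SSE event/data pairs along these lines (payloads abridged; an illustration of the format, not verbatim server output):

```
event: message_start
data: {"type":"message_start","message":{"id":"msg_...","type":"message","role":"assistant","content":[],"usage":{"input_tokens":10,"output_tokens":0}}}

event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}

event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}

event: content_block_stop
data: {"type":"content_block_stop","index":0}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}

event: message_stop
data: {"type":"message_stop"}
```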
## Models
Before using a model, pull it locally with `ollama pull`:
```shell
ollama pull llama3.2:3b
```
### Default model names
For tooling that relies on default Anthropic model names such as `claude-3-5-sonnet`, use `ollama cp` to copy an existing model to that name:
```shell
ollama cp llama3.2:3b claude-3-5-sonnet
```
Afterwards, this new model name can be specified in the `model` field:
```shell
curl http://localhost:11434/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "claude-3-5-sonnet",
"max_tokens": 1024,
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
```
## Differences from the Anthropic API
### Behavior differences
- API key is accepted but not validated
- `anthropic-version` header is accepted but not used
- Token counts are approximations based on the underlying model's tokenizer
### Not supported
The following Anthropic API features are not currently supported:
| Feature | Description |
|---------|-------------|
| `/v1/messages/count_tokens` | Token counting endpoint |
| `tool_choice` | Forcing specific tool use or disabling tools |
| `metadata` | Request metadata (user_id) |
| Prompt caching | `cache_control` blocks for caching prefixes |
| Batches API | `/v1/messages/batches` for async batch processing |
| Citations | `citations` content blocks |
| PDF support | `document` content blocks with PDF files |
| Server-sent errors | `error` events during streaming (errors return HTTP status) |
### Partial support
| Feature | Status |
|---------|--------|
| Image content | Base64 images supported; URL images not supported |
| Extended thinking | Basic support; `budget_tokens` accepted but not enforced |

View File

@@ -139,7 +139,8 @@
"/api/streaming",
"/api/usage",
"/api/errors",
"/api/openai-compatibility"
"/api/openai-compatibility",
"/api/anthropic-compatibility"
]
},
{

View File

@@ -14,11 +14,11 @@ curl -fsSL https://ollama.com/install.sh | sh
## How can I view the logs?
-Review the [Troubleshooting](./troubleshooting.md) docs for more about using logs.
+Review the [Troubleshooting](./troubleshooting) docs for more about using logs.
## Is my GPU compatible with Ollama?
-Please refer to the [GPU docs](./gpu.md).
+Please refer to the [GPU docs](./gpu).
## How can I specify the context window size?

View File

@@ -33,7 +33,7 @@ Check your compute compatibility to see if your card is supported:
| 5.0 | GeForce GTX | `GTX 750 Ti` `GTX 750` `NVS 810` |
| | Quadro | `K2200` `K1200` `K620` `M1200` `M520` `M5000M` `M4000M` `M3000M` `M2000M` `M1000M` `K620M` `M600M` `M500M` |
-For building locally to support older GPUs, see [developer.md](./development.md#linux-cuda-nvidia)
+For building locally to support older GPUs, see [developer](./development#linux-cuda-nvidia)
### GPU Selection
@@ -54,7 +54,7 @@ sudo modprobe nvidia_uvm`
Ollama supports the following AMD GPUs via the ROCm library:
-> [!NOTE]
+> **NOTE:**
> Additional AMD GPU support is provided by the Vulkan Library - see below.
@@ -132,9 +132,9 @@ Ollama supports GPU acceleration on Apple devices via the Metal API.
## Vulkan GPU Support
> [!NOTE]
> **NOTE:**
> Vulkan is currently an Experimental feature. To enable, you must set OLLAMA_VULKAN=1 for the Ollama server as
described in the [FAQ](faq.md#how-do-i-configure-ollama-server)
described in the [FAQ](faq#how-do-i-configure-ollama-server)
Additional GPU support on Windows and Linux is provided via
[Vulkan](https://www.vulkan.org/). On Windows most GPU vendors drivers come
@@ -161,6 +161,6 @@ sudo setcap cap_perfmon+ep /usr/local/bin/ollama
To select specific Vulkan GPU(s), you can set the environment variable
`GGML_VK_VISIBLE_DEVICES` to one or more numeric IDs on the Ollama server as
described in the [FAQ](faq.md#how-do-i-configure-ollama-server). If you
described in the [FAQ](faq#how-do-i-configure-ollama-server). If you
encounter any problems with Vulkan based GPUs, you can disable all Vulkan GPUs
by setting `GGML_VK_VISIBLE_DEVICES=-1`
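For example, to expose only the first two Vulkan devices to the server (device IDs illustrative):
```shell
GGML_VK_VISIBLE_DEVICES=0,1 ollama serve
```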


@@ -41,6 +41,7 @@ INSTRUCTION arguments
| [`ADAPTER`](#adapter) | Defines the (Q)LoRA adapters to apply to the model. |
| [`LICENSE`](#license) | Specifies the legal license. |
| [`MESSAGE`](#message) | Specify message history. |
| [`REQUIRES`](#requires) | Specify the minimum version of Ollama required by the model. |
## Examples
@@ -248,6 +249,16 @@ MESSAGE user Is Ontario in Canada?
MESSAGE assistant yes
```
### REQUIRES
The `REQUIRES` instruction allows you to specify the minimum version of Ollama required by the model.
```
REQUIRES <version>
```
The version should be a valid Ollama version (e.g. 0.14.0).
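For example, a Modelfile that pins a minimum server version (base model illustrative):
```
FROM llama3.2:3b
REQUIRES 0.14.0
```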
## Notes
- the **`Modelfile` is not case sensitive**. In the examples, uppercase instructions are used to make it easier to distinguish them from arguments.


@@ -87,7 +87,7 @@ When Ollama starts up, it takes inventory of the GPUs present in the system to d
### Linux NVIDIA Troubleshooting
If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker.md](./docker.md)
If you are using a container to run Ollama, make sure you've set up the container runtime first as described in [docker](./docker)
Sometimes Ollama can have difficulty initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem.

15
go.mod

@@ -15,8 +15,8 @@ require (
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.9.0
github.com/x448/float16 v0.8.4
golang.org/x/sync v0.12.0
golang.org/x/sys v0.36.0
golang.org/x/sync v0.17.0
golang.org/x/sys v0.37.0
)
require (
@@ -29,7 +29,8 @@ require (
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
github.com/tkrajina/typescriptify-golang-structs v0.2.0
golang.org/x/image v0.22.0
golang.org/x/tools v0.30.0
golang.org/x/mod v0.30.0
golang.org/x/tools v0.38.0
gonum.org/v1/gonum v0.15.0
)
@@ -76,11 +77,11 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.36.0
golang.org/x/crypto v0.43.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/term v0.30.0
golang.org/x/text v0.23.0
golang.org/x/net v0.46.0 // indirect
golang.org/x/term v0.36.0
golang.org/x/text v0.30.0
google.golang.org/protobuf v1.34.1
gopkg.in/yaml.v3 v3.0.1 // indirect
)

30
go.sum

@@ -224,8 +224,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -255,6 +255,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -267,8 +269,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -278,8 +280,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -295,17 +297,17 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -319,8 +321,8 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=


@@ -20,10 +20,10 @@ fix vulkan PCI ID and ID handling
ggml/src/ggml-cuda/vendors/hip.h | 3 +
ggml/src/ggml-impl.h | 8 +
ggml/src/ggml-metal/ggml-metal.cpp | 2 +
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 169 ++++++++-
ggml/src/mem_hip.cpp | 529 +++++++++++++++++++++++++++
ggml/src/mem_nvml.cpp | 209 +++++++++++
9 files changed, 976 insertions(+), 17 deletions(-)
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 169 +++++++-
ggml/src/mem_hip.cpp | 558 +++++++++++++++++++++++++++
ggml/src/mem_nvml.cpp | 209 ++++++++++
9 files changed, 1005 insertions(+), 17 deletions(-)
create mode 100644 ggml/src/mem_hip.cpp
create mode 100644 ggml/src/mem_nvml.cpp
@@ -58,7 +58,7 @@ index d55aed348..99ae293cc 100644
set_target_properties(ggml-base PROPERTIES
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 6852d2e20..48cdb1dcf 100644
index 6852d2e20..334a30135 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -267,6 +267,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -109,7 +109,7 @@ index 6852d2e20..48cdb1dcf 100644
+
+#if defined(GGML_USE_HIP)
+ if (ggml_hip_mgmt_init() == 0) {
+ int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
+ int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total, ctx->integrated != 0);
+ if (status == 0) {
+ GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
+ ggml_hip_mgmt_release();
@@ -204,7 +204,7 @@ index 4e162258d..d89e35a8e 100644
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index fe57d4c58..1c07e767a 100644
index fe57d4c58..dba8f4695 100644
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -677,6 +677,14 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
@@ -216,7 +216,7 @@ index fe57d4c58..1c07e767a 100644
+GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
+GGML_API void ggml_nvml_release();
+GGML_API int ggml_hip_mgmt_init();
+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
+GGML_API void ggml_hip_mgmt_release();
+
#ifdef __cplusplus
@@ -243,7 +243,7 @@ index ba95b4acc..f6f8f7a10 100644
/* .async = */ true,
/* .host_buffer = */ false,
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 5349bce24..d43d46d1d 100644
index 5349bce24..0103fd03a 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -236,6 +236,7 @@ class vk_memory_logger;
@@ -334,7 +334,7 @@ index 5349bce24..d43d46d1d 100644
+ switch (props2.properties.vendorID) {
+ case VK_VENDOR_ID_AMD:
+ if (ggml_hip_mgmt_init() == 0) {
+ int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
+ int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total, ctx->is_integrated_gpu);
+ if (status == 0) {
+ GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
+ ggml_hip_mgmt_release();
@@ -505,10 +505,10 @@ index 5349bce24..d43d46d1d 100644
}
diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
new file mode 100644
index 000000000..c1949b899
index 000000000..23c765806
--- /dev/null
+++ b/ggml/src/mem_hip.cpp
@@ -0,0 +1,529 @@
@@ -0,0 +1,558 @@
+#include "ggml.h"
+#include "ggml-impl.h"
+
@@ -842,7 +842,7 @@ index 000000000..c1949b899
+ if (gpus != NULL) gpus->pVtbl->Release(gpus); \
+ if (gpu != NULL) gpu->pVtbl->Release(gpu)
+
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
+ std::lock_guard<std::mutex> lock(ggml_adlx_lock);
+ if (adlx.handle == NULL) {
+ GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
@@ -966,13 +966,16 @@ index 000000000..c1949b899
+ return 0;
+}
+void ggml_hip_mgmt_release() {}
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
+ GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
+ const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
+ const std::string drmTotalMemoryFile = "mem_info_vram_total";
+ const std::string drmUsedMemoryFile = "mem_info_vram_used";
+ const std::string drmGTTTotalMemoryFile = "mem_info_gtt_total";
+ const std::string drmGTTUsedMemoryFile = "mem_info_gtt_used";
+ const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
+
+
+ glob_t glob_result;
+ glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
+
@@ -1006,7 +1009,6 @@ index 000000000..c1949b899
+
+ uint64_t memory;
+ totalFileStream >> memory;
+ *total = memory;
+
+ std::string usedFile = dir + "/" + drmUsedMemoryFile;
+ std::ifstream usedFileStream(usedFile.c_str());
@@ -1019,6 +1021,33 @@ index 000000000..c1949b899
+
+ uint64_t memoryUsed;
+ usedFileStream >> memoryUsed;
+
+ if (is_integrated_gpu) {
+ std::string totalFile = dir + "/" + drmGTTTotalMemoryFile;
+ std::ifstream totalFileStream(totalFile.c_str());
+ if (!totalFileStream.is_open()) {
+ GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
+ file.close();
+ globfree(&glob_result);
+ return 1;
+ }
+ uint64_t gtt;
+ totalFileStream >> gtt;
+ std::string usedFile = dir + "/" + drmGTTUsedMemoryFile;
+ std::ifstream usedFileStream(usedFile.c_str());
+ if (!usedFileStream.is_open()) {
+ GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
+ file.close();
+ globfree(&glob_result);
+ return 1;
+ }
+ uint64_t gttUsed;
+ usedFileStream >> gttUsed;
+ memory += gtt;
+ memoryUsed += gttUsed;
+ }
+
+ *total = memory;
+ *free = memory - memoryUsed;
+
+ file.close();


@@ -24,12 +24,12 @@ index 99ae293cc..9a134b7af 100644
set_target_properties(ggml-base PROPERTIES
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index 1c07e767a..0da3e065b 100644
index dba8f4695..7e17032c7 100644
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -684,6 +684,9 @@ GGML_API void ggml_nvml_release();
GGML_API int ggml_hip_mgmt_init();
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
GGML_API void ggml_hip_mgmt_release();
+GGML_API int ggml_dxgi_pdh_init();
+GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu);
@@ -38,7 +38,7 @@ index 1c07e767a..0da3e065b 100644
#ifdef __cplusplus
}
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index d43d46d1d..df79f9f79 100644
index 0103fd03a..9cc4ebdef 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -74,6 +74,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();


@@ -10,7 +10,7 @@ fallback to cpu
1 file changed, 3 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 48cdb1dcf..3102d7ea7 100644
index 334a30135..5c9dfd032 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -4633,6 +4633,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g


@@ -524,8 +524,13 @@ func (s *llamaServer) Load(ctx context.Context, systemInfo ml.SystemInfo, system
// Use the size of one layer as a buffer
layers := s.ggml.Tensors().GroupLayers()
if blk0, ok := layers["blk.0"]; ok {
buffer := blk0.Size() + kv[0]
for i := range gpus {
gpus[i].FreeMemory -= blk0.Size() + kv[0]
if gpus[i].FreeMemory > buffer {
gpus[i].FreeMemory -= buffer
} else {
gpus[i].FreeMemory = 0
}
}
} else {
slog.Warn("model missing blk.0 layer size")
@@ -575,7 +580,11 @@ func (s *llamaServer) Load(ctx context.Context, systemInfo ml.SystemInfo, system
projectorGPU = firstIntegrated
}
gpus[projectorGPU].FreeMemory -= projectorWeights
if gpus[projectorGPU].FreeMemory > projectorWeights {
gpus[projectorGPU].FreeMemory -= projectorWeights
} else {
gpus[projectorGPU].FreeMemory = 0
}
}
var kvTotal uint64
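Both hunks apply the same guard: a saturating subtraction on the unsigned `FreeMemory` field, so the estimate clamps to zero instead of wrapping around. A minimal sketch of the pattern (helper name hypothetical):
```go
// subSat subtracts b from a, clamping at zero to avoid uint64 wraparound.
func subSat(a, b uint64) uint64 {
	if a > b {
		return a - b
	}
	return 0
}
```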

152
middleware/anthropic.go

@@ -0,0 +1,152 @@
package middleware
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/anthropic"
"github.com/ollama/ollama/api"
)
// AnthropicWriter wraps the response writer to transform Ollama responses to Anthropic format
type AnthropicWriter struct {
BaseWriter
stream bool
id string
model string
converter *anthropic.StreamConverter
}
func (w *AnthropicWriter) writeError(data []byte) (int, error) {
var serr api.StatusError
err := json.Unmarshal(data, &serr)
if err != nil {
return 0, err
}
w.ResponseWriter.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w.ResponseWriter).Encode(anthropic.NewError(serr.StatusCode, serr.Error()))
if err != nil {
return 0, err
}
return len(data), nil
}
func (w *AnthropicWriter) writeEvent(eventType string, data any) error {
d, err := json.Marshal(data)
if err != nil {
return err
}
_, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, d)))
if err != nil {
return err
}
if f, ok := w.ResponseWriter.(http.Flusher); ok {
f.Flush()
}
return nil
}
func (w *AnthropicWriter) writeResponse(data []byte) (int, error) {
var chatResponse api.ChatResponse
err := json.Unmarshal(data, &chatResponse)
if err != nil {
return 0, err
}
if w.stream {
w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")
events := w.converter.Process(chatResponse)
for _, event := range events {
if err := w.writeEvent(event.Event, event.Data); err != nil {
return 0, err
}
}
return len(data), nil
}
// Non-streaming response
w.ResponseWriter.Header().Set("Content-Type", "application/json")
response := anthropic.ToMessagesResponse(w.id, chatResponse)
return len(data), json.NewEncoder(w.ResponseWriter).Encode(response)
}
func (w *AnthropicWriter) Write(data []byte) (int, error) {
code := w.ResponseWriter.Status()
if code != http.StatusOK {
return w.writeError(data)
}
return w.writeResponse(data)
}
// AnthropicMessagesMiddleware handles Anthropic Messages API requests
func AnthropicMessagesMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
var req anthropic.MessagesRequest
err := c.ShouldBindJSON(&req)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error()))
return
}
// Validate required fields
if req.Model == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "model is required"))
return
}
if req.MaxTokens <= 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "max_tokens is required and must be positive"))
return
}
if len(req.Messages) == 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, "messages is required"))
return
}
// Convert to internal format
chatReq, err := anthropic.FromMessagesRequest(req)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, anthropic.NewError(http.StatusBadRequest, err.Error()))
return
}
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, anthropic.NewError(http.StatusInternalServerError, err.Error()))
return
}
c.Request.Body = io.NopCloser(&b)
messageID := anthropic.GenerateMessageID()
w := &AnthropicWriter{
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
stream: req.Stream,
id: messageID,
model: req.Model,
converter: anthropic.NewStreamConverter(messageID, req.Model),
}
// Set headers based on streaming mode
if req.Stream {
c.Writer.Header().Set("Content-Type", "text/event-stream")
c.Writer.Header().Set("Cache-Control", "no-cache")
c.Writer.Header().Set("Connection", "keep-alive")
}
c.Writer = w
c.Next()
}
}
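After the middleware rewrites the body, the request proceeds to the server's regular chat handler. A minimal sketch of how the middleware could be mounted on a gin router (the handler is a hypothetical stand-in for the existing chat endpoint):
```go
package main

import (
	"github.com/gin-gonic/gin"

	"github.com/ollama/ollama/middleware"
)

func main() {
	r := gin.New()
	// chatHandler stands in for the server's existing chat endpoint,
	// which writes api.ChatResponse JSON for the middleware to transform.
	chatHandler := func(c *gin.Context) { /* ... */ }
	r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), chatHandler)
	_ = r.Run(":11434")
}
```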


@@ -0,0 +1,487 @@
package middleware
import (
"bytes"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/anthropic"
"github.com/ollama/ollama/api"
)
func captureAnthropicRequest(capturedRequest any) gin.HandlerFunc {
return func(c *gin.Context) {
bodyBytes, _ := io.ReadAll(c.Request.Body)
c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
_ = json.Unmarshal(bodyBytes, capturedRequest)
c.Next()
}
}
func TestAnthropicMessagesMiddleware(t *testing.T) {
type testCase struct {
name string
body string
req api.ChatRequest
err anthropic.ErrorResponse
}
var capturedRequest *api.ChatRequest
stream := true
testCases := []testCase{
{
name: "basic message",
body: `{
"model": "test-model",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "user", Content: "Hello"},
},
Options: map[string]any{"num_predict": 1024},
Stream: &False,
},
},
{
name: "with system prompt",
body: `{
"model": "test-model",
"max_tokens": 1024,
"system": "You are helpful.",
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "system", Content: "You are helpful."},
{Role: "user", Content: "Hello"},
},
Options: map[string]any{"num_predict": 1024},
Stream: &False,
},
},
{
name: "with options",
body: `{
"model": "test-model",
"max_tokens": 2048,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"stop_sequences": ["\n", "END"],
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "user", Content: "Hello"},
},
Options: map[string]any{
"num_predict": 2048,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"stop": []string{"\n", "END"},
},
Stream: &False,
},
},
{
name: "streaming",
body: `{
"model": "test-model",
"max_tokens": 1024,
"stream": true,
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "user", Content: "Hello"},
},
Options: map[string]any{"num_predict": 1024},
Stream: &stream,
},
},
{
name: "with tools",
body: `{
"model": "test-model",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "What's the weather?"}
],
"tools": [{
"name": "get_weather",
"description": "Get current weather",
"input_schema": {
"type": "object",
"properties": {
"location": {"type": "string"}
},
"required": ["location"]
}
}]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
},
Tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get current weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {Type: api.PropertyType{"string"}},
},
},
},
},
},
Options: map[string]any{"num_predict": 1024},
Stream: &False,
},
},
{
name: "with tool result",
body: `{
"model": "test-model",
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "What's the weather?"},
{"role": "assistant", "content": [
{"type": "tool_use", "id": "call_123", "name": "get_weather", "input": {"location": "Paris"}}
]},
{"role": "user", "content": [
{"type": "tool_result", "tool_use_id": "call_123", "content": "Sunny, 22°C"}
]}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "user", Content: "What's the weather?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
ID: "call_123",
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"location": "Paris"},
},
},
},
},
{Role: "tool", Content: "Sunny, 22°C", ToolCallID: "call_123"},
},
Options: map[string]any{"num_predict": 1024},
Stream: &False,
},
},
{
name: "with thinking enabled",
body: `{
"model": "test-model",
"max_tokens": 1024,
"thinking": {"type": "enabled", "budget_tokens": 1000},
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{Role: "user", Content: "Hello"},
},
Options: map[string]any{"num_predict": 1024},
Stream: &False,
Think: &api.ThinkValue{Value: true},
},
},
{
name: "missing model error",
body: `{
"max_tokens": 1024,
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
err: anthropic.ErrorResponse{
Type: "error",
Error: anthropic.Error{
Type: "invalid_request_error",
Message: "model is required",
},
},
},
{
name: "missing max_tokens error",
body: `{
"model": "test-model",
"messages": [
{"role": "user", "content": "Hello"}
]
}`,
err: anthropic.ErrorResponse{
Type: "error",
Error: anthropic.Error{
Type: "invalid_request_error",
Message: "max_tokens is required and must be positive",
},
},
},
{
name: "missing messages error",
body: `{
"model": "test-model",
"max_tokens": 1024
}`,
err: anthropic.ErrorResponse{
Type: "error",
Error: anthropic.Error{
Type: "invalid_request_error",
Message: "messages is required",
},
},
},
{
name: "tool_use missing id error",
body: `{
"model": "test-model",
"max_tokens": 1024,
"messages": [
{"role": "assistant", "content": [
{"type": "tool_use", "name": "test"}
]}
]
}`,
err: anthropic.ErrorResponse{
Type: "error",
Error: anthropic.Error{
Type: "invalid_request_error",
Message: "tool_use block missing required 'id' field",
},
},
},
}
endpoint := func(c *gin.Context) {
c.Status(http.StatusOK)
}
gin.SetMode(gin.TestMode)
router := gin.New()
router.Use(AnthropicMessagesMiddleware(), captureAnthropicRequest(&capturedRequest))
router.Handle(http.MethodPost, "/v1/messages", endpoint)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
defer func() { capturedRequest = nil }()
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if tc.err.Type != "" {
// Expect error
if resp.Code == http.StatusOK {
t.Fatalf("expected error response, got 200 OK")
}
var errResp anthropic.ErrorResponse
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatalf("failed to unmarshal error: %v", err)
}
if errResp.Type != tc.err.Type {
t.Errorf("expected error type %q, got %q", tc.err.Type, errResp.Type)
}
if errResp.Error.Type != tc.err.Error.Type {
t.Errorf("expected error.type %q, got %q", tc.err.Error.Type, errResp.Error.Type)
}
if errResp.Error.Message != tc.err.Error.Message {
t.Errorf("expected error.message %q, got %q", tc.err.Error.Message, errResp.Error.Message)
}
return
}
if resp.Code != http.StatusOK {
t.Fatalf("unexpected status code: %d, body: %s", resp.Code, resp.Body.String())
}
if capturedRequest == nil {
t.Fatal("request was not captured")
}
// Compare relevant fields
if capturedRequest.Model != tc.req.Model {
t.Errorf("model mismatch: got %q, want %q", capturedRequest.Model, tc.req.Model)
}
if diff := cmp.Diff(tc.req.Messages, capturedRequest.Messages); diff != "" {
t.Errorf("messages mismatch (-want +got):\n%s", diff)
}
if tc.req.Stream != nil && capturedRequest.Stream != nil {
if *tc.req.Stream != *capturedRequest.Stream {
t.Errorf("stream mismatch: got %v, want %v", *capturedRequest.Stream, *tc.req.Stream)
}
}
if tc.req.Think != nil {
if capturedRequest.Think == nil {
t.Error("expected Think to be set")
} else if capturedRequest.Think.Value != tc.req.Think.Value {
t.Errorf("Think mismatch: got %v, want %v", capturedRequest.Think.Value, tc.req.Think.Value)
}
}
})
}
}
func TestAnthropicMessagesMiddleware_Headers(t *testing.T) {
gin.SetMode(gin.TestMode)
t.Run("streaming sets correct headers", func(t *testing.T) {
router := gin.New()
router.Use(AnthropicMessagesMiddleware())
router.POST("/v1/messages", func(c *gin.Context) {
// Check headers were set
if c.Writer.Header().Get("Content-Type") != "text/event-stream" {
t.Errorf("expected Content-Type text/event-stream, got %q", c.Writer.Header().Get("Content-Type"))
}
if c.Writer.Header().Get("Cache-Control") != "no-cache" {
t.Errorf("expected Cache-Control no-cache, got %q", c.Writer.Header().Get("Cache-Control"))
}
c.Status(http.StatusOK)
})
body := `{"model": "test", "max_tokens": 100, "stream": true, "messages": [{"role": "user", "content": "Hi"}]}`
req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
})
}
func TestAnthropicMessagesMiddleware_InvalidJSON(t *testing.T) {
gin.SetMode(gin.TestMode)
router := gin.New()
router.Use(AnthropicMessagesMiddleware())
router.POST("/v1/messages", func(c *gin.Context) {
c.Status(http.StatusOK)
})
req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(`{invalid json`))
req.Header.Set("Content-Type", "application/json")
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if resp.Code != http.StatusBadRequest {
t.Errorf("expected status 400, got %d", resp.Code)
}
var errResp anthropic.ErrorResponse
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatalf("failed to unmarshal error: %v", err)
}
if errResp.Type != "error" {
t.Errorf("expected type 'error', got %q", errResp.Type)
}
if errResp.Error.Type != "invalid_request_error" {
t.Errorf("expected error type 'invalid_request_error', got %q", errResp.Error.Type)
}
}
func TestAnthropicWriter_NonStreaming(t *testing.T) {
gin.SetMode(gin.TestMode)
router := gin.New()
router.Use(AnthropicMessagesMiddleware())
router.POST("/v1/messages", func(c *gin.Context) {
// Simulate Ollama response
resp := api.ChatResponse{
Model: "test-model",
Message: api.Message{
Role: "assistant",
Content: "Hello there!",
},
Done: true,
DoneReason: "stop",
Metrics: api.Metrics{
PromptEvalCount: 10,
EvalCount: 5,
},
}
data, _ := json.Marshal(resp)
c.Writer.WriteHeader(http.StatusOK)
_, _ = c.Writer.Write(data)
})
body := `{"model": "test-model", "max_tokens": 100, "messages": [{"role": "user", "content": "Hi"}]}`
req, _ := http.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if resp.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d", resp.Code)
}
var result anthropic.MessagesResponse
if err := json.Unmarshal(resp.Body.Bytes(), &result); err != nil {
t.Fatalf("failed to unmarshal response: %v", err)
}
if result.Type != "message" {
t.Errorf("expected type 'message', got %q", result.Type)
}
if result.Role != "assistant" {
t.Errorf("expected role 'assistant', got %q", result.Role)
}
if len(result.Content) != 1 {
t.Fatalf("expected 1 content block, got %d", len(result.Content))
}
if result.Content[0].Text != "Hello there!" {
t.Errorf("expected text 'Hello there!', got %q", result.Content[0].Text)
}
if result.StopReason != "end_turn" {
t.Errorf("expected stop_reason 'end_turn', got %q", result.StopReason)
}
if result.Usage.InputTokens != 10 {
t.Errorf("expected input_tokens 10, got %d", result.Usage.InputTokens)
}
if result.Usage.OutputTokens != 5 {
t.Errorf("expected output_tokens 5, got %d", result.Usage.OutputTokens)
}
}


@@ -4436,7 +4436,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
#if defined(GGML_USE_HIP)
if (ggml_hip_mgmt_init() == 0) {
int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total, ctx->integrated != 0);
if (status == 0) {
GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
ggml_hip_mgmt_release();


@@ -682,7 +682,7 @@ GGML_API int ggml_nvml_init();
GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
GGML_API void ggml_nvml_release();
GGML_API int ggml_hip_mgmt_init();
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
GGML_API void ggml_hip_mgmt_release();
GGML_API int ggml_dxgi_pdh_init();
GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu);


@@ -13710,7 +13710,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
switch (props2.properties.vendorID) {
case VK_VENDOR_ID_AMD:
if (ggml_hip_mgmt_init() == 0) {
int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total, ctx->is_integrated_gpu);
if (status == 0) {
GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
ggml_hip_mgmt_release();


@@ -331,7 +331,7 @@ void ggml_hip_mgmt_release() {
if (gpus != NULL) gpus->pVtbl->Release(gpus); \
if (gpu != NULL) gpu->pVtbl->Release(gpu)
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
std::lock_guard<std::mutex> lock(ggml_adlx_lock);
if (adlx.handle == NULL) {
GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
@@ -455,13 +455,16 @@ int ggml_hip_mgmt_init() {
return 0;
}
void ggml_hip_mgmt_release() {}
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
const std::string drmTotalMemoryFile = "mem_info_vram_total";
const std::string drmUsedMemoryFile = "mem_info_vram_used";
const std::string drmGTTTotalMemoryFile = "mem_info_gtt_total";
const std::string drmGTTUsedMemoryFile = "mem_info_gtt_used";
const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
glob_t glob_result;
glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
@@ -495,7 +498,6 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
uint64_t memory;
totalFileStream >> memory;
*total = memory;
std::string usedFile = dir + "/" + drmUsedMemoryFile;
std::ifstream usedFileStream(usedFile.c_str());
@@ -508,6 +510,33 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
uint64_t memoryUsed;
usedFileStream >> memoryUsed;
if (is_integrated_gpu) {
std::string totalFile = dir + "/" + drmGTTTotalMemoryFile;
std::ifstream totalFileStream(totalFile.c_str());
if (!totalFileStream.is_open()) {
GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
file.close();
globfree(&glob_result);
return 1;
}
uint64_t gtt;
totalFileStream >> gtt;
std::string usedFile = dir + "/" + drmGTTUsedMemoryFile;
std::ifstream usedFileStream(usedFile.c_str());
if (!usedFileStream.is_open()) {
GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
file.close();
globfree(&glob_result);
return 1;
}
uint64_t gttUsed;
usedFileStream >> gttUsed;
memory += gtt;
memoryUsed += gttUsed;
}
*total = memory;
*free = memory - memoryUsed;
file.close();


@@ -4,7 +4,6 @@ package deepseek2
import (
"cmp"
"fmt"
"math"
"github.com/ollama/ollama/fs"
@@ -40,10 +39,6 @@ type Options struct {
ropeBase,
ropeScale float32
kqScale float64
attentionTemperatureScale float32
attentionTemperatureLength int
attentionTemperatureFloorScale int
}
func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, t, p ml.Tensor) ml.Tensor {
@@ -71,7 +66,7 @@ type Attention struct {
Output *nn.Linear `gguf:"attn_out,alt:attn_output"`
}
func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions, attentionScales ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
seqLength := hiddenStates.Dim(1)
var query ml.Tensor
@@ -109,11 +104,6 @@ func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions, attentio
kRot = kRot.Repeat(ctx, 1, queryChunks[0].Dim(1))
query = qRot.Concat(ctx, queryChunks[0], 0)
key := kRot.Concat(ctx, kvChunks[0], 0)
if attentionScales != nil {
query = query.Mul(ctx, attentionScales)
}
attention = nn.Attention(ctx, query, key, kvChunks[1], opts.kqScale, cache)
} else { // v3.1
qPass := queryChunks[0].Permute(ctx, 0, 2, 1, 3)
@@ -125,10 +115,6 @@ func (attn *Attention) Forward(ctx ml.Context, hiddenStates, positions, attentio
key := kRot.Concat(ctx, kPass, 0)
value := kPass
if attentionScales != nil {
query = query.Mul(ctx, attentionScales)
}
attention = nn.AttentionWithVMLA(ctx, query, key, value, nil, attn.VB.Weight, opts.kqScale, cache)
}
@@ -215,10 +201,10 @@ type Layer struct {
MLP MLP
}
func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, attentionScales, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
func (t *Layer) Forward(ctx ml.Context, hiddenStates, positions, outputs ml.Tensor, cache kvcache.Cache, opts *Options) ml.Tensor {
residual := hiddenStates
hiddenStates = t.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)
hiddenStates = t.Attention.Forward(ctx, hiddenStates, positions, attentionScales, cache, opts)
hiddenStates = t.Attention.Forward(ctx, hiddenStates, positions, cache, opts)
if outputs != nil {
hiddenStates = hiddenStates.Rows(ctx, outputs)
@@ -248,11 +234,7 @@ type Model struct {
}
func New(c fs.Config) (model.Model, error) {
// layers := make([]Layer, c.Uint("block_count"))
// fmt.Printf("[MODEL DEBUG] Creating model with %d layers\n", c.Uint("block_count"))
layers := make([]Layer, 4)
fmt.Printf("[MODEL DEBUG] Creating model with %d layers\n", 4)
layers := make([]Layer, c.Uint("block_count"))
firstDenseLayerIndex := int(c.Uint("leading_dense_block_count"))
for i := range layers {
@@ -279,10 +261,6 @@ func New(c fs.Config) (model.Model, error) {
`[一-龥぀-ゟ゠-ヿ]+`,
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
}
case "tekken":
pre = []string{
"[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
}
case "deepseek-llm":
// TODO: these models haven't been vetted so skip for now
// pre = []string{
@@ -298,20 +276,13 @@ func New(c fs.Config) (model.Model, error) {
return nil, model.ErrUnsupportedTokenizer
}
// DEBUG: Check tokenizer vocabulary loading
tokens := c.Strings("tokenizer.ggml.tokens")
tokenTypes := c.Ints("tokenizer.ggml.token_type")
merges := c.Strings("tokenizer.ggml.merges")
// Debug output removed for performance
m := Model{
BytePairEncoding: model.NewBytePairEncoding(
&model.Vocabulary{
Values: tokens,
Types: tokenTypes,
Merges: merges,
AddBOS: false, // c.Bool("tokenizer.ggml.add_bos_token", true),
Values: c.Strings("tokenizer.ggml.tokens"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
@@ -345,11 +316,6 @@ func New(c fs.Config) (model.Model, error) {
routedScalingFactor: c.Float("expert_weights_scale"),
originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
// TODO: double check these values
attentionTemperatureScale: c.Float("attention.temperature_scale", 1.0),
attentionTemperatureLength: int(c.Uint("attention.temperature_length")),
attentionTemperatureFloorScale: int(c.Uint("attention.temperature_floor_scale", 8192)),
kqScale: kqScale,
},
}
@@ -365,28 +331,8 @@ func (m Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
// DEBUG: Check TokenEmbedding initialization
if m.TokenEmbedding == nil {
panic("DEBUG: m.TokenEmbedding is nil - 'token_embd' tensor not found in GGUF")
}
hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)
// Temperature tuning - used by mistral-large
var attentionScales ml.Tensor
if m.attentionTemperatureScale != 0.0 {
nTokens := len(batch.Positions)
scales := make([]float32, nTokens)
for i, pos := range batch.Positions {
posFloat := float64(pos)
scaleValue := math.Log(math.Floor((posFloat+1.0)/float64(m.attentionTemperatureFloorScale))+1.0)*float64(m.attentionTemperatureScale) + 1.0
scales[i] = float32(scaleValue)
}
attentionScales = ctx.Input().FromFloats(scales, 1, 1, nTokens)
}
for i, layer := range m.Layers {
m.Cache.SetLayer(i)
@@ -395,7 +341,7 @@ func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
outputs = batch.Outputs
}
hiddenStates = layer.Forward(ctx, hiddenStates, positions, attentionScales, outputs, m.Cache, m.Options)
hiddenStates = layer.Forward(ctx, hiddenStates, positions, outputs, m.Cache, m.Options)
}
hiddenStates = m.OutputNorm.Forward(ctx, hiddenStates, m.eps)


@@ -0,0 +1,323 @@
package parsers
import (
"fmt"
"regexp"
"strings"
"github.com/ollama/ollama/api"
)
type FunctionGemmaParserState int
const (
FunctionGemmaCollectingContent FunctionGemmaParserState = iota
FunctionGemmaCollectingToolCalls
)
const (
functionGemmaFunctionCallOpen = "<start_function_call>"
functionGemmaFunctionCallClose = "<end_function_call>"
)
// This format uses <start_function_call>call:name{args}<end_function_call> for tool calls.
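// Example (illustrative): the stream
//   <start_function_call>call:get_weather{city:<escape>Paris<escape>}<end_function_call>
// yields a ToolCall named "get_weather" with arguments {"city": "Paris"};
// string values are wrapped in <escape> tags, while numbers and booleans are bare.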
type FunctionGemmaParser struct {
state FunctionGemmaParserState
buffer strings.Builder
tools []api.Tool
}
func (p *FunctionGemmaParser) HasToolSupport() bool { return true }
func (p *FunctionGemmaParser) HasThinkingSupport() bool { return false }
func (p *FunctionGemmaParser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
p.tools = tools
p.state = FunctionGemmaCollectingContent
return tools
}
type functionGemmaEvent interface {
isFunctionGemmaEvent()
}
type FunctionGemmaEventContent struct {
content string
}
type functionGemmaEventToolCall struct {
toolCall api.ToolCall
}
func (FunctionGemmaEventContent) isFunctionGemmaEvent() {}
func (functionGemmaEventToolCall) isFunctionGemmaEvent() {}
func (p *FunctionGemmaParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
p.buffer.WriteString(s)
events := p.parseEvents()
var toolCalls []api.ToolCall
var contentSb strings.Builder
for _, event := range events {
switch event := event.(type) {
case functionGemmaEventToolCall:
toolCalls = append(toolCalls, event.toolCall)
case FunctionGemmaEventContent:
contentSb.WriteString(event.content)
}
}
return contentSb.String(), "", toolCalls, nil
}
func (p *FunctionGemmaParser) parseEvents() []functionGemmaEvent {
var all []functionGemmaEvent
keepLooping := true
for keepLooping {
var events []functionGemmaEvent
events, keepLooping = p.eat()
if len(events) > 0 {
all = append(all, events...)
}
}
return all
}
// emitWithPartialCheck extracts unambiguous content before a potential partial tag
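// For example, with buffer `Hello <start` and tag `<start_function_call>`,
// it returns ("Hello ", "<start"): the trailing bytes could still grow into
// the tag, so they are held back rather than emitted as content.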
func (p *FunctionGemmaParser) emitWithPartialCheck(bufStr, tag string) (unambiguous, ambiguous string) {
if overlapLen := overlap(bufStr, tag); overlapLen > 0 {
beforePartialTag := bufStr[:len(bufStr)-overlapLen]
return beforePartialTag, bufStr[len(beforePartialTag):]
}
return bufStr, ""
}
func (p *FunctionGemmaParser) eat() ([]functionGemmaEvent, bool) {
bufStr := p.buffer.String()
if bufStr == "" {
return nil, false
}
switch p.state {
case FunctionGemmaCollectingContent:
if strings.Contains(bufStr, functionGemmaFunctionCallOpen) {
split := strings.SplitN(bufStr, functionGemmaFunctionCallOpen, 2)
content := split[0]
p.buffer.Reset()
p.buffer.WriteString(split[1])
p.state = FunctionGemmaCollectingToolCalls
if content != "" {
return []functionGemmaEvent{FunctionGemmaEventContent{content: content}}, true
}
return nil, true
}
unambig, ambig := p.emitWithPartialCheck(bufStr, functionGemmaFunctionCallOpen)
p.buffer.Reset()
p.buffer.WriteString(ambig)
if unambig != "" {
return []functionGemmaEvent{FunctionGemmaEventContent{content: unambig}}, false
}
return nil, false
case FunctionGemmaCollectingToolCalls:
if strings.Contains(bufStr, functionGemmaFunctionCallClose) {
split := strings.SplitN(bufStr, functionGemmaFunctionCallClose, 2)
remaining := split[1]
p.buffer.Reset()
p.buffer.WriteString(remaining)
var events []functionGemmaEvent
if tc, err := p.parseToolCall(split[0]); err == nil {
events = append(events, functionGemmaEventToolCall{toolCall: tc})
}
if !strings.Contains(remaining, functionGemmaFunctionCallOpen) {
p.state = FunctionGemmaCollectingContent
}
return events, true
}
return nil, false
}
return nil, false
}
// Matches call:function_name{args}
var functionGemmaCallRegex = regexp.MustCompile(`call:([^{]+)\{(.*)\}`)
func (p *FunctionGemmaParser) parseToolCall(content string) (api.ToolCall, error) {
toolCall := api.ToolCall{}
// Extract function name and arguments
match := functionGemmaCallRegex.FindStringSubmatch(content)
if len(match) < 3 {
return toolCall, nil
}
toolCall.Function.Name = match[1]
argsStr := match[2]
// Parse arguments
toolCall.Function.Arguments = p.parseArguments(argsStr)
return toolCall, nil
}
// parseArguments parses the key:value,key:value format
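// For example, `city:<escape>Paris<escape>,limit:10` yields
// {"city": "Paris", "limit": int64(10)}.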
func (p *FunctionGemmaParser) parseArguments(argsStr string) api.ToolCallFunctionArguments {
args := make(api.ToolCallFunctionArguments)
if argsStr == "" {
return args
}
// Split by comma, but handle nested structures
parts := p.splitArguments(argsStr)
for _, part := range parts {
// Find the first colon to split key:value
colonIdx := strings.Index(part, ":")
if colonIdx == -1 {
continue
}
key := part[:colonIdx]
value := part[colonIdx+1:]
// Parse the value
args[key] = p.parseValue(value)
}
return args
}
// splitArguments splits arguments by comma, respecting nested structures
func (p *FunctionGemmaParser) splitArguments(argsStr string) []string {
var parts []string
var current strings.Builder
depth := 0
inEscape := false
for i := 0; i < len(argsStr); i++ {
ch := argsStr[i]
// Check for <escape> tags
if i+8 <= len(argsStr) && argsStr[i:i+8] == "<escape>" {
inEscape = !inEscape
current.WriteString("<escape>")
i += 7 // Skip the rest of <escape>
continue
}
if !inEscape {
switch ch {
case '{', '[':
depth++
current.WriteByte(ch)
case '}', ']':
depth--
current.WriteByte(ch)
case ',':
if depth == 0 {
if current.Len() > 0 {
parts = append(parts, current.String())
current.Reset()
}
continue
}
current.WriteByte(ch)
default:
current.WriteByte(ch)
}
} else {
current.WriteByte(ch)
}
}
if current.Len() > 0 {
parts = append(parts, current.String())
}
return parts
}
// parseValue parses a single value from the FunctionGemma format
func (p *FunctionGemmaParser) parseValue(value string) any {
// Check for escaped string
if strings.HasPrefix(value, "<escape>") && strings.HasSuffix(value, "<escape>") {
// Remove the escape tags
return value[8 : len(value)-8]
}
// Check for boolean
if value == "true" {
return true
}
if value == "false" {
return false
}
// Check for number
if num, ok := parseNumber(value); ok {
return num
}
// Check for array
if strings.HasPrefix(value, "[") && strings.HasSuffix(value, "]") {
return p.parseArray(value[1 : len(value)-1])
}
// Check for object
if strings.HasPrefix(value, "{") && strings.HasSuffix(value, "}") {
return p.parseObject(value[1 : len(value)-1])
}
// Default to string
return value
}
// parseArray parses an array value
func (p *FunctionGemmaParser) parseArray(content string) []any {
var result []any
parts := p.splitArguments(content)
for _, part := range parts {
result = append(result, p.parseValue(part))
}
return result
}
// parseObject parses an object value
func (p *FunctionGemmaParser) parseObject(content string) map[string]any {
result := make(map[string]any)
parts := p.splitArguments(content)
for _, part := range parts {
colonIdx := strings.Index(part, ":")
if colonIdx == -1 {
continue
}
key := part[:colonIdx]
value := part[colonIdx+1:]
result[key] = p.parseValue(value)
}
return result
}
// parseNumber tries to parse a string as a number
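// Integers must round-trip exactly; the float path accepts any string with a
// parsable numeric prefix (e.g. "3.14").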
func parseNumber(s string) (any, bool) {
// Try integer first
var intVal int64
if _, err := fmt.Sscanf(s, "%d", &intVal); err == nil {
// Check if the entire string was consumed
if fmt.Sprintf("%d", intVal) == s {
return intVal, true
}
}
// Try float
var floatVal float64
if _, err := fmt.Sscanf(s, "%f", &floatVal); err == nil {
return floatVal, true
}
return nil, false
}


@@ -0,0 +1,423 @@
package parsers
import (
"testing"
"github.com/ollama/ollama/api"
"github.com/stretchr/testify/assert"
)
func TestFunctionGemmaParser(t *testing.T) {
tests := []struct {
name string
chunks []string
tools []api.Tool
expectedCalls []api.ToolCall
expectedText string
}{
{
name: "plain_content",
chunks: []string{"H", "e", "l", "l", "o", ",", " ", "w", "o", "r", "l", "d", "!"},
expectedCalls: nil,
expectedText: "Hello, world!",
},
{
name: "simple_tool_call",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "weather", "{",
"city", ":", "<", "escape", ">", "Paris", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}},
},
},
},
},
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
},
expectedText: "",
},
{
name: "content_before_tool_call",
chunks: []string{
"L", "et", " ", "me", " ", "check", ".",
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "weather", "{",
"city", ":", "<", "escape", ">", "Paris", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
},
expectedText: "Let me check.",
},
{
name: "numeric_arguments",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "add", "{",
"a", ":", "1", ",", "b", ":", "2",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "add",
Arguments: api.ToolCallFunctionArguments{"a": int64(1), "b": int64(2)},
},
},
},
expectedText: "",
},
{
name: "boolean_arguments",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "set", "_", "flag", "{",
"enabled", ":", "true", ",", "verbose", ":", "false",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_flag",
Arguments: api.ToolCallFunctionArguments{"enabled": true, "verbose": false},
},
},
},
expectedText: "",
},
{
name: "multiple_tool_calls",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "weather", "{",
"city", ":", "<", "escape", ">", "Paris", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "weather", "{",
"city", ":", "<", "escape", ">", "London", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "London"},
},
},
},
expectedText: "",
},
{
name: "array_argument",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "process", "{",
"items", ":", "[",
"<", "escape", ">", "a", "<", "escape", ">", ",",
"<", "escape", ">", "b", "<", "escape", ">", ",",
"<", "escape", ">", "c", "<", "escape", ">",
"]",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "process",
Arguments: api.ToolCallFunctionArguments{"items": []any{"a", "b", "c"}},
},
},
},
expectedText: "",
},
{
name: "object_argument",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "update", "{",
"data", ":", "{",
"name", ":", "<", "escape", ">", "test", "<", "escape", ">", ",",
"value", ":", "42",
"}",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "update",
Arguments: api.ToolCallFunctionArguments{
"data": map[string]any{"name": "test", "value": int64(42)},
},
},
},
},
expectedText: "",
},
{
name: "empty_input",
chunks: []string{},
expectedCalls: nil,
expectedText: "",
},
{
name: "tool_call_with_no_arguments",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "time", "{", "}",
"<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_time",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
expectedText: "",
},
{
name: "content_with_angle_brackets",
chunks: []string{
"The", " ", "result", " ", "is", " ", "a", " ", "<", "value", ">", " ", "tag",
},
expectedCalls: nil,
expectedText: "The result is a <value> tag",
},
{
name: "float_argument",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "set", "_", "temp", "{",
"value", ":", "3", ".", "14",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_temp",
Arguments: api.ToolCallFunctionArguments{"value": 3.14},
},
},
},
expectedText: "",
},
{
name: "content_after_tool_call",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "test", "{", "}",
"<", "end", "_", "function", "_", "call", ">",
"Done", "!",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "test",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
expectedText: "Done!",
},
{
name: "unicode_content_and_arguments",
chunks: []string{
"こんにちは", " ",
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "greet", "{",
"name", ":", "<", "escape", ">", "日本語", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "greet",
Arguments: api.ToolCallFunctionArguments{"name": "日本語"},
},
},
},
expectedText: "こんにちは ",
},
{
name: "multiple_params_sorted",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "search", "{",
"query", ":", "<", "escape", ">", "test", "<", "escape", ">", ",",
"limit", ":", "10", ",",
"offset", ":", "0",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: api.ToolCallFunctionArguments{
"query": "test",
"limit": int64(10),
"offset": int64(0),
},
},
},
},
expectedText: "",
},
{
name: "nested_object_argument",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "create", "{",
"config", ":", "{",
"settings", ":", "{",
"enabled", ":", "true", ",",
"name", ":", "<", "escape", ">", "test", "<", "escape", ">",
"}",
"}",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "create",
Arguments: api.ToolCallFunctionArguments{
"config": map[string]any{
"settings": map[string]any{
"enabled": true,
"name": "test",
},
},
},
},
},
},
expectedText: "",
},
{
name: "partial_start_tag_in_content",
chunks: []string{
"Hello", " ", "<", "start", " ", "world",
},
expectedCalls: nil,
expectedText: "Hello <start world",
},
{
name: "parallel_tool_calls",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "weather", "{",
"city", ":", "<", "escape", ">", "Paris", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "get", "_", "time", "{",
"timezone", ":", "<", "escape", ">", "UTC", "<", "escape", ">",
"}", "<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_time",
Arguments: api.ToolCallFunctionArguments{"timezone": "UTC"},
},
},
},
expectedText: "",
},
{
name: "content_between_tool_calls",
chunks: []string{
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "first", "{", "}",
"<", "end", "_", "function", "_", "call", ">",
"Some", " ", "text", " ", "here",
"<", "start", "_", "function", "_", "call", ">",
"call", ":", "second", "{", "}",
"<", "end", "_", "function", "_", "call", ">",
},
expectedCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "first",
Arguments: api.ToolCallFunctionArguments{},
},
},
{
Function: api.ToolCallFunction{
Name: "second",
Arguments: api.ToolCallFunctionArguments{},
},
},
},
expectedText: "Some text here",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parser := &FunctionGemmaParser{}
parser.Init(tt.tools, nil, nil)
var allContent string
var allCalls []api.ToolCall
for i, chunk := range tt.chunks {
done := i == len(tt.chunks)-1
content, _, calls, err := parser.Add(chunk, done)
assert.NoError(t, err)
allContent += content
allCalls = append(allCalls, calls...)
}
// Handle empty chunks case
if len(tt.chunks) == 0 {
content, _, calls, err := parser.Add("", true)
assert.NoError(t, err)
allContent = content
allCalls = calls
}
assert.Equal(t, tt.expectedText, allContent)
assert.Equal(t, tt.expectedCalls, allCalls)
})
}
}
func TestFunctionGemmaParser_HasSupport(t *testing.T) {
parser := &FunctionGemmaParser{}
assert.True(t, parser.HasToolSupport())
assert.False(t, parser.HasThinkingSupport())
}
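
For orientation, a minimal sketch of driving the parser outside the test harness. It assumes only the Init/Add signatures exercised above (Add returns content, a second value the tests discard, tool calls, and an error) and that Add tolerates arbitrary chunk boundaries, as the partial_start_tag_in_content case suggests:

import (
	"fmt"
	"log"
	"strings"

	"github.com/ollama/ollama/api"
)

func exampleFunctionGemmaParse() {
	chunks := []string{
		"<start_function_call>", "call:get_weather{",
		"city:<escape>Paris<escape>", "}<end_function_call>",
	}

	parser := &FunctionGemmaParser{}
	parser.Init(nil, nil, nil)

	var text strings.Builder
	var calls []api.ToolCall
	for i, chunk := range chunks {
		// done=true on the last chunk flushes any buffered partial tags
		content, _, toolCalls, err := parser.Add(chunk, i == len(chunks)-1)
		if err != nil {
			log.Fatal(err)
		}
		text.WriteString(content)
		calls = append(calls, toolCalls...)
	}
	fmt.Println(calls[0].Function.Name) // get_weather, with arguments {"city": "Paris"}
}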

View File

@@ -66,6 +66,8 @@ func ParserForName(name string) Parser {
return &Olmo3ThinkParser{}
case "nemotron-3-nano":
return &Nemotron3NanoParser{}
case "functiongemma":
return &FunctionGemmaParser{}
default:
return nil
}
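
Once registered, the parser is selectable by the same name a Modelfile's PARSER directive stores (see the modelfile changes below); a minimal lookup sketch:

	if p := ParserForName("functiongemma"); p != nil {
		p.Init(nil, nil, nil) // ready to stream model output through Add
	}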

View File

@@ -0,0 +1,287 @@
package renderers
import (
"fmt"
"sort"
"strings"
"github.com/ollama/ollama/api"
)
type FunctionGemmaRenderer struct{}
const defaultSystemMessage = "You can do function calling with the following functions:"
func (r *FunctionGemmaRenderer) Render(messages []api.Message, tools []api.Tool, thinkValue *api.ThinkValue) (string, error) {
var sb strings.Builder
sb.WriteString("<bos>")
var systemMessage string
var loopMessages []api.Message
if len(messages) > 0 && (messages[0].Role == "system" || messages[0].Role == "developer") {
systemMessage = messages[0].Content
loopMessages = messages[1:]
} else {
loopMessages = messages
}
if systemMessage != "" || len(tools) > 0 {
sb.WriteString("<start_of_turn>developer\n")
if systemMessage != "" {
sb.WriteString(strings.TrimSpace(systemMessage))
}
if len(tools) > 0 {
if systemMessage != "" {
sb.WriteString("\n")
}
// Only add the default message if the user has not already provided it
if strings.TrimSpace(systemMessage) != defaultSystemMessage {
sb.WriteString(defaultSystemMessage)
}
}
for _, tool := range tools {
sb.WriteString(r.renderToolDeclaration(tool))
}
sb.WriteString("<end_of_turn>\n")
}
// Track previous message type for tool response handling
prevMessageType := ""
for i, message := range loopMessages {
switch message.Role {
case "assistant":
if prevMessageType != "tool_response" {
sb.WriteString("<start_of_turn>model\n")
}
prevMessageType = ""
if message.Content != "" {
sb.WriteString(strings.TrimSpace(message.Content))
}
if len(message.ToolCalls) > 0 {
for _, tc := range message.ToolCalls {
sb.WriteString(r.formatToolCall(tc))
}
// After tool calls, expect tool responses
if i+1 < len(loopMessages) && loopMessages[i+1].Role == "tool" {
sb.WriteString("<start_function_response>")
prevMessageType = "tool_call"
} else {
sb.WriteString("<end_of_turn>\n")
}
} else {
sb.WriteString("<end_of_turn>\n")
}
case "user":
if prevMessageType != "tool_response" {
sb.WriteString("<start_of_turn>user\n")
}
prevMessageType = ""
sb.WriteString(strings.TrimSpace(message.Content))
sb.WriteString("<end_of_turn>\n")
case "tool":
toolName := ""
// Find the tool name from the previous assistant's tool call
for j := i - 1; j >= 0; j-- {
if loopMessages[j].Role == "assistant" && len(loopMessages[j].ToolCalls) > 0 {
// Count how many tool messages came before this one
toolIdx := 0
for k := j + 1; k < i; k++ {
if loopMessages[k].Role == "tool" {
toolIdx++
}
}
if toolIdx < len(loopMessages[j].ToolCalls) {
toolName = loopMessages[j].ToolCalls[toolIdx].Function.Name
}
break
}
}
if prevMessageType != "tool_call" {
sb.WriteString("<start_function_response>")
}
sb.WriteString("response:" + toolName + "{" + r.formatArgValue(message.Content) + "}<end_function_response>")
prevMessageType = "tool_response"
default:
sb.WriteString("<start_of_turn>" + message.Role + "\n")
sb.WriteString(strings.TrimSpace(message.Content))
sb.WriteString("<end_of_turn>\n")
}
}
if prevMessageType != "tool_response" {
sb.WriteString("<start_of_turn>model\n")
}
return sb.String(), nil
}
func (r *FunctionGemmaRenderer) renderToolDeclaration(tool api.Tool) string {
var sb strings.Builder
fn := tool.Function
sb.WriteString("<start_function_declaration>declaration:" + fn.Name + "{")
sb.WriteString("description:<escape>" + fn.Description + "<escape>")
if fn.Parameters.Properties != nil || fn.Parameters.Type != "" {
sb.WriteString(",parameters:{")
needsComma := false
// Only include properties:{} if there are actual properties
if len(fn.Parameters.Properties) > 0 {
sb.WriteString("properties:{")
r.writeProperties(&sb, fn.Parameters.Properties)
sb.WriteString("}")
needsComma = true
}
if len(fn.Parameters.Required) > 0 {
if needsComma {
sb.WriteString(",")
}
sb.WriteString("required:[")
for i, req := range fn.Parameters.Required {
if i > 0 {
sb.WriteString(",")
}
sb.WriteString("<escape>" + req + "<escape>")
}
sb.WriteString("]")
needsComma = true
}
if fn.Parameters.Type != "" {
if needsComma {
sb.WriteString(",")
}
sb.WriteString("type:<escape>" + strings.ToUpper(fn.Parameters.Type) + "<escape>")
}
sb.WriteString("}")
}
sb.WriteString("}<end_function_declaration>")
return sb.String()
}
func (r *FunctionGemmaRenderer) writeProperties(sb *strings.Builder, props map[string]api.ToolProperty) {
keys := make([]string, 0, len(props))
for k := range props {
keys = append(keys, k)
}
sort.Strings(keys)
first := true
for _, name := range keys {
prop := props[name]
if !first {
sb.WriteString(",")
}
first = false
sb.WriteString(name + ":{description:<escape>")
sb.WriteString(prop.Description)
sb.WriteString("<escape>")
if len(prop.Type) > 0 {
sb.WriteString(",type:<escape>" + strings.ToUpper(prop.Type[0]) + "<escape>")
}
sb.WriteString("}")
}
}
func (r *FunctionGemmaRenderer) formatToolCall(tc api.ToolCall) string {
var sb strings.Builder
sb.WriteString("<start_function_call>call:" + tc.Function.Name + "{")
keys := make([]string, 0, len(tc.Function.Arguments))
for k := range tc.Function.Arguments {
keys = append(keys, k)
}
sort.Strings(keys)
first := true
for _, key := range keys {
value := tc.Function.Arguments[key]
if !first {
sb.WriteString(",")
}
first = false
sb.WriteString(key + ":" + r.formatArgValue(value))
}
sb.WriteString("}<end_function_call>")
return sb.String()
}
func (r *FunctionGemmaRenderer) formatArgValue(value any) string {
switch v := value.(type) {
case string:
return "<escape>" + v + "<escape>"
case bool:
if v {
return "true"
}
return "false"
case float64:
if v == float64(int64(v)) {
return fmt.Sprintf("%d", int64(v))
}
return fmt.Sprintf("%v", v)
case int, int64, int32:
return fmt.Sprintf("%d", v)
case map[string]any:
return r.formatMapValue(v)
case []any:
return r.formatArrayValue(v)
default:
return fmt.Sprintf("%v", v)
}
}
func (r *FunctionGemmaRenderer) formatMapValue(m map[string]any) string {
var sb strings.Builder
sb.WriteString("{")
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
sort.Strings(keys)
first := true
for _, key := range keys {
if !first {
sb.WriteString(",")
}
first = false
sb.WriteString(key + ":" + r.formatArgValue(m[key]))
}
sb.WriteString("}")
return sb.String()
}
func (r *FunctionGemmaRenderer) formatArrayValue(arr []any) string {
var sb strings.Builder
sb.WriteString("[")
for i, item := range arr {
if i > 0 {
sb.WriteString(",")
}
sb.WriteString(r.formatArgValue(item))
}
sb.WriteString("]")
return sb.String()
}
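
For reference, a minimal sketch of invoking the renderer directly; the expected prompt matches the basic_user_message case in the tests below (assumes "log" is imported):

	renderer := &FunctionGemmaRenderer{}
	prompt, err := renderer.Render(
		[]api.Message{{Role: "user", Content: "Hello!"}},
		nil, // no tools
		nil, // no think value
	)
	if err != nil {
		log.Fatal(err)
	}
	// prompt == "<bos><start_of_turn>user\nHello!<end_of_turn>\n<start_of_turn>model\n"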

View File

@@ -0,0 +1,514 @@
package renderers
import (
"testing"
"github.com/ollama/ollama/api"
"github.com/stretchr/testify/assert"
)
func TestFunctionGemmaRenderer(t *testing.T) {
tests := []struct {
name string
messages []api.Message
tools []api.Tool
expected string
}{
{
name: "basic_user_message",
messages: []api.Message{
{Role: "user", Content: "Hello!"},
},
expected: "<bos><start_of_turn>user\nHello!<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "with_system_message",
messages: []api.Message{
{Role: "system", Content: "You are helpful"},
{Role: "user", Content: "Hello!"},
},
expected: "<bos><start_of_turn>developer\nYou are helpful<end_of_turn>\n<start_of_turn>user\nHello!<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "with_developer_role",
messages: []api.Message{
{Role: "developer", Content: "You are a coding assistant"},
{Role: "user", Content: "Hello!"},
},
expected: "<bos><start_of_turn>developer\nYou are a coding assistant<end_of_turn>\n<start_of_turn>user\nHello!<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "custom_system_message_with_tools",
messages: []api.Message{
{Role: "system", Content: "You are a weather expert."},
{Role: "user", Content: "Weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
},
// Custom system message is preserved, tools are appended
expected: "<bos><start_of_turn>developer\nYou are a weather expert.\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "developer_role_with_tools",
messages: []api.Message{
{Role: "developer", Content: "Be concise."},
{Role: "user", Content: "Weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
},
// Developer role message is preserved, tools are appended
expected: "<bos><start_of_turn>developer\nBe concise.\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "multi_turn",
messages: []api.Message{
{Role: "user", Content: "Hi"},
{Role: "assistant", Content: "Hello!"},
{Role: "user", Content: "More"},
},
expected: "<bos><start_of_turn>user\nHi<end_of_turn>\n<start_of_turn>model\nHello!<end_of_turn>\n<start_of_turn>user\nMore<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "with_tools",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "tool_call",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
},
},
{Role: "tool", Content: "Sunny"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\n<start_function_call>call:get_weather{city:<escape>Paris<escape>}<end_function_call><start_function_response>response:get_weather{<escape>Sunny<escape>}<end_function_response>",
},
{
name: "assistant_content_with_tool_call",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
{
Role: "assistant",
Content: "Let me check.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
},
},
{Role: "tool", Content: "Sunny"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\nLet me check.<start_function_call>call:get_weather{city:<escape>Paris<escape>}<end_function_call><start_function_response>response:get_weather{<escape>Sunny<escape>}<end_function_response>",
},
{
name: "numeric_arguments",
messages: []api.Message{
{Role: "user", Content: "Add"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "add",
Arguments: api.ToolCallFunctionArguments{"a": float64(1), "b": float64(2)},
},
},
},
},
{Role: "tool", Content: "3"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "add",
Description: "Add numbers",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"a": {Type: api.PropertyType{"number"}},
"b": {Type: api.PropertyType{"number"}},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:add{description:<escape>Add numbers<escape>,parameters:{properties:{a:{description:<escape><escape>,type:<escape>NUMBER<escape>},b:{description:<escape><escape>,type:<escape>NUMBER<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nAdd<end_of_turn>\n<start_of_turn>model\n<start_function_call>call:add{a:1,b:2}<end_function_call><start_function_response>response:add{<escape>3<escape>}<end_function_response>",
},
{
name: "empty_messages",
messages: []api.Message{},
expected: "<bos><start_of_turn>model\n",
},
{
name: "tool_with_required_params",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Gets the weather for a given city",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"city"},
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City Name"},
"country": {Type: api.PropertyType{"string"}, Description: "Country Name"},
},
},
},
},
},
// Required params are escaped: required:[<escape>city<escape>]
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Gets the weather for a given city<escape>,parameters:{properties:{city:{description:<escape>City Name<escape>,type:<escape>STRING<escape>},country:{description:<escape>Country Name<escape>,type:<escape>STRING<escape>}},required:[<escape>city<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "multiple_tools",
messages: []api.Message{
{Role: "user", Content: "Weather and time?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "get_time",
Description: "Get current time",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"timezone": {Type: api.PropertyType{"string"}, Description: "Timezone"},
},
},
},
},
},
// Multiple tool declarations are consecutive
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:get_time{description:<escape>Get current time<escape>,parameters:{properties:{timezone:{description:<escape>Timezone<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather and time?<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "parallel_tool_calls",
messages: []api.Message{
{Role: "user", Content: "Weather and time?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
{
Function: api.ToolCallFunction{
Name: "get_time",
Arguments: api.ToolCallFunctionArguments{"timezone": "UTC"},
},
},
},
},
{Role: "tool", Content: "Sunny"},
{Role: "tool", Content: "12:00"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "get_time",
Description: "Get current time",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"timezone": {Type: api.PropertyType{"string"}, Description: "Timezone"},
},
},
},
},
},
// Multiple tool calls and responses are consecutive
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:get_time{description:<escape>Get current time<escape>,parameters:{properties:{timezone:{description:<escape>Timezone<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather and time?<end_of_turn>\n<start_of_turn>model\n<start_function_call>call:get_weather{city:<escape>Paris<escape>}<end_function_call><start_function_call>call:get_time{timezone:<escape>UTC<escape>}<end_function_call><start_function_response>response:get_weather{<escape>Sunny<escape>}<end_function_response><start_function_response>response:get_time{<escape>12:00<escape>}<end_function_response>",
},
{
name: "user_after_tool_response",
messages: []api.Message{
{Role: "user", Content: "Weather?"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_weather",
Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
},
},
},
},
{Role: "tool", Content: "Sunny"},
{Role: "user", Content: "Thanks! What about London?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get_weather",
Description: "Get weather",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"city": {Type: api.PropertyType{"string"}, Description: "City"},
},
},
},
},
},
// A user message directly after a tool response is concatenated without a new <start_of_turn>user header
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:get_weather{description:<escape>Get weather<escape>,parameters:{properties:{city:{description:<escape>City<escape>,type:<escape>STRING<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nWeather?<end_of_turn>\n<start_of_turn>model\n<start_function_call>call:get_weather{city:<escape>Paris<escape>}<end_function_call><start_function_response>response:get_weather{<escape>Sunny<escape>}<end_function_response>Thanks! What about London?<end_of_turn>\n<start_of_turn>model\n",
},
// Edge cases
{
name: "tool_empty_properties",
messages: []api.Message{
{Role: "user", Content: "Test"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "test_fn",
Description: "",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{},
},
},
},
},
// Empty properties are omitted
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:test_fn{description:<escape><escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nTest<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "unicode_content",
messages: []api.Message{
{Role: "user", Content: "こんにちは 🎉"},
},
expected: "<bos><start_of_turn>user\nこんにちは 🎉<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "newlines_in_content",
messages: []api.Message{
{Role: "user", Content: "Line 1\nLine 2\nLine 3"},
},
expected: "<bos><start_of_turn>user\nLine 1\nLine 2\nLine 3<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "special_chars_in_content",
messages: []api.Message{
{Role: "user", Content: "Test <tag> & \"quotes\" chars"},
},
expected: "<bos><start_of_turn>user\nTest <tag> & \"quotes\" chars<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "boolean_argument",
messages: []api.Message{
{Role: "user", Content: "Set flag"},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "set_flag",
Arguments: api.ToolCallFunctionArguments{"enabled": true},
},
},
},
},
{Role: "tool", Content: "done"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "set_flag",
Description: "Set a flag",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"enabled": {Type: api.PropertyType{"boolean"}, Description: "Flag value"},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:set_flag{description:<escape>Set a flag<escape>,parameters:{properties:{enabled:{description:<escape>Flag value<escape>,type:<escape>BOOLEAN<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nSet flag<end_of_turn>\n<start_of_turn>model\n<start_function_call>call:set_flag{enabled:true}<end_function_call><start_function_response>response:set_flag{<escape>done<escape>}<end_function_response>",
},
{
name: "multiple_required_params",
messages: []api.Message{
{Role: "user", Content: "Test"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "test",
Description: "Test",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"a", "b", "c"},
Properties: map[string]api.ToolProperty{
"a": {Type: api.PropertyType{"string"}, Description: "A"},
"b": {Type: api.PropertyType{"string"}, Description: "B"},
"c": {Type: api.PropertyType{"string"}, Description: "C"},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:test{description:<escape>Test<escape>,parameters:{properties:{a:{description:<escape>A<escape>,type:<escape>STRING<escape>},b:{description:<escape>B<escape>,type:<escape>STRING<escape>},c:{description:<escape>C<escape>,type:<escape>STRING<escape>}},required:[<escape>a<escape>,<escape>b<escape>,<escape>c<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nTest<end_of_turn>\n<start_of_turn>model\n",
},
{
name: "array_type_param",
messages: []api.Message{
{Role: "user", Content: "Test"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "test",
Description: "Test",
Parameters: api.ToolFunctionParameters{
Type: "object",
Properties: map[string]api.ToolProperty{
"items": {Type: api.PropertyType{"array"}, Description: "List of items"},
},
},
},
},
},
expected: "<bos><start_of_turn>developer\nYou can do function calling with the following functions:<start_function_declaration>declaration:test{description:<escape>Test<escape>,parameters:{properties:{items:{description:<escape>List of items<escape>,type:<escape>ARRAY<escape>}},type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nTest<end_of_turn>\n<start_of_turn>model\n",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
renderer := &FunctionGemmaRenderer{}
result, err := renderer.Render(tt.messages, tt.tools, nil)
assert.NoError(t, err)
assert.Equal(t, tt.expected, result)
})
}
}

View File

@@ -78,6 +78,8 @@ func rendererForName(name string) Renderer {
return renderer
case "nemotron-3-nano":
return &Nemotron3NanoRenderer{}
case "functiongemma":
return &FunctionGemmaRenderer{}
default:
return nil
}

View File

@@ -17,6 +17,7 @@ import (
"strings"
"sync"
"golang.org/x/mod/semver"
"golang.org/x/sync/errgroup"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
@@ -104,6 +105,16 @@ func (f Modelfile) CreateRequest(relativeDir string) (*api.CreateRequest, error)
req.Renderer = c.Args
case "parser":
req.Parser = c.Args
case "requires":
// golang.org/x/mod/semver requires "v" prefix
requires := c.Args
if !strings.HasPrefix(requires, "v") {
requires = "v" + requires
}
if !semver.IsValid(requires) {
return nil, fmt.Errorf("requires must be a valid semver (e.g. 0.14.0)")
}
req.Requires = strings.TrimPrefix(requires, "v")
case "message":
role, msg, _ := strings.Cut(c.Args, ": ")
messages = append(messages, api.Message{Role: role, Content: msg})
@@ -322,7 +333,7 @@ func (c Command) String() string {
switch c.Name {
case "model":
fmt.Fprintf(&sb, "FROM %s", c.Args)
case "license", "template", "system", "adapter", "renderer", "parser":
case "license", "template", "system", "adapter", "renderer", "parser", "requires":
fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
case "message":
role, message, _ := strings.Cut(c.Args, ": ")
@@ -348,7 +359,7 @@ const (
var (
errMissingFrom = errors.New("no FROM line")
errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"renderer\", \"parser\", \"parameter\", or \"message\"")
errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"renderer\", \"parser\", \"parameter\", \"message\", or \"requires\"")
)
type ParserError struct {
@@ -608,7 +619,7 @@ func isValidMessageRole(role string) bool {
func isValidCommand(cmd string) bool {
switch strings.ToLower(cmd) {
case "from", "license", "template", "system", "adapter", "renderer", "parser", "parameter", "message":
case "from", "license", "template", "system", "adapter", "renderer", "parser", "parameter", "message", "requires":
return true
default:
return false
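
A standalone sketch of the REQUIRES normalization above: golang.org/x/mod/semver only treats "v"-prefixed strings as valid, so the prefix is added before validation and stripped before storing (normalizeRequires is a hypothetical helper name):

package main

import (
	"fmt"
	"strings"

	"golang.org/x/mod/semver"
)

func normalizeRequires(args string) (string, error) {
	v := args
	if !strings.HasPrefix(v, "v") {
		v = "v" + v // semver.IsValid rejects bare "0.14.0"
	}
	if !semver.IsValid(v) {
		return "", fmt.Errorf("requires must be a valid semver (e.g. 0.14.0)")
	}
	return strings.TrimPrefix(v, "v"), nil
}

func main() {
	fmt.Println(normalizeRequires("0.14.0")) // 0.14.0 <nil>
	fmt.Println(normalizeRequires("latest")) // error
}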

View File

@@ -61,6 +61,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
config.Renderer = r.Renderer
config.Parser = r.Parser
config.Requires = r.Requires
for v := range r.Files {
if !fs.ValidPath(v) {
@@ -120,7 +121,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
ch <- gin.H{"error": err.Error()}
}
if err == nil && !remote && (config.Renderer == "" || config.Parser == "") {
if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "") {
manifest, mErr := ParseNamedManifest(fromName)
if mErr == nil && manifest.Config.Digest != "" {
configPath, pErr := GetBlobsPath(manifest.Config.Digest)
@@ -134,6 +135,9 @@ func (s *Server) CreateHandler(c *gin.Context) {
if config.Parser == "" {
config.Parser = baseConfig.Parser
}
if config.Requires == "" {
config.Requires = baseConfig.Requires
}
}
cfgFile.Close()
}

View File

@@ -1106,6 +1106,7 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
Messages: msgs,
Capabilities: m.Capabilities(),
ModifiedAt: manifest.fi.ModTime(),
Requires: m.Config.Requires,
}
if m.Config.RemoteHost != "" {
@@ -1534,6 +1535,9 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)
// Inference (Anthropic compatibility)
r.POST("/v1/messages", middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
if rc != nil {
// wrap old with new
rs := &registry.Local{
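
With the route above in place, an Anthropic Messages client can be pointed at Ollama; a minimal Go sketch against the default local address, with an illustrative model name (request shape follows the public Anthropic Messages API):

package main

import (
	"bytes"
	"io"
	"log"
	"net/http"
	"os"
)

func main() {
	body := []byte(`{
		"model": "llama3.2",
		"max_tokens": 256,
		"messages": [{"role": "user", "content": "Hello!"}]
	}`)
	resp, err := http.Post("http://localhost:11434/v1/messages", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	io.Copy(os.Stdout, resp.Body) // Anthropic Messages-format response
}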

View File

@@ -9,6 +9,7 @@ type ConfigV2 struct {
FileType string `json:"file_type"` // shown as Quantization Level
Renderer string `json:"renderer,omitempty"`
Parser string `json:"parser,omitempty"`
Requires string `json:"requires,omitempty"`
RemoteHost string `json:"remote_host,omitempty"`
RemoteModel string `json:"remote_model,omitempty"`