mirror of
https://github.com/ollama/ollama.git
synced 2026-01-20 05:18:31 -05:00
Compare commits
5 Commits
parth/decr
...
brucemacd/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2a4d058f9 | ||
|
|
63e7634014 | ||
|
|
8d51d92f3b | ||
|
|
2348fef568 | ||
|
|
883f655dd6 |
115
server/cache/capabilities.go
vendored
Normal file
115
server/cache/capabilities.go
vendored
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"slices"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
"github.com/ollama/ollama/thinking"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// cacheEntry stores the capabilities detected for a model file together with
// the file's modification time at the moment they were cached. The modTime is
// compared against the file's current mtime to invalidate stale entries.
type cacheEntry struct {
	capabilities []model.Capability // capabilities derived from the GGUF metadata
	modTime      time.Time          // file mtime observed when capabilities were read
}
|
||||||
|
|
||||||
|
// ggufCapabilities caches GGUF-derived capabilities, keyed by model file path.
// Values are cacheEntry; entries are revalidated against the file's
// modification time on every lookup (see ggufCapabilties).
var ggufCapabilities = &sync.Map{}
|
||||||
|
|
||||||
|
// ModelInfo contains the minimal information needed to determine capabilities.
type ModelInfo struct {
	ModelPath      string             // path to the GGUF model file on disk
	ProjectorPaths []string           // projector file paths; non-empty implies vision support
	Template       *template.Template // chat template; nil skips template-derived capabilities
}
|
||||||
|
|
||||||
|
// Capabilities returns the capabilities that the model supports
|
||||||
|
func Capabilities(info ModelInfo) []model.Capability {
|
||||||
|
capabilities, err := ggufCapabilties(info.ModelPath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("could not determine gguf capabilities", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if info.Template == nil {
|
||||||
|
return capabilities
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for tools capability
|
||||||
|
if slices.Contains(info.Template.Vars(), "tools") {
|
||||||
|
capabilities = append(capabilities, model.CapabilityTools)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for insert capability
|
||||||
|
if slices.Contains(info.Template.Vars(), "suffix") {
|
||||||
|
capabilities = append(capabilities, model.CapabilityInsert)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for vision capability in projector-based models
|
||||||
|
if len(info.ProjectorPaths) > 0 {
|
||||||
|
capabilities = append(capabilities, model.CapabilityVision)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for thinking capability
|
||||||
|
openingTag, closingTag := thinking.InferTags(info.Template.Template)
|
||||||
|
if openingTag != "" && closingTag != "" {
|
||||||
|
capabilities = append(capabilities, model.CapabilityThinking)
|
||||||
|
}
|
||||||
|
|
||||||
|
return capabilities
|
||||||
|
}
|
||||||
|
|
||||||
|
func ggufCapabilties(modelPath string) ([]model.Capability, error) {
|
||||||
|
// Get file info to check modification time
|
||||||
|
fileInfo, err := os.Stat(modelPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
currentModTime := fileInfo.ModTime()
|
||||||
|
|
||||||
|
// Check if we have a cached entry
|
||||||
|
if cached, ok := ggufCapabilities.Load(modelPath); ok {
|
||||||
|
entry := cached.(cacheEntry)
|
||||||
|
// If the file hasn't been modified since we cached it, return the cached capabilities
|
||||||
|
if entry.modTime.Equal(currentModTime) {
|
||||||
|
return entry.capabilities, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If not cached or file was modified, read the model file to determine capabilities
|
||||||
|
capabilities := []model.Capability{}
|
||||||
|
|
||||||
|
r, err := os.Open(modelPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
f, err := ggml.Decode(r, 1024)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := f.KV()[fmt.Sprintf("%s.pooling_type", f.KV().Architecture())]; ok {
|
||||||
|
capabilities = append(capabilities, model.CapabilityEmbedding)
|
||||||
|
} else {
|
||||||
|
capabilities = append(capabilities, model.CapabilityCompletion)
|
||||||
|
}
|
||||||
|
if _, ok := f.KV()[fmt.Sprintf("%s.vision.block_count", f.KV().Architecture())]; ok {
|
||||||
|
capabilities = append(capabilities, model.CapabilityVision)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the capabilities with the modification time
|
||||||
|
ggufCapabilities.Store(modelPath, cacheEntry{
|
||||||
|
capabilities: capabilities,
|
||||||
|
modTime: currentModTime,
|
||||||
|
})
|
||||||
|
|
||||||
|
return capabilities, nil
|
||||||
|
}
|
||||||
211
server/cache/capabilities_test.go
vendored
Normal file
211
server/cache/capabilities_test.go
vendored
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"maps"
|
||||||
|
"os"
|
||||||
|
"slices"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/fs/ggml"
|
||||||
|
"github.com/ollama/ollama/template"
|
||||||
|
"github.com/ollama/ollama/types/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testGGUF creates a temporary GGUF model file for testing with custom key-value pairs
|
||||||
|
func testGGUF(tb testing.TB, customKV ggml.KV) string {
|
||||||
|
tb.Helper()
|
||||||
|
f, err := os.CreateTemp(tb.TempDir(), "test*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
tb.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
kv := ggml.KV{}
|
||||||
|
maps.Copy(kv, customKV)
|
||||||
|
|
||||||
|
tensors := []*ggml.Tensor{
|
||||||
|
{
|
||||||
|
Name: "token_embd.weight",
|
||||||
|
Kind: 0,
|
||||||
|
Shape: []uint64{1, 1},
|
||||||
|
WriterTo: bytes.NewBuffer(make([]byte, 4)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := ggml.WriteGGUF(f, kv, tensors); err != nil {
|
||||||
|
tb.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f.Name()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCapabilities(t *testing.T) {
|
||||||
|
ggufCapabilities.Range(func(key, value any) bool {
|
||||||
|
ggufCapabilities.Delete(key)
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
// Create test model paths
|
||||||
|
completionModelPath := testGGUF(t, ggml.KV{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
})
|
||||||
|
|
||||||
|
visionModelPath := testGGUF(t, ggml.KV{
|
||||||
|
"general.architecture": "llama",
|
||||||
|
"llama.vision.block_count": uint32(1),
|
||||||
|
})
|
||||||
|
|
||||||
|
embeddingModelPath := testGGUF(t, ggml.KV{
|
||||||
|
"general.architecture": "bert",
|
||||||
|
"bert.pooling_type": uint32(1),
|
||||||
|
})
|
||||||
|
|
||||||
|
// Create templates
|
||||||
|
toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
chatTemplate, err := template.Parse("{{ .prompt }}")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
toolsTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to parse template: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
model ModelInfo
|
||||||
|
expectedCaps []model.Capability
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "model with completion capability",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: completionModelPath,
|
||||||
|
Template: chatTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityCompletion},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with completion, tools, and insert capability",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: completionModelPath,
|
||||||
|
Template: toolsInsertTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools, model.CapabilityInsert},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with tools capability",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: completionModelPath,
|
||||||
|
Template: toolsTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with vision capability from gguf",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: visionModelPath,
|
||||||
|
Template: chatTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityVision},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with vision capability from projector",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: completionModelPath,
|
||||||
|
ProjectorPaths: []string{"/path/to/projector"},
|
||||||
|
Template: chatTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityVision},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with vision, tools, and insert capability",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: visionModelPath,
|
||||||
|
Template: toolsInsertTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityVision, model.CapabilityTools, model.CapabilityInsert},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "model with embedding capability",
|
||||||
|
model: ModelInfo{
|
||||||
|
ModelPath: embeddingModelPath,
|
||||||
|
Template: chatTemplate,
|
||||||
|
},
|
||||||
|
expectedCaps: []model.Capability{model.CapabilityEmbedding},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
// First call - should read from file
|
||||||
|
caps := Capabilities(tc.model)
|
||||||
|
slices.Sort(caps)
|
||||||
|
slices.Sort(tc.expectedCaps)
|
||||||
|
if !slices.Equal(caps, tc.expectedCaps) {
|
||||||
|
t.Errorf("Expected capabilities %v, got %v", tc.expectedCaps, caps)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify caching for models that read from GGUF
|
||||||
|
if tc.model.ModelPath != "" {
|
||||||
|
// Check that entry is cached
|
||||||
|
_, ok := ggufCapabilities.Load(tc.model.ModelPath)
|
||||||
|
if !ok {
|
||||||
|
t.Error("Expected capabilities to be cached")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second call - should use cache
|
||||||
|
caps2 := Capabilities(tc.model)
|
||||||
|
slices.Sort(caps2)
|
||||||
|
if !slices.Equal(caps, caps2) {
|
||||||
|
t.Errorf("Cached capabilities don't match original: expected %v, got %v", caps, caps2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test cache invalidation on file modification
|
||||||
|
t.Run("cache invalidation", func(t *testing.T) {
|
||||||
|
// Use completion model for this test
|
||||||
|
info := ModelInfo{
|
||||||
|
ModelPath: completionModelPath,
|
||||||
|
Template: chatTemplate,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get initial cached entry
|
||||||
|
cached, ok := ggufCapabilities.Load(completionModelPath)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("Expected model to be cached from previous tests")
|
||||||
|
}
|
||||||
|
entry := cached.(cacheEntry)
|
||||||
|
|
||||||
|
// Modify the file's timestamp to the future
|
||||||
|
future := time.Now().Add(time.Hour)
|
||||||
|
err := os.Chtimes(completionModelPath, future, future)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to update file timestamp: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call should re-read from file due to changed modtime
|
||||||
|
caps := Capabilities(info)
|
||||||
|
if len(caps) != 1 || caps[0] != model.CapabilityCompletion {
|
||||||
|
t.Errorf("Expected [CapabilityCompletion], got %v", caps)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that cache was updated with new modtime
|
||||||
|
cached2, ok := ggufCapabilities.Load(completionModelPath)
|
||||||
|
if !ok {
|
||||||
|
t.Error("Expected capabilities to be cached after re-read")
|
||||||
|
}
|
||||||
|
entry2 := cached2.(cacheEntry)
|
||||||
|
if entry2.modTime.Equal(entry.modTime) {
|
||||||
|
t.Error("Expected cache entry to have updated modTime")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -23,10 +23,9 @@ import (
|
|||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/fs/gguf"
|
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
|
"github.com/ollama/ollama/server/cache"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
"github.com/ollama/ollama/thinking"
|
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
"github.com/ollama/ollama/version"
|
"github.com/ollama/ollama/version"
|
||||||
)
|
)
|
||||||
@@ -68,60 +67,14 @@ type Model struct {
|
|||||||
Template *template.Template
|
Template *template.Template
|
||||||
}
|
}
|
||||||
|
|
||||||
// Capabilities returns the capabilities that the model supports
|
|
||||||
func (m *Model) Capabilities() []model.Capability {
|
|
||||||
capabilities := []model.Capability{}
|
|
||||||
|
|
||||||
// Check for completion capability
|
|
||||||
f, err := gguf.Open(m.ModelPath)
|
|
||||||
if err == nil {
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
if f.KeyValue("pooling_type").Valid() {
|
|
||||||
capabilities = append(capabilities, model.CapabilityEmbedding)
|
|
||||||
} else {
|
|
||||||
// If no embedding is specified, we assume the model supports completion
|
|
||||||
capabilities = append(capabilities, model.CapabilityCompletion)
|
|
||||||
}
|
|
||||||
if f.KeyValue("vision.block_count").Valid() {
|
|
||||||
capabilities = append(capabilities, model.CapabilityVision)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
slog.Error("couldn't open model file", "error", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if m.Template == nil {
|
|
||||||
return capabilities
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for tools capability
|
|
||||||
if slices.Contains(m.Template.Vars(), "tools") {
|
|
||||||
capabilities = append(capabilities, model.CapabilityTools)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for insert capability
|
|
||||||
if slices.Contains(m.Template.Vars(), "suffix") {
|
|
||||||
capabilities = append(capabilities, model.CapabilityInsert)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for vision capability in projector-based models
|
|
||||||
if len(m.ProjectorPaths) > 0 {
|
|
||||||
capabilities = append(capabilities, model.CapabilityVision)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for thinking capability
|
|
||||||
openingTag, closingTag := thinking.InferTags(m.Template.Template)
|
|
||||||
if openingTag != "" && closingTag != "" {
|
|
||||||
capabilities = append(capabilities, model.CapabilityThinking)
|
|
||||||
}
|
|
||||||
|
|
||||||
return capabilities
|
|
||||||
}
|
|
||||||
|
|
||||||
// CheckCapabilities checks if the model has the specified capabilities returning an error describing
|
// CheckCapabilities checks if the model has the specified capabilities returning an error describing
|
||||||
// any missing or unknown capabilities
|
// any missing or unknown capabilities
|
||||||
func (m *Model) CheckCapabilities(want ...model.Capability) error {
|
func (m *Model) CheckCapabilities(want ...model.Capability) error {
|
||||||
available := m.Capabilities()
|
available := cache.Capabilities(cache.ModelInfo{
|
||||||
|
ModelPath: m.ModelPath,
|
||||||
|
ProjectorPaths: m.ProjectorPaths,
|
||||||
|
Template: m.Template,
|
||||||
|
})
|
||||||
var errs []error
|
var errs []error
|
||||||
|
|
||||||
// Map capabilities to their corresponding error
|
// Map capabilities to their corresponding error
|
||||||
|
|||||||
@@ -9,131 +9,6 @@ import (
|
|||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestModelCapabilities(t *testing.T) {
|
|
||||||
// Create completion model (llama architecture without vision)
|
|
||||||
completionModelPath, _ := createBinFile(t, ggml.KV{
|
|
||||||
"general.architecture": "llama",
|
|
||||||
}, []*ggml.Tensor{})
|
|
||||||
|
|
||||||
// Create vision model (llama architecture with vision block count)
|
|
||||||
visionModelPath, _ := createBinFile(t, ggml.KV{
|
|
||||||
"general.architecture": "llama",
|
|
||||||
"llama.vision.block_count": uint32(1),
|
|
||||||
}, []*ggml.Tensor{})
|
|
||||||
|
|
||||||
// Create embedding model (bert architecture with pooling type)
|
|
||||||
embeddingModelPath, _ := createBinFile(t, ggml.KV{
|
|
||||||
"general.architecture": "bert",
|
|
||||||
"bert.pooling_type": uint32(1),
|
|
||||||
}, []*ggml.Tensor{})
|
|
||||||
|
|
||||||
toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse template: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
chatTemplate, err := template.Parse("{{ .prompt }}")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse template: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
toolsTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse template: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
testModels := []struct {
|
|
||||||
name string
|
|
||||||
model Model
|
|
||||||
expectedCaps []model.Capability
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "model with completion capability",
|
|
||||||
model: Model{
|
|
||||||
ModelPath: completionModelPath,
|
|
||||||
Template: chatTemplate,
|
|
||||||
},
|
|
||||||
expectedCaps: []model.Capability{model.CapabilityCompletion},
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
|
||||||
name: "model with completion, tools, and insert capability",
|
|
||||||
model: Model{
|
|
||||||
ModelPath: completionModelPath,
|
|
||||||
Template: toolsInsertTemplate,
|
|
||||||
},
|
|
||||||
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools, model.CapabilityInsert},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "model with tools capability",
|
|
||||||
model: Model{
|
|
||||||
ModelPath: completionModelPath,
|
|
||||||
Template: toolsTemplate,
|
|
||||||
},
|
|
||||||
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "model with vision capability",
|
|
||||||
model: Model{
|
|
||||||
ModelPath: visionModelPath,
|
|
||||||
Template: chatTemplate,
|
|
||||||
},
|
|
||||||
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityVision},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "model with vision, tools, and insert capability",
|
|
||||||
model: Model{
|
|
||||||
ModelPath: visionModelPath,
|
|
||||||
Template: toolsInsertTemplate,
|
|
||||||
},
|
|
||||||
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityVision, model.CapabilityTools, model.CapabilityInsert},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "model with embedding capability",
|
|
||||||
model: Model{
|
|
||||||
ModelPath: embeddingModelPath,
|
|
||||||
Template: chatTemplate,
|
|
||||||
},
|
|
||||||
expectedCaps: []model.Capability{model.CapabilityEmbedding},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// compare two slices of model.Capability regardless of order
|
|
||||||
compareCapabilities := func(a, b []model.Capability) bool {
|
|
||||||
if len(a) != len(b) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
aCount := make(map[model.Capability]int)
|
|
||||||
for _, cap := range a {
|
|
||||||
aCount[cap]++
|
|
||||||
}
|
|
||||||
|
|
||||||
bCount := make(map[model.Capability]int)
|
|
||||||
for _, cap := range b {
|
|
||||||
bCount[cap]++
|
|
||||||
}
|
|
||||||
|
|
||||||
for cap, count := range aCount {
|
|
||||||
if bCount[cap] != count {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range testModels {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
// Test Capabilities method
|
|
||||||
caps := tt.model.Capabilities()
|
|
||||||
if !compareCapabilities(caps, tt.expectedCaps) {
|
|
||||||
t.Errorf("Expected capabilities %v, got %v", tt.expectedCaps, caps)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestModelCheckCapabilities(t *testing.T) {
|
func TestModelCheckCapabilities(t *testing.T) {
|
||||||
// Create simple model file for tests that don't depend on GGUF content
|
// Create simple model file for tests that don't depend on GGUF content
|
||||||
completionModelPath, _ := createBinFile(t, ggml.KV{
|
completionModelPath, _ := createBinFile(t, ggml.KV{
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ import (
|
|||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/logutil"
|
"github.com/ollama/ollama/logutil"
|
||||||
"github.com/ollama/ollama/openai"
|
"github.com/ollama/ollama/openai"
|
||||||
|
"github.com/ollama/ollama/server/cache"
|
||||||
"github.com/ollama/ollama/server/internal/client/ollama"
|
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||||
"github.com/ollama/ollama/server/internal/registry"
|
"github.com/ollama/ollama/server/internal/registry"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
@@ -824,7 +825,11 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
|||||||
Template: m.Template.String(),
|
Template: m.Template.String(),
|
||||||
Details: modelDetails,
|
Details: modelDetails,
|
||||||
Messages: msgs,
|
Messages: msgs,
|
||||||
Capabilities: m.Capabilities(),
|
Capabilities: cache.Capabilities(cache.ModelInfo{
|
||||||
|
ModelPath: m.ModelPath,
|
||||||
|
Template: m.Template,
|
||||||
|
ProjectorPaths: m.ProjectorPaths,
|
||||||
|
}),
|
||||||
ModifiedAt: manifest.fi.ModTime(),
|
ModifiedAt: manifest.fi.ModTime(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user