Compare commits

..

1 Commits

Author SHA1 Message Date
jmorganca
d132315276 uip
api: expose usage data
2026-01-16 00:24:07 -08:00
13 changed files with 829 additions and 751 deletions

View File

@@ -377,6 +377,15 @@ func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
return &lr, nil
}
// Usage returns usage statistics and system info.
func (c *Client) Usage(ctx context.Context) (*UsageResponse, error) {
var ur UsageResponse
if err := c.do(ctx, http.MethodGet, "/api/usage", nil, &ur); err != nil {
return nil, err
}
return &ur, nil
}
// Copy copies a model - creating a model with another name from an existing
// model.
func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {

View File

@@ -792,6 +792,33 @@ type ProcessResponse struct {
Models []ProcessModelResponse `json:"models"`
}
// UsageResponse is the response from [Client.Usage].
type UsageResponse struct {
GPUs []GPUUsage `json:"gpus,omitempty"`
}
// GPUUsage contains GPU/device memory usage breakdown.
type GPUUsage struct {
Name string `json:"name"` // Device name (e.g., "Apple M2 Max", "NVIDIA GeForce RTX 4090")
Backend string `json:"backend"` // CUDA, ROCm, Metal, etc.
Total uint64 `json:"total"`
Free uint64 `json:"free"`
Used uint64 `json:"used"` // Memory used by Ollama
Other uint64 `json:"other"` // Memory used by other processes
}
// UsageStats contains usage statistics.
type UsageStats struct {
Requests int64 `json:"requests"`
TokensInput int64 `json:"tokens_input"`
TokensOutput int64 `json:"tokens_output"`
TotalTokens int64 `json:"total_tokens"`
Models map[string]int64 `json:"models,omitempty"`
Sources map[string]int64 `json:"sources,omitempty"`
ToolCalls int64 `json:"tool_calls,omitempty"`
StructuredOutput int64 `json:"structured_output,omitempty"`
}
// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Name string `json:"name"`

View File

@@ -5,7 +5,6 @@ import (
"context"
"crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
@@ -13,22 +12,11 @@ import (
"os"
"path/filepath"
"strings"
"time"
"golang.org/x/crypto/ssh"
)
const (
defaultPrivateKey = "id_ed25519"
signInStateFile = "signin.json"
)
// SignInState represents the locally cached sign-in state
type SignInState struct {
Name string `json:"name"`
Email string `json:"email"`
CachedAt time.Time `json:"cached_at"`
}
const defaultPrivateKey = "id_ed25519"
func GetPublicKey() (string, error) {
home, err := os.UserHomeDir()
@@ -95,75 +83,3 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
// signature is <pubkey>:<signature>
return fmt.Sprintf("%s:%s", bytes.TrimSpace(parts[1]), base64.StdEncoding.EncodeToString(signedData.Blob)), nil
}
// GetSignInState reads the locally cached sign-in state from ~/.ollama/signin.json
func GetSignInState() (*SignInState, error) {
home, err := os.UserHomeDir()
if err != nil {
return nil, err
}
statePath := filepath.Join(home, ".ollama", signInStateFile)
data, err := os.ReadFile(statePath)
if err != nil {
return nil, err
}
var state SignInState
if err := json.Unmarshal(data, &state); err != nil {
return nil, err
}
return &state, nil
}
// SetSignInState atomically writes the sign-in state to ~/.ollama/signin.json
func SetSignInState(state *SignInState) error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
ollamaDir := filepath.Join(home, ".ollama")
statePath := filepath.Join(ollamaDir, signInStateFile)
tmpPath := statePath + ".tmp"
state.CachedAt = time.Now()
data, err := json.MarshalIndent(state, "", " ")
if err != nil {
return err
}
// Write to temp file first
if err := os.WriteFile(tmpPath, data, 0o600); err != nil {
return err
}
// Atomic rename
return os.Rename(tmpPath, statePath)
}
// ClearSignInState removes the locally cached sign-in state
func ClearSignInState() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
statePath := filepath.Join(home, ".ollama", signInStateFile)
err = os.Remove(statePath)
if errors.Is(err, os.ErrNotExist) {
return nil // Already cleared
}
return err
}
// IsSignedIn returns true if there is a valid locally cached sign-in state
func IsSignedIn() bool {
state, err := GetSignInState()
if err != nil {
return false
}
return state.Name != ""
}

View File

@@ -1,294 +0,0 @@
package auth
import (
"os"
"path/filepath"
"runtime"
"testing"
)
// TestWhoamiHandlerFlow simulates the WhoamiHandler logic flow
func TestWhoamiHandlerFlow(t *testing.T) {
_ = setupTestDir(t)
// Scenario 1: No local cache - should indicate need for network call
t.Run("NoCache_RequiresNetwork", func(t *testing.T) {
state, err := GetSignInState()
if err == nil && state != nil && state.Name != "" {
t.Error("should not have cached state initially")
}
// In real WhoamiHandler, this would trigger a network call
})
// Scenario 2: Simulate successful sign-in from network
t.Run("CacheAfterNetworkSuccess", func(t *testing.T) {
// Simulate receiving user from ollama.com
networkUser := &SignInState{
Name: "networkuser",
Email: "network@example.com",
}
// Cache the result (as WhoamiHandler would)
if err := SetSignInState(networkUser); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Verify it's cached
if !IsSignedIn() {
t.Error("should be signed in after caching")
}
})
// Scenario 3: Subsequent calls use cache (no network)
t.Run("SubsequentCalls_UseCache", func(t *testing.T) {
state, err := GetSignInState()
if err != nil {
t.Fatalf("GetSignInState failed: %v", err)
}
if state.Name != "networkuser" {
t.Errorf("expected cached name 'networkuser', got '%s'", state.Name)
}
// In real WhoamiHandler, this would skip the network call and return cached data
})
// Scenario 4: Sign-out clears cache
t.Run("SignOut_ClearsCache", func(t *testing.T) {
// Simulate SignoutHandler clearing cache
if err := ClearSignInState(); err != nil {
t.Fatalf("ClearSignInState failed: %v", err)
}
if IsSignedIn() {
t.Error("should not be signed in after sign-out")
}
})
// Scenario 5: After sign-out, next call requires network
t.Run("AfterSignOut_RequiresNetwork", func(t *testing.T) {
state, err := GetSignInState()
if err == nil && state != nil && state.Name != "" {
t.Error("should require network after sign-out")
}
})
}
// TestOfflineScenarios tests behavior when offline
func TestOfflineScenarios(t *testing.T) {
_ = setupTestDir(t)
t.Run("Offline_WithCache_Works", func(t *testing.T) {
// Pre-populate cache (simulate previous sign-in)
state := &SignInState{Name: "cacheduser", Email: "cached@example.com"}
if err := SetSignInState(state); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Simulate offline check - should work with cache
cached, err := GetSignInState()
if err != nil {
t.Fatalf("should work offline with cache: %v", err)
}
if cached.Name != "cacheduser" {
t.Errorf("expected 'cacheduser', got '%s'", cached.Name)
}
if !IsSignedIn() {
t.Error("should report signed in with cache")
}
})
t.Run("Offline_WithoutCache_Fails", func(t *testing.T) {
// Clear cache first
ClearSignInState()
// Offline without cache should indicate not signed in
if IsSignedIn() {
t.Error("should not be signed in offline without cache")
}
_, err := GetSignInState()
if err == nil {
t.Error("should get error when no cache exists")
}
})
}
// TestMultipleSessions tests overwriting sessions
func TestMultipleSessions(t *testing.T) {
_ = setupTestDir(t)
t.Run("NewSignIn_OverwritesOld", func(t *testing.T) {
// First user signs in
user1 := &SignInState{Name: "user1", Email: "user1@example.com"}
SetSignInState(user1)
// Different user signs in (should overwrite)
user2 := &SignInState{Name: "user2", Email: "user2@example.com"}
SetSignInState(user2)
// Should have user2's data
state, _ := GetSignInState()
if state.Name != "user2" {
t.Errorf("expected 'user2', got '%s'", state.Name)
}
})
}
// TestEdgeCases tests various edge cases
func TestEdgeCases(t *testing.T) {
_ = setupTestDir(t)
t.Run("EmptyName_NotSignedIn", func(t *testing.T) {
// User with empty name should not count as signed in
state := &SignInState{Name: "", Email: "noname@example.com"}
SetSignInState(state)
if IsSignedIn() {
t.Error("empty name should not count as signed in")
}
})
t.Run("SpecialCharactersInName", func(t *testing.T) {
state := &SignInState{
Name: "user with spaces & symbols!@#$%",
Email: "special@example.com",
}
if err := SetSignInState(state); err != nil {
t.Fatalf("failed to set state with special chars: %v", err)
}
read, err := GetSignInState()
if err != nil {
t.Fatalf("failed to read state: %v", err)
}
if read.Name != state.Name {
t.Errorf("name mismatch with special chars")
}
})
t.Run("UnicodeInName", func(t *testing.T) {
state := &SignInState{
Name: "用户名 🎉 émojis",
Email: "unicode@example.com",
}
if err := SetSignInState(state); err != nil {
t.Fatalf("failed to set state with unicode: %v", err)
}
read, err := GetSignInState()
if err != nil {
t.Fatalf("failed to read state: %v", err)
}
if read.Name != state.Name {
t.Errorf("name mismatch with unicode")
}
})
t.Run("VeryLongEmail", func(t *testing.T) {
longEmail := ""
for range 1000 {
longEmail += "a"
}
longEmail += "@example.com"
state := &SignInState{Name: "user", Email: longEmail}
if err := SetSignInState(state); err != nil {
t.Fatalf("failed to set state with long email: %v", err)
}
read, err := GetSignInState()
if err != nil {
t.Fatalf("failed to read state: %v", err)
}
if read.Email != longEmail {
t.Error("email mismatch with long value")
}
})
}
// TestConcurrentAccess tests race conditions
func TestConcurrentAccess(t *testing.T) {
_ = setupTestDir(t)
t.Run("ConcurrentReads", func(t *testing.T) {
// Set up initial state
state := &SignInState{Name: "concurrent", Email: "concurrent@example.com"}
SetSignInState(state)
// Multiple concurrent reads should all succeed
done := make(chan bool, 10)
for range 10 {
go func() {
read, err := GetSignInState()
if err != nil {
t.Errorf("concurrent read failed: %v", err)
}
if read.Name != "concurrent" {
t.Errorf("wrong name in concurrent read")
}
done <- true
}()
}
for range 10 {
<-done
}
})
t.Run("ConcurrentWrites", func(t *testing.T) {
// Multiple concurrent writes - last one should win
done := make(chan bool, 10)
for range 10 {
go func() {
state := &SignInState{
Name: "user",
Email: "user@example.com",
}
SetSignInState(state)
done <- true
}()
}
for range 10 {
<-done
}
// Should have some valid state
if !IsSignedIn() {
t.Error("should be signed in after concurrent writes")
}
})
}
// TestFileSystemEdgeCases tests filesystem-related edge cases
func TestFileSystemEdgeCases(t *testing.T) {
t.Run("DirectoryIsFile", func(t *testing.T) {
tmpDir := t.TempDir()
// Create a file where .ollama directory should be
ollamaPath := filepath.Join(tmpDir, ".ollama")
if err := os.WriteFile(ollamaPath, []byte("not a directory"), 0o600); err != nil {
t.Fatalf("failed to create blocking file: %v", err)
}
// Override home directory
if runtime.GOOS == "windows" {
t.Setenv("USERPROFILE", tmpDir)
} else {
t.Setenv("HOME", tmpDir)
}
// Try to write - should fail because .ollama is a file, not a directory
state := &SignInState{Name: "newuser", Email: "new@example.com"}
err := SetSignInState(state)
if err == nil {
t.Error("should fail when .ollama is a file instead of directory")
}
})
}

View File

@@ -1,347 +0,0 @@
package auth
import (
"encoding/json"
"os"
"path/filepath"
"runtime"
"testing"
"time"
)
func setupTestDir(t *testing.T) string {
t.Helper()
// Create a temporary directory for testing
tmpDir := t.TempDir()
// Create .ollama subdirectory
ollamaDir := filepath.Join(tmpDir, ".ollama")
if err := os.MkdirAll(ollamaDir, 0o755); err != nil {
t.Fatalf("failed to create .ollama dir: %v", err)
}
// Override home directory for tests (platform-specific)
if runtime.GOOS == "windows" {
t.Setenv("USERPROFILE", tmpDir)
} else {
t.Setenv("HOME", tmpDir)
}
return tmpDir
}
func TestSetSignInState(t *testing.T) {
_ = setupTestDir(t)
state := &SignInState{
Name: "testuser",
Email: "test@example.com",
}
err := SetSignInState(state)
if err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Verify file was created
home, _ := os.UserHomeDir()
statePath := filepath.Join(home, ".ollama", signInStateFile)
data, err := os.ReadFile(statePath)
if err != nil {
t.Fatalf("failed to read state file: %v", err)
}
var savedState SignInState
if err := json.Unmarshal(data, &savedState); err != nil {
t.Fatalf("failed to unmarshal state: %v", err)
}
if savedState.Name != "testuser" {
t.Errorf("expected name 'testuser', got '%s'", savedState.Name)
}
if savedState.Email != "test@example.com" {
t.Errorf("expected email 'test@example.com', got '%s'", savedState.Email)
}
if savedState.CachedAt.IsZero() {
t.Error("expected CachedAt to be set, got zero time")
}
// Verify CachedAt is recent (within last minute)
if time.Since(savedState.CachedAt) > time.Minute {
t.Errorf("CachedAt is too old: %v", savedState.CachedAt)
}
}
func TestSetSignInState_Overwrites(t *testing.T) {
_ = setupTestDir(t)
// Set initial state
state1 := &SignInState{Name: "user1", Email: "user1@example.com"}
if err := SetSignInState(state1); err != nil {
t.Fatalf("first SetSignInState failed: %v", err)
}
// Overwrite with new state
state2 := &SignInState{Name: "user2", Email: "user2@example.com"}
if err := SetSignInState(state2); err != nil {
t.Fatalf("second SetSignInState failed: %v", err)
}
// Verify only new state exists
readState, err := GetSignInState()
if err != nil {
t.Fatalf("GetSignInState failed: %v", err)
}
if readState.Name != "user2" {
t.Errorf("expected name 'user2', got '%s'", readState.Name)
}
}
func TestGetSignInState(t *testing.T) {
_ = setupTestDir(t)
// First set a state
originalState := &SignInState{
Name: "testuser",
Email: "test@example.com",
}
if err := SetSignInState(originalState); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Now read it back
readState, err := GetSignInState()
if err != nil {
t.Fatalf("GetSignInState failed: %v", err)
}
if readState.Name != originalState.Name {
t.Errorf("expected name '%s', got '%s'", originalState.Name, readState.Name)
}
if readState.Email != originalState.Email {
t.Errorf("expected email '%s', got '%s'", originalState.Email, readState.Email)
}
}
func TestGetSignInState_NoFile(t *testing.T) {
_ = setupTestDir(t)
// Try to read without any file existing
state, err := GetSignInState()
if err == nil {
t.Error("expected error when file doesn't exist, got nil")
}
if state != nil {
t.Errorf("expected nil state, got %+v", state)
}
}
func TestGetSignInState_InvalidJSON(t *testing.T) {
tmpDir := setupTestDir(t)
// Write invalid JSON to the state file
statePath := filepath.Join(tmpDir, ".ollama", signInStateFile)
if err := os.WriteFile(statePath, []byte("not valid json"), 0o600); err != nil {
t.Fatalf("failed to write invalid json: %v", err)
}
state, err := GetSignInState()
if err == nil {
t.Error("expected error for invalid JSON, got nil")
}
if state != nil {
t.Errorf("expected nil state for invalid JSON, got %+v", state)
}
}
func TestClearSignInState(t *testing.T) {
_ = setupTestDir(t)
// First set a state
state := &SignInState{Name: "testuser", Email: "test@example.com"}
if err := SetSignInState(state); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Verify file exists
home, _ := os.UserHomeDir()
statePath := filepath.Join(home, ".ollama", signInStateFile)
if _, err := os.Stat(statePath); os.IsNotExist(err) {
t.Fatal("state file should exist before clearing")
}
// Clear the state
if err := ClearSignInState(); err != nil {
t.Fatalf("ClearSignInState failed: %v", err)
}
// Verify file is gone
if _, err := os.Stat(statePath); !os.IsNotExist(err) {
t.Error("state file should be deleted after clearing")
}
}
func TestClearSignInState_NoFile(t *testing.T) {
_ = setupTestDir(t)
// Clear when no file exists should not error
err := ClearSignInState()
if err != nil {
t.Errorf("ClearSignInState should not error when file doesn't exist: %v", err)
}
}
func TestClearSignInState_Idempotent(t *testing.T) {
_ = setupTestDir(t)
// Set a state first
state := &SignInState{Name: "testuser", Email: "test@example.com"}
if err := SetSignInState(state); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Clear multiple times should not error
for i := range 3 {
if err := ClearSignInState(); err != nil {
t.Errorf("ClearSignInState iteration %d failed: %v", i, err)
}
}
}
func TestIsSignedIn(t *testing.T) {
_ = setupTestDir(t)
// Initially not signed in
if IsSignedIn() {
t.Error("should not be signed in initially")
}
// Set a state with a name
state := &SignInState{Name: "testuser", Email: "test@example.com"}
if err := SetSignInState(state); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Now should be signed in
if !IsSignedIn() {
t.Error("should be signed in after setting state")
}
// Clear the state
if err := ClearSignInState(); err != nil {
t.Fatalf("ClearSignInState failed: %v", err)
}
// Should not be signed in after clearing
if IsSignedIn() {
t.Error("should not be signed in after clearing state")
}
}
func TestIsSignedIn_EmptyName(t *testing.T) {
tmpDir := setupTestDir(t)
// Write a state with empty name directly
state := SignInState{
Name: "",
Email: "test@example.com",
CachedAt: time.Now(),
}
data, _ := json.Marshal(state)
statePath := filepath.Join(tmpDir, ".ollama", signInStateFile)
if err := os.WriteFile(statePath, data, 0o600); err != nil {
t.Fatalf("failed to write state: %v", err)
}
// Should not be signed in with empty name
if IsSignedIn() {
t.Error("should not be signed in with empty name")
}
}
func TestSetSignInState_AtomicWrite(t *testing.T) {
tmpDir := setupTestDir(t)
state := &SignInState{Name: "testuser", Email: "test@example.com"}
if err := SetSignInState(state); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Verify temp file is cleaned up
tmpPath := filepath.Join(tmpDir, ".ollama", signInStateFile+".tmp")
if _, err := os.Stat(tmpPath); !os.IsNotExist(err) {
t.Error("temp file should be cleaned up after atomic write")
}
// Verify final file exists
statePath := filepath.Join(tmpDir, ".ollama", signInStateFile)
if _, err := os.Stat(statePath); os.IsNotExist(err) {
t.Error("final state file should exist")
}
}
func TestSetSignInState_FilePermissions(t *testing.T) {
tmpDir := setupTestDir(t)
state := &SignInState{Name: "testuser", Email: "test@example.com"}
if err := SetSignInState(state); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
statePath := filepath.Join(tmpDir, ".ollama", signInStateFile)
info, err := os.Stat(statePath)
if err != nil {
t.Fatalf("failed to stat state file: %v", err)
}
// Check file permissions (should be 0600 - owner read/write only)
perm := info.Mode().Perm()
if perm != 0o600 {
t.Errorf("expected permissions 0600, got %04o", perm)
}
}
func TestRoundTrip(t *testing.T) {
_ = setupTestDir(t)
// Test full round trip: set -> get -> clear -> get
original := &SignInState{
Name: "roundtrip_user",
Email: "roundtrip@example.com",
}
// Set
if err := SetSignInState(original); err != nil {
t.Fatalf("SetSignInState failed: %v", err)
}
// Get and verify
retrieved, err := GetSignInState()
if err != nil {
t.Fatalf("GetSignInState failed: %v", err)
}
if retrieved.Name != original.Name {
t.Errorf("name mismatch: expected '%s', got '%s'", original.Name, retrieved.Name)
}
if retrieved.Email != original.Email {
t.Errorf("email mismatch: expected '%s', got '%s'", original.Email, retrieved.Email)
}
// Clear
if err := ClearSignInState(); err != nil {
t.Fatalf("ClearSignInState failed: %v", err)
}
// Get should fail now
_, err = GetSignInState()
if err == nil {
t.Error("GetSignInState should fail after clear")
}
}

View File

@@ -1833,6 +1833,7 @@ func NewCLI() *cobra.Command {
PreRunE: checkServerHeartbeat,
RunE: ListRunningHandler,
}
copyCmd := &cobra.Command{
Use: "cp SOURCE DESTINATION",
Short: "Copy a model",

View File

@@ -206,6 +206,8 @@ var (
UseAuth = Bool("OLLAMA_AUTH")
// Enable Vulkan backend
EnableVulkan = Bool("OLLAMA_VULKAN")
// Usage enables usage statistics reporting
Usage = Bool("OLLAMA_USAGE")
)
func String(s string) func() string {

View File

@@ -20,6 +20,7 @@ import (
"net/url"
"os"
"os/signal"
"runtime"
"slices"
"strings"
"sync/atomic"
@@ -44,6 +45,7 @@ import (
"github.com/ollama/ollama/model/renderers"
"github.com/ollama/ollama/server/internal/client/ollama"
"github.com/ollama/ollama/server/internal/registry"
"github.com/ollama/ollama/server/usage"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/thinking"
"github.com/ollama/ollama/tools"
@@ -82,6 +84,7 @@ type Server struct {
addr net.Addr
sched *Scheduler
lowVRAM bool
stats *usage.Stats
}
func init() {
@@ -104,6 +107,30 @@ var (
errBadTemplate = errors.New("template error")
)
// usage records a request to usage stats if enabled.
func (s *Server) usage(c *gin.Context, endpoint, model, architecture string, promptTokens, completionTokens int, usedTools bool) {
if s.stats == nil {
return
}
s.stats.Record(&usage.Request{
Endpoint: endpoint,
Model: model,
Architecture: architecture,
APIType: usage.ClassifyAPIType(c.Request.URL.Path),
PromptTokens: promptTokens,
CompletionTokens: completionTokens,
UsedTools: usedTools,
})
}
// usageError records a failed request to usage stats if enabled.
func (s *Server) usageError() {
if s.stats == nil {
return
}
s.stats.RecordError()
}
func modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
opts := api.DefaultOptions()
if err := opts.FromMap(model.Options); err != nil {
@@ -374,7 +401,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
return
} else if err != nil {
handleScheduleError(c, req.Model, err)
s.handleScheduleError(c, req.Model, err)
return
}
@@ -561,6 +588,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
res.DoneReason = cr.DoneReason.String()
res.TotalDuration = time.Since(checkpointStart)
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
s.usage(c, "generate", m.ShortName, m.Config.ModelFamily, cr.PromptEvalCount, cr.EvalCount, false)
if !req.Raw {
tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
@@ -680,7 +708,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive)
if err != nil {
handleScheduleError(c, req.Model, err)
s.handleScheduleError(c, req.Model, err)
return
}
@@ -790,6 +818,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
LoadDuration: checkpointLoaded.Sub(checkpointStart),
PromptEvalCount: int(totalTokens),
}
s.usage(c, "embed", m.ShortName, m.Config.ModelFamily, int(totalTokens), 0, false)
c.JSON(http.StatusOK, resp)
}
@@ -827,7 +856,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive)
if err != nil {
handleScheduleError(c, req.Model, err)
s.handleScheduleError(c, req.Model, err)
return
}
@@ -1531,6 +1560,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
// Inference
r.GET("/api/ps", s.PsHandler)
r.GET("/api/usage", s.UsageHandler)
r.POST("/api/generate", s.GenerateHandler)
r.POST("/api/chat", s.ChatHandler)
r.POST("/api/embed", s.EmbedHandler)
@@ -1593,6 +1623,13 @@ func Serve(ln net.Listener) error {
s := &Server{addr: ln.Addr()}
// Initialize usage stats if enabled
if envconfig.Usage() {
s.stats = usage.New()
s.stats.Start()
slog.Info("usage stats enabled")
}
var rc *ollama.Registry
if useClient2 {
var err error
@@ -1632,6 +1669,9 @@ func Serve(ln net.Listener) error {
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-signals
if s.stats != nil {
s.stats.Stop()
}
srvr.Close()
schedDone()
sched.unloadAllRunners()
@@ -1649,6 +1689,24 @@ func Serve(ln net.Listener) error {
gpus := discover.GPUDevices(ctx, nil)
discover.LogDetails(gpus)
// Set GPU info for usage reporting
if s.stats != nil {
usage.GPUInfoFunc = func() []usage.GPU {
var result []usage.GPU
for _, gpu := range gpus {
result = append(result, usage.GPU{
Name: gpu.Name,
VRAMBytes: gpu.TotalMemory,
ComputeMajor: gpu.ComputeMajor,
ComputeMinor: gpu.ComputeMinor,
DriverMajor: gpu.DriverMajor,
DriverMinor: gpu.DriverMinor,
})
}
return result
}
}
var totalVRAM uint64
for _, gpu := range gpus {
totalVRAM += gpu.TotalMemory - envconfig.GpuOverhead()
@@ -1745,14 +1803,6 @@ func streamResponse(c *gin.Context, ch chan any) {
}
func (s *Server) WhoamiHandler(c *gin.Context) {
// Check local cache first
state, err := auth.GetSignInState()
if err == nil && state.Name != "" {
c.JSON(http.StatusOK, &api.UserResponse{Name: state.Name, Email: state.Email})
return
}
// No local cache - try network
// todo allow other hosts
u, err := url.Parse("https://ollama.com")
if err != nil {
@@ -1780,13 +1830,6 @@ func (s *Server) WhoamiHandler(c *gin.Context) {
return
}
// Cache successful result locally
if user != nil && user.Name != "" {
if err := auth.SetSignInState(&auth.SignInState{Name: user.Name, Email: user.Email}); err != nil {
slog.Warn("failed to cache sign-in state", "error", err)
}
}
c.JSON(http.StatusOK, user)
}
@@ -1820,11 +1863,6 @@ func (s *Server) SignoutHandler(c *gin.Context) {
return
}
// Clear local sign-in cache
if err := auth.ClearSignInState(); err != nil {
slog.Warn("failed to clear local sign-in state", "error", err)
}
c.JSON(http.StatusOK, nil)
}
@@ -1872,6 +1910,63 @@ func (s *Server) PsHandler(c *gin.Context) {
c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
}
func (s *Server) UsageHandler(c *gin.Context) {
// Get total VRAM used by Ollama
s.sched.loadedMu.Lock()
var totalOllamaVRAM uint64
for _, runner := range s.sched.loaded {
totalOllamaVRAM += runner.vramSize
}
s.sched.loadedMu.Unlock()
var resp api.UsageResponse
// Get GPU/device info
gpus := discover.GPUDevices(c.Request.Context(), nil)
// On Apple Silicon, use system memory instead of Metal's recommendedMaxWorkingSetSize
// because unified memory means GPU and CPU share the same physical RAM pool
var sysTotal, sysFree uint64
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
sysInfo := discover.GetSystemInfo()
sysTotal = sysInfo.TotalMemory
sysFree = sysInfo.FreeMemory
}
for _, gpu := range gpus {
total := gpu.TotalMemory
free := gpu.FreeMemory
// On Apple Silicon, override with system memory values
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" && sysTotal > 0 {
total = sysTotal
free = sysFree
}
used := total - free
ollamaUsed := min(totalOllamaVRAM, used)
otherUsed := used - ollamaUsed
// Use Description for Name (actual device name like "Apple M2 Max")
// Fall back to backend name if Description is empty
name := gpu.Description
if name == "" {
name = gpu.Name
}
resp.GPUs = append(resp.GPUs, api.GPUUsage{
Name: name,
Backend: gpu.Library,
Total: total,
Free: free,
Used: ollamaUsed,
Other: otherUsed,
})
}
c.JSON(http.StatusOK, resp)
}
func toolCallId() string {
const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
b := make([]byte, 8)
@@ -2052,7 +2147,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
return
} else if err != nil {
handleScheduleError(c, req.Model, err)
s.handleScheduleError(c, req.Model, err)
return
}
@@ -2200,6 +2295,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
res.DoneReason = r.DoneReason.String()
res.TotalDuration = time.Since(checkpointStart)
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
s.usage(c, "chat", m.ShortName, m.Config.ModelFamily, r.PromptEvalCount, r.EvalCount, len(req.Tools) > 0)
}
if builtinParser != nil {
@@ -2375,6 +2471,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
resp.Message.ToolCalls = toolCalls
}
s.usage(c, "chat", m.ShortName, m.Config.ModelFamily, resp.PromptEvalCount, resp.EvalCount, len(toolCalls) > 0)
c.JSON(http.StatusOK, resp)
return
}
@@ -2382,7 +2479,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
streamResponse(c, ch)
}
func handleScheduleError(c *gin.Context, name string, err error) {
func (s *Server) handleScheduleError(c *gin.Context, name string, err error) {
s.usageError()
switch {
case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})

View File

@@ -0,0 +1,60 @@
package server
import (
"encoding/json"
"net/http"
"testing"
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/api"
)
func TestUsageHandler(t *testing.T) {
gin.SetMode(gin.TestMode)
t.Run("empty server", func(t *testing.T) {
s := Server{
sched: &Scheduler{
loaded: make(map[string]*runnerRef),
},
}
w := createRequest(t, s.UsageHandler, nil)
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
var resp api.UsageResponse
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatal(err)
}
// GPUs may or may not be present depending on system
// Just verify we can decode the response
})
t.Run("response structure", func(t *testing.T) {
s := Server{
sched: &Scheduler{
loaded: make(map[string]*runnerRef),
},
}
w := createRequest(t, s.UsageHandler, nil)
if w.Code != http.StatusOK {
t.Fatalf("expected status code 200, actual %d", w.Code)
}
// Verify we can decode the response as valid JSON
var resp map[string]any
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
t.Fatal(err)
}
// The response should be a valid object (not null)
if resp == nil {
t.Error("expected non-nil response")
}
})
}

65
server/usage/reporter.go Normal file
View File

@@ -0,0 +1,65 @@
package usage
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/ollama/ollama/version"
)
const (
reportTimeout = 10 * time.Second
usageURL = "https://ollama.com/api/usage"
)
// HeartbeatResponse is the response from the heartbeat endpoint.
type HeartbeatResponse struct {
UpdateVersion string `json:"update_version,omitempty"`
}
// UpdateAvailable returns the available update version, if any.
func (t *Stats) UpdateAvailable() string {
if v := t.updateAvailable.Load(); v != nil {
return v.(string)
}
return ""
}
// sendHeartbeat sends usage stats and checks for updates.
func (t *Stats) sendHeartbeat(payload *Payload) {
data, err := json.Marshal(payload)
if err != nil {
return
}
ctx, cancel := context.WithTimeout(context.Background(), reportTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, http.MethodPost, usageURL, bytes.NewReader(data))
if err != nil {
return
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("User-Agent", fmt.Sprintf("ollama/%s", version.Version))
resp, err := http.DefaultClient.Do(req)
if err != nil {
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return
}
var heartbeat HeartbeatResponse
if err := json.NewDecoder(resp.Body).Decode(&heartbeat); err != nil {
return
}
t.updateAvailable.Store(heartbeat.UpdateVersion)
}

23
server/usage/source.go Normal file
View File

@@ -0,0 +1,23 @@
package usage
import (
"strings"
)
// API type constants
const (
APITypeOllama = "ollama"
APITypeOpenAI = "openai"
APITypeAnthropic = "anthropic"
)
// ClassifyAPIType determines the API type from the request path.
func ClassifyAPIType(path string) string {
if strings.HasPrefix(path, "/v1/messages") {
return APITypeAnthropic
}
if strings.HasPrefix(path, "/v1/") {
return APITypeOpenAI
}
return APITypeOllama
}

324
server/usage/usage.go Normal file
View File

@@ -0,0 +1,324 @@
// Package usage provides in-memory usage statistics collection and reporting.
package usage
import (
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/ollama/ollama/discover"
"github.com/ollama/ollama/version"
)
// Stats collects usage statistics in memory and reports them periodically.
type Stats struct {
mu sync.RWMutex
// Atomic counters for hot path
requestsTotal atomic.Int64
tokensPrompt atomic.Int64
tokensCompletion atomic.Int64
errorsTotal atomic.Int64
// Map-based counters (require lock)
endpoints map[string]int64
architectures map[string]int64
apis map[string]int64
models map[string]*ModelStats // per-model stats
// Feature usage
toolCalls atomic.Int64
structuredOutput atomic.Int64
// Update info (set by reporter after pinging update endpoint)
updateAvailable atomic.Value // string
// Reporter
stopCh chan struct{}
doneCh chan struct{}
interval time.Duration
endpoint string
}
// ModelStats tracks per-model usage statistics.
type ModelStats struct {
Requests int64
TokensInput int64
TokensOutput int64
}
// Request contains the data to record for a single request.
type Request struct {
Endpoint string // "chat", "generate", "embed"
Model string // model name (e.g., "llama3.2:3b")
Architecture string // model architecture (e.g., "llama", "qwen2")
APIType string // "native" or "openai_compat"
PromptTokens int
CompletionTokens int
UsedTools bool
StructuredOutput bool
}
// SystemInfo contains hardware information to report.
type SystemInfo struct {
OS string `json:"os"`
Arch string `json:"arch"`
CPUCores int `json:"cpu_cores"`
RAMBytes uint64 `json:"ram_bytes"`
GPUs []GPU `json:"gpus,omitempty"`
}
// GPU contains information about a GPU.
type GPU struct {
Name string `json:"name"`
VRAMBytes uint64 `json:"vram_bytes"`
ComputeMajor int `json:"compute_major,omitempty"`
ComputeMinor int `json:"compute_minor,omitempty"`
DriverMajor int `json:"driver_major,omitempty"`
DriverMinor int `json:"driver_minor,omitempty"`
}
// Payload is the data sent to the heartbeat endpoint.
type Payload struct {
Version string `json:"version"`
Time time.Time `json:"time"`
System SystemInfo `json:"system"`
Totals struct {
Requests int64 `json:"requests"`
Errors int64 `json:"errors"`
InputTokens int64 `json:"input_tokens"`
OutputTokens int64 `json:"output_tokens"`
} `json:"totals"`
Endpoints map[string]int64 `json:"endpoints"`
Architectures map[string]int64 `json:"architectures"`
APIs map[string]int64 `json:"apis"`
Features struct {
ToolCalls int64 `json:"tool_calls"`
StructuredOutput int64 `json:"structured_output"`
} `json:"features"`
}
const (
defaultInterval = 1 * time.Hour
)
// New creates a new Stats instance.
func New(opts ...Option) *Stats {
t := &Stats{
endpoints: make(map[string]int64),
architectures: make(map[string]int64),
apis: make(map[string]int64),
models: make(map[string]*ModelStats),
stopCh: make(chan struct{}),
doneCh: make(chan struct{}),
interval: defaultInterval,
}
for _, opt := range opts {
opt(t)
}
return t
}
// Option configures the Stats instance.
type Option func(*Stats)
// WithInterval sets the reporting interval.
func WithInterval(d time.Duration) Option {
return func(t *Stats) {
t.interval = d
}
}
// Record records a request. This is the hot path and should be fast.
func (t *Stats) Record(r *Request) {
t.requestsTotal.Add(1)
t.tokensPrompt.Add(int64(r.PromptTokens))
t.tokensCompletion.Add(int64(r.CompletionTokens))
if r.UsedTools {
t.toolCalls.Add(1)
}
if r.StructuredOutput {
t.structuredOutput.Add(1)
}
t.mu.Lock()
t.endpoints[r.Endpoint]++
t.architectures[r.Architecture]++
t.apis[r.APIType]++
// Track per-model stats
if r.Model != "" {
if t.models[r.Model] == nil {
t.models[r.Model] = &ModelStats{}
}
t.models[r.Model].Requests++
t.models[r.Model].TokensInput += int64(r.PromptTokens)
t.models[r.Model].TokensOutput += int64(r.CompletionTokens)
}
t.mu.Unlock()
}
// RecordError records a failed request.
func (t *Stats) RecordError() {
t.errorsTotal.Add(1)
}
// GetModelStats returns a copy of per-model statistics.
func (t *Stats) GetModelStats() map[string]*ModelStats {
t.mu.RLock()
defer t.mu.RUnlock()
result := make(map[string]*ModelStats, len(t.models))
for k, v := range t.models {
result[k] = &ModelStats{
Requests: v.Requests,
TokensInput: v.TokensInput,
TokensOutput: v.TokensOutput,
}
}
return result
}
// View returns current stats without resetting counters.
func (t *Stats) View() *Payload {
t.mu.RLock()
defer t.mu.RUnlock()
now := time.Now()
// Copy maps
endpoints := make(map[string]int64, len(t.endpoints))
for k, v := range t.endpoints {
endpoints[k] = v
}
architectures := make(map[string]int64, len(t.architectures))
for k, v := range t.architectures {
architectures[k] = v
}
apis := make(map[string]int64, len(t.apis))
for k, v := range t.apis {
apis[k] = v
}
p := &Payload{
Version: version.Version,
Time: now,
System: getSystemInfo(),
Endpoints: endpoints,
Architectures: architectures,
APIs: apis,
}
p.Totals.Requests = t.requestsTotal.Load()
p.Totals.Errors = t.errorsTotal.Load()
p.Totals.InputTokens = t.tokensPrompt.Load()
p.Totals.OutputTokens = t.tokensCompletion.Load()
p.Features.ToolCalls = t.toolCalls.Load()
p.Features.StructuredOutput = t.structuredOutput.Load()
return p
}
// Snapshot returns current stats and resets counters.
func (t *Stats) Snapshot() *Payload {
t.mu.Lock()
defer t.mu.Unlock()
now := time.Now()
p := &Payload{
Version: version.Version,
Time: now,
System: getSystemInfo(),
Endpoints: t.endpoints,
Architectures: t.architectures,
APIs: t.apis,
}
p.Totals.Requests = t.requestsTotal.Swap(0)
p.Totals.Errors = t.errorsTotal.Swap(0)
p.Totals.InputTokens = t.tokensPrompt.Swap(0)
p.Totals.OutputTokens = t.tokensCompletion.Swap(0)
p.Features.ToolCalls = t.toolCalls.Swap(0)
p.Features.StructuredOutput = t.structuredOutput.Swap(0)
// Reset maps
t.endpoints = make(map[string]int64)
t.architectures = make(map[string]int64)
t.apis = make(map[string]int64)
return p
}
// getSystemInfo collects hardware information.
func getSystemInfo() SystemInfo {
info := SystemInfo{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
}
// Get CPU and memory info
sysInfo := discover.GetSystemInfo()
info.CPUCores = sysInfo.ThreadCount
info.RAMBytes = sysInfo.TotalMemory
// Get GPU info
gpus := getGPUInfo()
info.GPUs = gpus
return info
}
// GPUInfoFunc is a function that returns GPU information.
// It's set by the server package after GPU discovery.
var GPUInfoFunc func() []GPU
// getGPUInfo collects GPU information.
func getGPUInfo() []GPU {
if GPUInfoFunc != nil {
return GPUInfoFunc()
}
return nil
}
// Start begins the periodic reporting goroutine.
func (t *Stats) Start() {
go t.reportLoop()
}
// Stop stops reporting and waits for the final report.
func (t *Stats) Stop() {
close(t.stopCh)
<-t.doneCh
}
// reportLoop runs the periodic reporting.
func (t *Stats) reportLoop() {
defer close(t.doneCh)
ticker := time.NewTicker(t.interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
t.report()
case <-t.stopCh:
// Send final report before stopping
t.report()
return
}
}
}
// report sends usage stats and checks for updates.
func (t *Stats) report() {
payload := t.Snapshot()
t.sendHeartbeat(payload)
}

194
server/usage/usage_test.go Normal file
View File

@@ -0,0 +1,194 @@
package usage
import (
"testing"
)
func TestNew(t *testing.T) {
stats := New()
if stats == nil {
t.Fatal("New() returned nil")
}
}
func TestRecord(t *testing.T) {
stats := New()
stats.Record(&Request{
Model: "llama3:8b",
Endpoint: "chat",
Architecture: "llama",
APIType: "native",
PromptTokens: 100,
CompletionTokens: 50,
UsedTools: true,
StructuredOutput: false,
})
// Check totals
payload := stats.View()
if payload.Totals.Requests != 1 {
t.Errorf("expected 1 request, got %d", payload.Totals.Requests)
}
if payload.Totals.InputTokens != 100 {
t.Errorf("expected 100 prompt tokens, got %d", payload.Totals.InputTokens)
}
if payload.Totals.OutputTokens != 50 {
t.Errorf("expected 50 completion tokens, got %d", payload.Totals.OutputTokens)
}
if payload.Features.ToolCalls != 1 {
t.Errorf("expected 1 tool call, got %d", payload.Features.ToolCalls)
}
if payload.Features.StructuredOutput != 0 {
t.Errorf("expected 0 structured outputs, got %d", payload.Features.StructuredOutput)
}
}
func TestGetModelStats(t *testing.T) {
stats := New()
// Record requests for multiple models
stats.Record(&Request{
Model: "llama3:8b",
PromptTokens: 100,
CompletionTokens: 50,
})
stats.Record(&Request{
Model: "llama3:8b",
PromptTokens: 200,
CompletionTokens: 100,
})
stats.Record(&Request{
Model: "mistral:7b",
PromptTokens: 50,
CompletionTokens: 25,
})
modelStats := stats.GetModelStats()
// Check llama3:8b stats
llama := modelStats["llama3:8b"]
if llama == nil {
t.Fatal("expected llama3:8b stats")
}
if llama.Requests != 2 {
t.Errorf("expected 2 requests for llama3:8b, got %d", llama.Requests)
}
if llama.TokensInput != 300 {
t.Errorf("expected 300 input tokens for llama3:8b, got %d", llama.TokensInput)
}
if llama.TokensOutput != 150 {
t.Errorf("expected 150 output tokens for llama3:8b, got %d", llama.TokensOutput)
}
// Check mistral:7b stats
mistral := modelStats["mistral:7b"]
if mistral == nil {
t.Fatal("expected mistral:7b stats")
}
if mistral.Requests != 1 {
t.Errorf("expected 1 request for mistral:7b, got %d", mistral.Requests)
}
if mistral.TokensInput != 50 {
t.Errorf("expected 50 input tokens for mistral:7b, got %d", mistral.TokensInput)
}
if mistral.TokensOutput != 25 {
t.Errorf("expected 25 output tokens for mistral:7b, got %d", mistral.TokensOutput)
}
}
func TestRecordError(t *testing.T) {
stats := New()
stats.RecordError()
stats.RecordError()
payload := stats.View()
if payload.Totals.Errors != 2 {
t.Errorf("expected 2 errors, got %d", payload.Totals.Errors)
}
}
func TestView(t *testing.T) {
stats := New()
stats.Record(&Request{
Model: "llama3:8b",
Endpoint: "chat",
Architecture: "llama",
APIType: "native",
})
// First view
_ = stats.View()
// View should not reset counters
payload := stats.View()
if payload.Totals.Requests != 1 {
t.Errorf("View should not reset counters, expected 1 request, got %d", payload.Totals.Requests)
}
}
func TestSnapshot(t *testing.T) {
stats := New()
stats.Record(&Request{
Model: "llama3:8b",
Endpoint: "chat",
PromptTokens: 100,
CompletionTokens: 50,
})
// Snapshot should return data and reset counters
snapshot := stats.Snapshot()
if snapshot.Totals.Requests != 1 {
t.Errorf("expected 1 request in snapshot, got %d", snapshot.Totals.Requests)
}
// After snapshot, counters should be reset
payload2 := stats.View()
if payload2.Totals.Requests != 0 {
t.Errorf("expected 0 requests after snapshot, got %d", payload2.Totals.Requests)
}
}
func TestConcurrentAccess(t *testing.T) {
stats := New()
done := make(chan bool)
// Concurrent writes
for i := 0; i < 10; i++ {
go func() {
for j := 0; j < 100; j++ {
stats.Record(&Request{
Model: "llama3:8b",
PromptTokens: 10,
CompletionTokens: 5,
})
}
done <- true
}()
}
// Concurrent reads
for i := 0; i < 5; i++ {
go func() {
for j := 0; j < 100; j++ {
_ = stats.View()
_ = stats.GetModelStats()
}
done <- true
}()
}
// Wait for all goroutines
for i := 0; i < 15; i++ {
<-done
}
payload := stats.View()
if payload.Totals.Requests != 1000 {
t.Errorf("expected 1000 requests, got %d", payload.Totals.Requests)
}
}