mirror of
https://github.com/ollama/ollama.git
synced 2026-02-27 20:46:49 -05:00
Compare commits
3 Commits
pdevine/sa
...
brucemacd/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ac1300805 | ||
|
|
43d9907dd6 | ||
|
|
91dc088e8b |
13
api/types.go
13
api/types.go
@@ -922,6 +922,19 @@ type UserResponse struct {
|
|||||||
Plan string `json:"plan,omitempty"`
|
Plan string `json:"plan,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UsageResponse is the response from GET /api/usage: aggregate usage
// statistics per model since the server started.
type UsageResponse struct {
	// Start is the time the server started tracking usage (UTC, RFC 3339).
	Start time.Time `json:"start"`

	// Usage holds per-model usage statistics.
	Usage []ModelUsageData `json:"usage"`
}
|
||||||
|
|
||||||
|
// ModelUsageData reports aggregate usage for a single model.
type ModelUsageData struct {
	// Model is the model name.
	Model string `json:"model"`

	// Requests is the total number of completed requests.
	Requests int64 `json:"requests"`

	// PromptTokens is the total number of prompt tokens evaluated.
	PromptTokens int64 `json:"prompt_tokens"`

	// CompletionTokens is the total number of completion tokens generated.
	CompletionTokens int64 `json:"completion_tokens"`
}
|
||||||
|
|
||||||
// Tensor describes the metadata for a given tensor.
|
// Tensor describes the metadata for a given tensor.
|
||||||
type Tensor struct {
|
type Tensor struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
|||||||
48
docs/api.md
48
docs/api.md
@@ -15,6 +15,7 @@
|
|||||||
- [Push a Model](#push-a-model)
|
- [Push a Model](#push-a-model)
|
||||||
- [Generate Embeddings](#generate-embeddings)
|
- [Generate Embeddings](#generate-embeddings)
|
||||||
- [List Running Models](#list-running-models)
|
- [List Running Models](#list-running-models)
|
||||||
|
- [Usage](#usage)
|
||||||
- [Version](#version)
|
- [Version](#version)
|
||||||
- [Experimental: Image Generation](#image-generation-experimental)
|
- [Experimental: Image Generation](#image-generation-experimental)
|
||||||
|
|
||||||
@@ -1854,6 +1855,53 @@ curl http://localhost:11434/api/embeddings -d '{
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/usage
|
||||||
|
```
|
||||||
|
|
||||||
|
Show aggregate usage statistics per model since the server started. All timestamps are UTC in RFC 3339 format.
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
#### Request
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:11434/api/usage
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"start": "2025-01-27T20:00:00Z",
|
||||||
|
"usage": [
|
||||||
|
{
|
||||||
|
"model": "llama3.2",
|
||||||
|
"requests": 5,
|
||||||
|
"prompt_tokens": 130,
|
||||||
|
"completion_tokens": 890
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "deepseek-r1",
|
||||||
|
"requests": 2,
|
||||||
|
"prompt_tokens": 48,
|
||||||
|
"completion_tokens": 312
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response fields
|
||||||
|
|
||||||
|
- `start`: when the server started tracking usage (UTC, RFC 3339)
|
||||||
|
- `usage`: list of per-model usage statistics
|
||||||
|
- `model`: model name
|
||||||
|
- `requests`: total number of completed requests
|
||||||
|
- `prompt_tokens`: total prompt tokens evaluated
|
||||||
|
- `completion_tokens`: total completion tokens generated
|
||||||
|
|
||||||
## Version
|
## Version
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -91,6 +91,8 @@ type Server struct {
|
|||||||
aliasesOnce sync.Once
|
aliasesOnce sync.Once
|
||||||
aliases *store
|
aliases *store
|
||||||
aliasesErr error
|
aliasesErr error
|
||||||
|
lowVRAM bool
|
||||||
|
usage *UsageTracker
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
@@ -289,6 +291,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
c.Header("Content-Type", contentType)
|
c.Header("Content-Type", contentType)
|
||||||
|
|
||||||
fn := func(resp api.GenerateResponse) error {
|
fn := func(resp api.GenerateResponse) error {
|
||||||
|
if resp.Done {
|
||||||
|
s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
|
||||||
|
}
|
||||||
|
|
||||||
resp.Model = origModel
|
resp.Model = origModel
|
||||||
resp.RemoteModel = m.Config.RemoteModel
|
resp.RemoteModel = m.Config.RemoteModel
|
||||||
resp.RemoteHost = m.Config.RemoteHost
|
resp.RemoteHost = m.Config.RemoteHost
|
||||||
@@ -595,6 +601,8 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
res.Context = tokens
|
res.Context = tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s.usage.Record(req.Model, cr.PromptEvalCount, cr.EvalCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
if builtinParser != nil {
|
if builtinParser != nil {
|
||||||
@@ -1622,6 +1630,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
|||||||
r.POST("/api/experimental/aliases", s.CreateAliasHandler)
|
r.POST("/api/experimental/aliases", s.CreateAliasHandler)
|
||||||
r.DELETE("/api/experimental/aliases", s.DeleteAliasHandler)
|
r.DELETE("/api/experimental/aliases", s.DeleteAliasHandler)
|
||||||
|
|
||||||
|
r.GET("/api/usage", s.UsageHandler)
|
||||||
|
|
||||||
// Inference
|
// Inference
|
||||||
r.GET("/api/ps", s.PsHandler)
|
r.GET("/api/ps", s.PsHandler)
|
||||||
r.POST("/api/generate", s.GenerateHandler)
|
r.POST("/api/generate", s.GenerateHandler)
|
||||||
@@ -1692,7 +1702,7 @@ func Serve(ln net.Listener) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s := &Server{addr: ln.Addr()}
|
s := &Server{addr: ln.Addr(), usage: NewUsageTracker()}
|
||||||
|
|
||||||
var rc *ollama.Registry
|
var rc *ollama.Registry
|
||||||
if useClient2 {
|
if useClient2 {
|
||||||
@@ -1927,6 +1937,10 @@ func (s *Server) SignoutHandler(c *gin.Context) {
|
|||||||
c.JSON(http.StatusOK, nil)
|
c.JSON(http.StatusOK, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UsageHandler serves GET /api/usage, returning aggregate per-model usage
// statistics collected since the server started.
func (s *Server) UsageHandler(c *gin.Context) {
	c.JSON(http.StatusOK, s.usage.Stats())
}
|
||||||
|
|
||||||
func (s *Server) PsHandler(c *gin.Context) {
|
func (s *Server) PsHandler(c *gin.Context) {
|
||||||
models := []api.ProcessModelResponse{}
|
models := []api.ProcessModelResponse{}
|
||||||
|
|
||||||
@@ -2097,6 +2111,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
c.Header("Content-Type", contentType)
|
c.Header("Content-Type", contentType)
|
||||||
|
|
||||||
fn := func(resp api.ChatResponse) error {
|
fn := func(resp api.ChatResponse) error {
|
||||||
|
if resp.Done {
|
||||||
|
s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
|
||||||
|
}
|
||||||
|
|
||||||
resp.Model = origModel
|
resp.Model = origModel
|
||||||
resp.RemoteModel = m.Config.RemoteModel
|
resp.RemoteModel = m.Config.RemoteModel
|
||||||
resp.RemoteHost = m.Config.RemoteHost
|
resp.RemoteHost = m.Config.RemoteHost
|
||||||
@@ -2317,6 +2335,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
res.DoneReason = r.DoneReason.String()
|
res.DoneReason = r.DoneReason.String()
|
||||||
res.TotalDuration = time.Since(checkpointStart)
|
res.TotalDuration = time.Since(checkpointStart)
|
||||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||||
|
|
||||||
|
s.usage.Record(req.Model, r.PromptEvalCount, r.EvalCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
if builtinParser != nil {
|
if builtinParser != nil {
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -224,6 +225,7 @@ func TestChatDebugRenderOnly(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -220,6 +221,7 @@ func TestGenerateWithDebugRenderOnly(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
|
|||||||
@@ -88,19 +88,39 @@ func TestGenerateChatRemote(t *testing.T) {
|
|||||||
if r.Method != http.MethodPost {
|
if r.Method != http.MethodPost {
|
||||||
t.Errorf("Expected POST request, got %s", r.Method)
|
t.Errorf("Expected POST request, got %s", r.Method)
|
||||||
}
|
}
|
||||||
if r.URL.Path != "/api/chat" {
|
|
||||||
t.Errorf("Expected path '/api/chat', got %s", r.URL.Path)
|
|
||||||
}
|
|
||||||
|
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
resp := api.ChatResponse{
|
|
||||||
Model: "test",
|
switch r.URL.Path {
|
||||||
Done: true,
|
case "/api/chat":
|
||||||
DoneReason: "load",
|
resp := api.ChatResponse{
|
||||||
}
|
Model: "test",
|
||||||
if err := json.NewEncoder(w).Encode(&resp); err != nil {
|
Done: true,
|
||||||
t.Fatal(err)
|
DoneReason: "load",
|
||||||
|
Metrics: api.Metrics{
|
||||||
|
PromptEvalCount: 10,
|
||||||
|
EvalCount: 20,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(w).Encode(&resp); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
case "/api/generate":
|
||||||
|
resp := api.GenerateResponse{
|
||||||
|
Model: "test",
|
||||||
|
Done: true,
|
||||||
|
DoneReason: "stop",
|
||||||
|
Metrics: api.Metrics{
|
||||||
|
PromptEvalCount: 5,
|
||||||
|
EvalCount: 15,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(w).Encode(&resp); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
t.Errorf("unexpected path %s", r.URL.Path)
|
||||||
}
|
}
|
||||||
}))
|
}))
|
||||||
defer rs.Close()
|
defer rs.Close()
|
||||||
@@ -111,7 +131,7 @@ func TestGenerateChatRemote(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
t.Setenv("OLLAMA_REMOTES", p.Hostname())
|
t.Setenv("OLLAMA_REMOTES", p.Hostname())
|
||||||
s := Server{}
|
s := Server{usage: NewUsageTracker()}
|
||||||
w := createRequest(t, s.CreateHandler, api.CreateRequest{
|
w := createRequest(t, s.CreateHandler, api.CreateRequest{
|
||||||
Model: "test-cloud",
|
Model: "test-cloud",
|
||||||
RemoteHost: rs.URL,
|
RemoteHost: rs.URL,
|
||||||
@@ -159,6 +179,61 @@ func TestGenerateChatRemote(t *testing.T) {
|
|||||||
t.Errorf("expected done reason load, got %s", actual.DoneReason)
|
t.Errorf("expected done reason load, got %s", actual.DoneReason)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("remote chat usage tracking", func(t *testing.T) {
|
||||||
|
stats := s.usage.Stats()
|
||||||
|
found := false
|
||||||
|
for _, m := range stats.Usage {
|
||||||
|
if m.Model == "test-cloud" {
|
||||||
|
found = true
|
||||||
|
if m.Requests != 1 {
|
||||||
|
t.Errorf("expected 1 request, got %d", m.Requests)
|
||||||
|
}
|
||||||
|
if m.PromptTokens != 10 {
|
||||||
|
t.Errorf("expected 10 prompt tokens, got %d", m.PromptTokens)
|
||||||
|
}
|
||||||
|
if m.CompletionTokens != 20 {
|
||||||
|
t.Errorf("expected 20 completion tokens, got %d", m.CompletionTokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Error("expected usage entry for test-cloud")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("remote generate usage tracking", func(t *testing.T) {
|
||||||
|
// Reset the tracker for a clean test
|
||||||
|
s.usage = NewUsageTracker()
|
||||||
|
|
||||||
|
w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
|
||||||
|
Model: "test-cloud",
|
||||||
|
Prompt: "hello",
|
||||||
|
})
|
||||||
|
if w.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status 200, got %d", w.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
stats := s.usage.Stats()
|
||||||
|
found := false
|
||||||
|
for _, m := range stats.Usage {
|
||||||
|
if m.Model == "test-cloud" {
|
||||||
|
found = true
|
||||||
|
if m.Requests != 1 {
|
||||||
|
t.Errorf("expected 1 request, got %d", m.Requests)
|
||||||
|
}
|
||||||
|
if m.PromptTokens != 5 {
|
||||||
|
t.Errorf("expected 5 prompt tokens, got %d", m.PromptTokens)
|
||||||
|
}
|
||||||
|
if m.CompletionTokens != 15 {
|
||||||
|
t.Errorf("expected 15 completion tokens, got %d", m.CompletionTokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Error("expected usage entry for test-cloud")
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGenerateChat(t *testing.T) {
|
func TestGenerateChat(t *testing.T) {
|
||||||
@@ -177,6 +252,7 @@ func TestGenerateChat(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -894,6 +970,7 @@ func TestGenerate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -1378,6 +1455,7 @@ func TestGenerateLogprobs(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := &Server{
|
s := &Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -1558,6 +1636,7 @@ func TestChatLogprobs(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := &Server{
|
s := &Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -1668,6 +1747,7 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := &Server{
|
s := &Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -2114,6 +2194,7 @@ func TestGenerateUnload(t *testing.T) {
|
|||||||
var loadFnCalled bool
|
var loadFnCalled bool
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -2215,6 +2296,7 @@ func TestGenerateWithImages(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -2393,6 +2475,7 @@ func TestImageGenerateStreamFalse(t *testing.T) {
|
|||||||
|
|
||||||
opts := api.DefaultOptions()
|
opts := api.DefaultOptions()
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
|
|||||||
@@ -255,6 +255,7 @@ func TestChatHarmonyParserStreamingRealtime(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -406,6 +407,7 @@ func TestChatHarmonyParserStreamingSimple(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
@@ -588,6 +590,7 @@ func TestChatHarmonyParserStreaming(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s := Server{
|
s := Server{
|
||||||
|
usage: NewUsageTracker(),
|
||||||
sched: &Scheduler{
|
sched: &Scheduler{
|
||||||
pendingReqCh: make(chan *LlmRequest, 1),
|
pendingReqCh: make(chan *LlmRequest, 1),
|
||||||
finishedReqCh: make(chan *LlmRequest, 1),
|
finishedReqCh: make(chan *LlmRequest, 1),
|
||||||
|
|||||||
62
server/usage.go
Normal file
62
server/usage.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
	"sort"
	"sync"
	"time"

	"github.com/ollama/ollama/api"
)
|
||||||
|
|
||||||
|
// ModelUsage accumulates the request and token counters for a single model.
type ModelUsage struct {
	Requests         int64
	PromptTokens     int64
	CompletionTokens int64
}

// UsageTracker records per-model usage statistics since the server started.
// All methods are safe for concurrent use.
type UsageTracker struct {
	mu     sync.Mutex
	start  time.Time
	models map[string]*ModelUsage
}

// NewUsageTracker returns a tracker whose start time is the current UTC time.
func NewUsageTracker() *UsageTracker {
	return &UsageTracker{
		start:  time.Now().UTC(),
		models: make(map[string]*ModelUsage),
	}
}

// Record adds one completed request and its prompt/completion token counts
// to the running totals for model, creating the entry on first use.
func (u *UsageTracker) Record(model string, promptTokens, completionTokens int) {
	u.mu.Lock()
	defer u.mu.Unlock()

	m := u.models[model]
	if m == nil {
		m = &ModelUsage{}
		u.models[model] = m
	}
	m.Requests++
	m.PromptTokens += int64(promptTokens)
	m.CompletionTokens += int64(completionTokens)
}
||||||
|
|
||||||
|
func (u *UsageTracker) Stats() api.UsageResponse {
|
||||||
|
u.mu.Lock()
|
||||||
|
defer u.mu.Unlock()
|
||||||
|
|
||||||
|
byModel := make([]api.ModelUsageData, 0, len(u.models))
|
||||||
|
for model, usage := range u.models {
|
||||||
|
byModel = append(byModel, api.ModelUsageData{
|
||||||
|
Model: model,
|
||||||
|
Requests: usage.Requests,
|
||||||
|
PromptTokens: usage.PromptTokens,
|
||||||
|
CompletionTokens: usage.CompletionTokens,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return api.UsageResponse{
|
||||||
|
Start: u.start,
|
||||||
|
Usage: byModel,
|
||||||
|
}
|
||||||
|
}
|
||||||
136
server/usage_test.go
Normal file
136
server/usage_test.go
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestUsageTrackerRecord verifies that Record accumulates requests and
// token counts independently per model.
func TestUsageTrackerRecord(t *testing.T) {
	tracker := NewUsageTracker()

	tracker.Record("model-a", 10, 20)
	tracker.Record("model-a", 5, 15)
	tracker.Record("model-b", 100, 200)

	stats := tracker.Stats()

	if len(stats.Usage) != 2 {
		t.Fatalf("expected 2 models, got %d", len(stats.Usage))
	}

	// Index by model name; Stats ordering is not assumed here.
	lookup := make(map[string]api.ModelUsageData)
	for _, m := range stats.Usage {
		lookup[m.Model] = m
	}

	a := lookup["model-a"]
	if a.Requests != 2 {
		t.Errorf("model-a requests: expected 2, got %d", a.Requests)
	}
	if a.PromptTokens != 15 {
		t.Errorf("model-a prompt tokens: expected 15, got %d", a.PromptTokens)
	}
	if a.CompletionTokens != 35 {
		t.Errorf("model-a completion tokens: expected 35, got %d", a.CompletionTokens)
	}

	b := lookup["model-b"]
	if b.Requests != 1 {
		t.Errorf("model-b requests: expected 1, got %d", b.Requests)
	}
	if b.PromptTokens != 100 {
		t.Errorf("model-b prompt tokens: expected 100, got %d", b.PromptTokens)
	}
	if b.CompletionTokens != 200 {
		t.Errorf("model-b completion tokens: expected 200, got %d", b.CompletionTokens)
	}
}
|
||||||
|
|
||||||
|
// TestUsageTrackerConcurrent exercises Record from 100 concurrent
// goroutines to check the tracker's mutex protection (run with -race).
func TestUsageTrackerConcurrent(t *testing.T) {
	tracker := NewUsageTracker()

	var wg sync.WaitGroup
	for range 100 {
		wg.Add(1)
		go func() {
			defer wg.Done()
			tracker.Record("model-a", 1, 2)
		}()
	}
	wg.Wait()

	stats := tracker.Stats()
	if len(stats.Usage) != 1 {
		t.Fatalf("expected 1 model, got %d", len(stats.Usage))
	}

	m := stats.Usage[0]
	if m.Requests != 100 {
		t.Errorf("requests: expected 100, got %d", m.Requests)
	}
	if m.PromptTokens != 100 {
		t.Errorf("prompt tokens: expected 100, got %d", m.PromptTokens)
	}
	if m.CompletionTokens != 200 {
		t.Errorf("completion tokens: expected 200, got %d", m.CompletionTokens)
	}
}
|
||||||
|
|
||||||
|
// TestUsageTrackerStart checks that a new tracker reports a non-zero
// start timestamp in its stats.
func TestUsageTrackerStart(t *testing.T) {
	tracker := NewUsageTracker()

	stats := tracker.Stats()
	if stats.Start.IsZero() {
		t.Error("expected non-zero start time")
	}
}
|
||||||
|
|
||||||
|
// TestUsageHandler drives the GET /api/usage handler directly through a
// gin test context and checks the JSON-decoded aggregate totals.
func TestUsageHandler(t *testing.T) {
	gin.SetMode(gin.TestMode)

	s := &Server{
		usage: NewUsageTracker(),
	}

	// Two records for the same model should be merged into one entry.
	s.usage.Record("llama3", 50, 100)
	s.usage.Record("llama3", 25, 50)

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest(http.MethodGet, "/api/usage", nil)

	s.UsageHandler(c)

	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}

	var resp api.UsageResponse
	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
		t.Fatalf("failed to unmarshal response: %v", err)
	}

	if len(resp.Usage) != 1 {
		t.Fatalf("expected 1 model, got %d", len(resp.Usage))
	}

	m := resp.Usage[0]
	if m.Model != "llama3" {
		t.Errorf("expected model llama3, got %s", m.Model)
	}
	if m.Requests != 2 {
		t.Errorf("expected 2 requests, got %d", m.Requests)
	}
	if m.PromptTokens != 75 {
		t.Errorf("expected 75 prompt tokens, got %d", m.PromptTokens)
	}
	if m.CompletionTokens != 150 {
		t.Errorf("expected 150 completion tokens, got %d", m.CompletionTokens)
	}
}
|
||||||
Reference in New Issue
Block a user