Compare commits

..

5 Commits

Author SHA1 Message Date
Bruce MacDonald
9d02d1d767 install: prevent partial download script execution (#14311)
Wrap script in main function so that a truncated partial download doesn't end up executing half a script.
2026-02-18 18:32:45 -08:00
Bruce MacDonald
1a636fb47a cmd: set codex env vars on launch and handle zstd request bodies (#14122)
The Codex runner was not setting OPENAI_BASE_URL or OPENAI_API_KEY, so Codex sent requests to api.openai.com instead of the local Ollama server. Setting both on launch fixes this, mirroring the approach used by the Claude runner.

Codex v0.98.0 sends zstd-compressed request bodies to the /v1/responses endpoint. Add decompression support in ResponsesMiddleware with an 8MB max decompressed size limit to prevent resource exhaustion.
2026-02-18 17:19:36 -08:00
Patrick Devine
0759fface9 Revert "chore: update mlx-c bindings to 0.5.0 (#14303)" (#14316)
This reverts commit f01a9a7859.
2026-02-18 17:01:25 -08:00
Parth Sareen
325b72bc31 cmd/tui: default to single-select for editor integrations (#14302) 2026-02-17 18:17:27 -08:00
Patrick Devine
f01a9a7859 chore: update mlx-c bindings to 0.5.0 (#14303) 2026-02-17 16:48:16 -08:00
18 changed files with 406 additions and 409 deletions

View File

@@ -922,19 +922,6 @@ type UserResponse struct {
Plan string `json:"plan,omitempty"`
}
type UsageResponse struct {
// Start is the time the server started tracking usage (UTC, RFC 3339).
Start time.Time `json:"start"`
Usage []ModelUsageData `json:"usage"`
}
type ModelUsageData struct {
Model string `json:"model"`
Requests int64 `json:"requests"`
PromptTokens int64 `json:"prompt_tokens"`
CompletionTokens int64 `json:"completion_tokens"`
}
// Tensor describes the metadata for a given tensor.
type Tensor struct {
Name string `json:"name"`

View File

@@ -6,6 +6,7 @@ import (
"os/exec"
"strings"
"github.com/ollama/ollama/envconfig"
"golang.org/x/mod/semver"
)
@@ -32,6 +33,10 @@ func (c *Codex) Run(model string, args []string) error {
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Env = append(os.Environ(),
"OPENAI_BASE_URL="+envconfig.Host().String()+"/v1/",
"OPENAI_API_KEY=ollama",
)
return cmd.Run()
}
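A runnable sketch of what this launch wiring amounts to, with the host hardcoded as an assumption (the real code derives it from envconfig.Host()):

```go
package main

import (
	"log"
	"os"
	"os/exec"
)

func main() {
	// Assumed local default address; the real code resolves it via envconfig.Host().
	base := "http://127.0.0.1:11434"

	cmd := exec.Command("codex")
	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
	// OPENAI_BASE_URL routes OpenAI-style calls (e.g. /v1/responses) to the
	// local server; the API key is a placeholder rather than a real credential.
	cmd.Env = append(os.Environ(),
		"OPENAI_BASE_URL="+base+"/v1/",
		"OPENAI_API_KEY=ollama",
	)
	if err := cmd.Run(); err != nil {
		log.Fatal(err)
	}
}
```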

View File

@@ -415,6 +415,12 @@ type multiSelectorModel struct {
cancelled bool
confirmed bool
width int
// multi enables full multi-select editing mode. The zero value (false)
// shows a single-select picker where Enter adds the chosen model to
// the existing list. Tab toggles between modes.
multi bool
singleAdd string // model picked in single mode
}
func newMultiSelectorModel(title string, items []SelectItem, preChecked []string) multiSelectorModel {
@@ -429,13 +435,23 @@ func newMultiSelectorModel(title string, items []SelectItem, preChecked []string
m.itemIndex[item.Name] = i
}
- for _, name := range preChecked {
- if idx, ok := m.itemIndex[name]; ok {
+ // Reverse order so preChecked[0] (the current default) ends up last
+ // in checkOrder, matching the "last checked = default" convention.
+ for i := len(preChecked) - 1; i >= 0; i-- {
+ if idx, ok := m.itemIndex[preChecked[i]]; ok {
m.checked[idx] = true
m.checkOrder = append(m.checkOrder, idx)
}
}
+ // Position cursor on the current default model
+ if len(preChecked) > 0 {
+ if idx, ok := m.itemIndex[preChecked[0]]; ok {
+ m.cursor = idx
+ m.updateScroll(m.otherStart())
+ }
+ }
return m
}
@@ -546,14 +562,25 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
+ case tea.KeyTab:
+ m.multi = !m.multi
case tea.KeyEnter:
- if len(m.checkOrder) > 0 {
+ if !m.multi {
+ if len(filtered) > 0 && m.cursor < len(filtered) {
+ m.singleAdd = filtered[m.cursor].Name
+ m.confirmed = true
+ return m, tea.Quit
+ }
+ } else if len(m.checkOrder) > 0 {
m.confirmed = true
return m, tea.Quit
}
case tea.KeySpace:
- m.toggleItem()
+ if m.multi {
+ m.toggleItem()
+ }
case tea.KeyUp:
if m.cursor > 0 {
@@ -592,7 +619,9 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
// On some terminals (e.g. Windows PowerShell), space arrives as
// KeyRunes instead of KeySpace. Intercept it so toggle still works.
if len(msg.Runes) == 1 && msg.Runes[0] == ' ' {
- m.toggleItem()
+ if m.multi {
+ m.toggleItem()
+ }
} else {
m.filter += string(msg.Runes)
m.cursor = 0
@@ -604,6 +633,19 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
return m, nil
}
func (m multiSelectorModel) renderSingleItem(s *strings.Builder, item SelectItem, idx int) {
if idx == m.cursor {
s.WriteString(selectorSelectedItemStyle.Render("▸ " + item.Name))
} else {
s.WriteString(selectorItemStyle.Render(item.Name))
}
s.WriteString("\n")
if item.Description != "" {
s.WriteString(selectorDescLineStyle.Render(item.Description))
s.WriteString("\n")
}
}
func (m multiSelectorModel) renderMultiItem(s *strings.Builder, item SelectItem, idx int) {
origIdx := m.itemIndex[item.Name]
@@ -615,7 +657,7 @@ func (m multiSelectorModel) renderMultiItem(s *strings.Builder, item SelectItem,
}
suffix := ""
- if len(m.checkOrder) > 0 && m.checkOrder[0] == origIdx {
+ if len(m.checkOrder) > 0 && m.checkOrder[len(m.checkOrder)-1] == origIdx {
suffix = " " + selectorDefaultTagStyle.Render("(default)")
}
@@ -637,6 +679,11 @@ func (m multiSelectorModel) View() string {
return ""
}
renderItem := m.renderSingleItem
if m.multi {
renderItem = m.renderMultiItem
}
var s strings.Builder
s.WriteString(selectorTitleStyle.Render(m.title))
@@ -661,7 +708,7 @@ func (m multiSelectorModel) View() string {
if idx >= len(filtered) {
break
}
- m.renderMultiItem(&s, filtered[idx], idx)
+ renderItem(&s, filtered[idx], idx)
}
if remaining := len(filtered) - m.scrollOffset - displayCount; remaining > 0 {
@@ -684,7 +731,7 @@ func (m multiSelectorModel) View() string {
s.WriteString(sectionHeaderStyle.Render("Recommended"))
s.WriteString("\n")
for _, idx := range recItems {
- m.renderMultiItem(&s, filtered[idx], idx)
+ renderItem(&s, filtered[idx], idx)
}
}
@@ -704,7 +751,7 @@ func (m multiSelectorModel) View() string {
if idx >= len(otherItems) {
break
}
- m.renderMultiItem(&s, filtered[otherItems[idx]], otherItems[idx])
+ renderItem(&s, filtered[otherItems[idx]], otherItems[idx])
}
if remaining := len(otherItems) - m.scrollOffset - displayCount; remaining > 0 {
@@ -716,15 +763,18 @@ func (m multiSelectorModel) View() string {
s.WriteString("\n")
- count := m.selectedCount()
- if count == 0 {
- s.WriteString(selectorDescStyle.Render(" Select at least one model."))
+ if !m.multi {
+ s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • esc cancel"))
} else {
- s.WriteString(selectorDescStyle.Render(fmt.Sprintf(" %d selected - press enter to continue", count)))
+ count := m.selectedCount()
+ if count == 0 {
+ s.WriteString(selectorDescStyle.Render(" Select at least one model."))
+ } else {
+ s.WriteString(selectorDescStyle.Render(fmt.Sprintf(" %d selected - press enter to continue", count)))
+ }
+ s.WriteString("\n\n")
+ s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • esc cancel"))
}
- s.WriteString("\n\n")
- s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • enter confirm • esc cancel"))
result := s.String()
if m.width > 0 {
@@ -747,18 +797,28 @@ func SelectMultiple(title string, items []SelectItem, preChecked []string) ([]st
}
fm := finalModel.(multiSelectorModel)
- if fm.cancelled {
+ if fm.cancelled || !fm.confirmed {
return nil, ErrCancelled
}
- if !fm.confirmed {
- return nil, ErrCancelled
+ // Single-add mode: prepend the picked model, keep existing models deduped
+ if fm.singleAdd != "" {
+ result := []string{fm.singleAdd}
+ for _, name := range preChecked {
+ if name != fm.singleAdd {
+ result = append(result, name)
+ }
+ }
+ return result, nil
}
- var result []string
+ // Multi-edit mode: last checked is default (first in result)
+ last := fm.checkOrder[len(fm.checkOrder)-1]
+ result := []string{fm.items[last].Name}
for _, idx := range fm.checkOrder {
- result = append(result, fm.items[idx].Name)
+ if idx != last {
+ result = append(result, fm.items[idx].Name)
+ }
}
return result, nil
}
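The hunks above encode a round-trip convention worth spelling out: the saved model list puts the default first, while the selector's checkOrder puts it last. A standalone sketch of just that convention, with hypothetical helper names (toCheckOrder and toSaved do not exist in this package):

```go
package main

import "fmt"

// Saved lists put the default model first, so loading reverses them:
// the default ends up last in checkOrder ("last checked = default").
func toCheckOrder(saved []string) []string {
	order := make([]string, 0, len(saved))
	for i := len(saved) - 1; i >= 0; i-- {
		order = append(order, saved[i])
	}
	return order
}

// Saving inverts it: the last-checked model becomes the default and
// moves to the front of the result.
func toSaved(checkOrder []string) []string {
	last := checkOrder[len(checkOrder)-1]
	saved := []string{last}
	for _, name := range checkOrder {
		if name != last {
			saved = append(saved, name)
		}
	}
	return saved
}

func main() {
	saved := []string{"llama3", "qwen3"} // "llama3" is the default
	order := toCheckOrder(saved)
	fmt.Println(order)          // [qwen3 llama3] — default is last
	fmt.Println(toSaved(order)) // [llama3 qwen3] — default survives the round trip
}
```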

View File

@@ -539,6 +539,7 @@ func TestMultiView_CursorIndicator(t *testing.T) {
func TestMultiView_CheckedItemShowsX(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), []string{"a"})
m.multi = true
content := m.View()
if !strings.Contains(content, "[x]") {
@@ -550,11 +551,18 @@ func TestMultiView_CheckedItemShowsX(t *testing.T) {
}
func TestMultiView_DefaultTag(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), []string{"a"})
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), []string{"a", "b"})
m.multi = true
content := m.View()
if !strings.Contains(content, "(default)") {
t.Error("first checked item should have (default) tag")
t.Error("should have (default) tag")
}
// preChecked[0] ("a") should be the default (last in checkOrder)
aIdx := strings.Index(content, "a")
defaultIdx := strings.Index(content, "(default)")
if defaultIdx < aIdx {
t.Error("(default) tag should appear after 'a' (the current default)")
}
}
@@ -585,6 +593,7 @@ func TestMultiView_OverflowIndicator(t *testing.T) {
func TestMultiUpdate_SpaceTogglesItem(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
m.multi = true
m.cursor = 1
// Simulate space delivered as tea.KeySpace
@@ -601,6 +610,7 @@ func TestMultiUpdate_SpaceTogglesItem(t *testing.T) {
func TestMultiUpdate_SpaceRuneTogglesItem(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
m.multi = true
m.cursor = 1
// Simulate space delivered as tea.KeyRunes (Windows PowerShell behavior)
@@ -618,6 +628,161 @@ func TestMultiUpdate_SpaceRuneTogglesItem(t *testing.T) {
}
}
// --- Single-add mode ---
func TestMulti_StartsInSingleMode(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
if m.multi {
t.Error("should start in single mode (multi=false)")
}
}
func TestMulti_SingleModeNoCheckboxes(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
content := m.View()
if strings.Contains(content, "[x]") || strings.Contains(content, "[ ]") {
t.Error("single mode should not show checkboxes")
}
if !strings.Contains(content, "▸") {
t.Error("single mode should show cursor indicator")
}
}
func TestMulti_SingleModeEnterPicksItem(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
m.cursor = 1
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyEnter})
m = updated.(multiSelectorModel)
if m.singleAdd != "b" {
t.Errorf("enter in single mode should pick cursor item, got %q", m.singleAdd)
}
if !m.confirmed {
t.Error("should set confirmed")
}
}
func TestMulti_SingleModeSpaceIsNoop(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
m.cursor = 0
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeySpace})
m = updated.(multiSelectorModel)
if len(m.checked) != 0 {
t.Error("space in single mode should not toggle items")
}
}
func TestMulti_SingleModeSpaceRuneIsNoop(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
m.cursor = 0
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune{' '}})
m = updated.(multiSelectorModel)
if len(m.checked) != 0 {
t.Error("space rune in single mode should not toggle items")
}
if m.filter != "" {
t.Error("space rune in single mode should not add to filter")
}
}
func TestMulti_TabTogglesMode(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b"), nil)
if m.multi {
t.Fatal("should start in single mode")
}
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyTab})
m = updated.(multiSelectorModel)
if !m.multi {
t.Error("tab should switch to multi mode")
}
updated, _ = m.Update(tea.KeyMsg{Type: tea.KeyTab})
m = updated.(multiSelectorModel)
if m.multi {
t.Error("tab should switch back to single mode")
}
}
func TestMulti_SingleModeHelpText(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a"), nil)
content := m.View()
if !strings.Contains(content, "tab add multiple") {
t.Error("single mode should show 'tab add multiple' in help")
}
}
func TestMulti_MultiModeHelpText(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a"), nil)
m.multi = true
content := m.View()
if !strings.Contains(content, "tab select single") {
t.Error("multi mode should show 'tab select single' in help")
}
}
// --- preChecked initialization order ---
func TestMulti_PreCheckedDefaultIsLast(t *testing.T) {
// preChecked[0] ("a") is the current default and should end up
// last in checkOrder so it gets the (default) tag.
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), []string{"a", "b", "c"})
if len(m.checkOrder) != 3 {
t.Fatalf("expected 3 in checkOrder, got %d", len(m.checkOrder))
}
lastIdx := m.checkOrder[len(m.checkOrder)-1]
if m.items[lastIdx].Name != "a" {
t.Errorf("preChecked[0] should be last in checkOrder, got %q", m.items[lastIdx].Name)
}
}
func TestMulti_CursorOnDefaultModel(t *testing.T) {
// preChecked[0] ("b") is the default; cursor should start on it
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), []string{"b", "c"})
if m.cursor != 1 {
t.Errorf("cursor should be on preChecked[0] ('b') at index 1, got %d", m.cursor)
}
}
// --- Multi-mode last-checked is default ---
func TestMulti_LastCheckedIsDefault(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("alpha", "beta", "gamma"), nil)
m.multi = true
// Check "alpha" then "gamma"
m.cursor = 0
m.toggleItem()
m.cursor = 2
m.toggleItem()
// Last checked ("gamma") should be at the end of checkOrder
lastIdx := m.checkOrder[len(m.checkOrder)-1]
if m.items[lastIdx].Name != "gamma" {
t.Errorf("last checked should be 'gamma', got %q", m.items[lastIdx].Name)
}
// The (default) tag renders based on checkOrder[len-1]
content := m.View()
if !strings.Contains(content, "(default)") {
t.Fatal("should show (default) tag")
}
// "alpha" line should NOT have the default tag
for _, line := range strings.Split(content, "\n") {
if strings.Contains(line, "alpha") && strings.Contains(line, "(default)") {
t.Error("'alpha' (first checked) should not have (default) tag")
}
}
}
// Key message helpers for testing
type keyType = int

View File

@@ -429,8 +429,24 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
}
if m.multiModalSelector.confirmed {
var selected []string
- for _, idx := range m.multiModalSelector.checkOrder {
- selected = append(selected, m.multiModalSelector.items[idx].Name)
+ if m.multiModalSelector.singleAdd != "" {
+ // Single-add mode: prepend picked model, keep existing deduped
+ selected = []string{m.multiModalSelector.singleAdd}
+ for _, name := range config.IntegrationModels(m.items[m.cursor].integration) {
+ if name != m.multiModalSelector.singleAdd {
+ selected = append(selected, name)
+ }
+ }
+ } else {
+ // Last checked is default (first in result)
+ co := m.multiModalSelector.checkOrder
+ last := co[len(co)-1]
+ selected = []string{m.multiModalSelector.items[last].Name}
+ for _, idx := range co {
+ if idx != last {
+ selected = append(selected, m.multiModalSelector.items[idx].Name)
+ }
+ }
}
if len(selected) > 0 {
m.changeModels = selected

View File

@@ -15,7 +15,6 @@
- [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings)
- [List Running Models](#list-running-models)
- [Usage](#usage)
- [Version](#version)
- [Experimental: Image Generation](#image-generation-experimental)
@@ -1855,53 +1854,6 @@ curl http://localhost:11434/api/embeddings -d '{
}
```
## Usage
```
GET /api/usage
```
Show aggregate usage statistics per model since the server started. All timestamps are UTC in RFC 3339 format.
### Examples
#### Request
```shell
curl http://localhost:11434/api/usage
```
#### Response
```json
{
"start": "2025-01-27T20:00:00Z",
"usage": [
{
"model": "llama3.2",
"requests": 5,
"prompt_tokens": 130,
"completion_tokens": 890
},
{
"model": "deepseek-r1",
"requests": 2,
"prompt_tokens": 48,
"completion_tokens": 312
}
]
}
```
#### Response fields
- `start`: when the server started tracking usage (UTC, RFC 3339)
- `usage`: list of per-model usage statistics
- `model`: model name
- `requests`: total number of completed requests
- `prompt_tokens`: total prompt tokens evaluated
- `completion_tokens`: total completion tokens generated
## Version
```

go.mod
View File

@@ -26,6 +26,7 @@ require (
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
github.com/dlclark/regexp2 v1.11.4
github.com/emirpasic/gods/v2 v2.0.0-alpha
github.com/klauspost/compress v1.18.3
github.com/mattn/go-runewidth v0.0.16
github.com/nlpodyssey/gopickle v0.3.0
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c

go.sum
View File

@@ -122,7 +122,6 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI=
@@ -150,8 +149,9 @@ github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
- github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
- github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+ github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
+ github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=

View File

@@ -11,6 +11,7 @@ import (
"time"
"github.com/gin-gonic/gin"
"github.com/klauspost/compress/zstd"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/openai"
@@ -496,6 +497,17 @@ func (w *ResponsesWriter) Write(data []byte) (int, error) {
func ResponsesMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
if c.GetHeader("Content-Encoding") == "zstd" {
reader, err := zstd.NewReader(c.Request.Body, zstd.WithDecoderMaxMemory(8<<20))
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "failed to decompress zstd body"))
return
}
defer reader.Close()
c.Request.Body = io.NopCloser(reader)
c.Request.Header.Del("Content-Encoding")
}
var req openai.ResponsesRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
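For reference, a self-contained sketch of the capping technique used above, built on the same github.com/klauspost/compress/zstd API (the payload and helper name are illustrative):

```go
package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/klauspost/compress/zstd"
)

// decompressCapped is an illustrative helper: WithDecoderMaxMemory bounds
// the decoder's memory use so an oversized or malicious stream errors out
// instead of exhausting RAM.
func decompressCapped(compressed []byte, maxMem uint64) ([]byte, error) {
	r, err := zstd.NewReader(bytes.NewReader(compressed), zstd.WithDecoderMaxMemory(maxMem))
	if err != nil {
		return nil, err
	}
	defer r.Close()
	return io.ReadAll(r)
}

func main() {
	var buf bytes.Buffer
	w, _ := zstd.NewWriter(&buf)
	w.Write([]byte(`{"model": "test-model", "input": "Hello"}`))
	w.Close()

	out, err := decompressCapped(buf.Bytes(), 8<<20) // 8MB cap, matching the middleware
	fmt.Println(string(out), err)
}
```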

View File

@@ -14,6 +14,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/google/go-cmp/cmp"
"github.com/klauspost/compress/zstd"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/openai"
@@ -1238,3 +1239,102 @@ func TestImageEditsMiddleware(t *testing.T) {
})
}
}
func zstdCompress(t *testing.T, data []byte) []byte {
t.Helper()
var buf bytes.Buffer
w, err := zstd.NewWriter(&buf)
if err != nil {
t.Fatal(err)
}
if _, err := w.Write(data); err != nil {
t.Fatal(err)
}
if err := w.Close(); err != nil {
t.Fatal(err)
}
return buf.Bytes()
}
func TestResponsesMiddlewareZstd(t *testing.T) {
tests := []struct {
name string
body string
useZstd bool
oversized bool
wantCode int
wantModel string
wantMessage string
}{
{
name: "plain JSON",
body: `{"model": "test-model", "input": "Hello"}`,
wantCode: http.StatusOK,
wantModel: "test-model",
wantMessage: "Hello",
},
{
name: "zstd compressed",
body: `{"model": "test-model", "input": "Hello"}`,
useZstd: true,
wantCode: http.StatusOK,
wantModel: "test-model",
wantMessage: "Hello",
},
{
name: "zstd over max decompressed size",
oversized: true,
useZstd: true,
wantCode: http.StatusBadRequest,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var capturedRequest *api.ChatRequest
gin.SetMode(gin.TestMode)
router := gin.New()
router.Use(ResponsesMiddleware(), captureRequestMiddleware(&capturedRequest))
router.Handle(http.MethodPost, "/v1/responses", func(c *gin.Context) {
c.Status(http.StatusOK)
})
var bodyReader io.Reader
if tt.oversized {
bodyReader = bytes.NewReader(zstdCompress(t, bytes.Repeat([]byte("A"), 9<<20)))
} else if tt.useZstd {
bodyReader = bytes.NewReader(zstdCompress(t, []byte(tt.body)))
} else {
bodyReader = strings.NewReader(tt.body)
}
req, _ := http.NewRequest(http.MethodPost, "/v1/responses", bodyReader)
req.Header.Set("Content-Type", "application/json")
if tt.useZstd || tt.oversized {
req.Header.Set("Content-Encoding", "zstd")
}
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
if resp.Code != tt.wantCode {
t.Fatalf("expected status %d, got %d: %s", tt.wantCode, resp.Code, resp.Body.String())
}
if tt.wantCode != http.StatusOK {
return
}
if capturedRequest == nil {
t.Fatal("expected captured request, got nil")
}
if capturedRequest.Model != tt.wantModel {
t.Fatalf("expected model %q, got %q", tt.wantModel, capturedRequest.Model)
}
if len(capturedRequest.Messages) != 1 || capturedRequest.Messages[0].Content != tt.wantMessage {
t.Fatalf("expected single user message %q, got %+v", tt.wantMessage, capturedRequest.Messages)
}
})
}
}

View File

@@ -2,6 +2,10 @@
# This script installs Ollama on Linux and macOS.
# It detects the current operating system architecture and installs the appropriate version of Ollama.
# Wrap script in main function so that a truncated partial download doesn't end
# up executing half a script.
main() {
set -eu
red="$( (/usr/bin/tput bold || :; /usr/bin/tput setaf 1 || :) 2>&-)"
@@ -446,3 +450,6 @@ fi
status "NVIDIA GPU ready."
install_success
}
main
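The reason the wrapper is safe: nothing executes until the final line, so a script cut off mid-body is a syntax error rather than a half-run install. A generic sketch of the pattern (in shell, matching this file; not the installer itself):

```sh
#!/bin/sh
# Generic sketch of the wrapper pattern. If the download is cut off
# anywhere inside main(), the function body is left unterminated and
# the shell reports a syntax error at EOF instead of running anything.
main() {
    set -eu
    echo "download release"
    echo "install binaries"
}

# Only reached if the entire script arrived intact.
main
```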

View File

@@ -91,8 +91,6 @@ type Server struct {
aliasesOnce sync.Once
aliases *store
aliasesErr error
lowVRAM bool
usage *UsageTracker
}
func init() {
@@ -291,10 +289,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
c.Header("Content-Type", contentType)
fn := func(resp api.GenerateResponse) error {
if resp.Done {
s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
}
resp.Model = origModel
resp.RemoteModel = m.Config.RemoteModel
resp.RemoteHost = m.Config.RemoteHost
@@ -601,8 +595,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
}
res.Context = tokens
}
s.usage.Record(req.Model, cr.PromptEvalCount, cr.EvalCount)
}
if builtinParser != nil {
@@ -1630,8 +1622,6 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
r.POST("/api/experimental/aliases", s.CreateAliasHandler)
r.DELETE("/api/experimental/aliases", s.DeleteAliasHandler)
r.GET("/api/usage", s.UsageHandler)
// Inference
r.GET("/api/ps", s.PsHandler)
r.POST("/api/generate", s.GenerateHandler)
@@ -1702,7 +1692,7 @@ func Serve(ln net.Listener) error {
}
}
- s := &Server{addr: ln.Addr(), usage: NewUsageTracker()}
+ s := &Server{addr: ln.Addr()}
var rc *ollama.Registry
if useClient2 {
@@ -1937,10 +1927,6 @@ func (s *Server) SignoutHandler(c *gin.Context) {
c.JSON(http.StatusOK, nil)
}
func (s *Server) UsageHandler(c *gin.Context) {
c.JSON(http.StatusOK, s.usage.Stats())
}
func (s *Server) PsHandler(c *gin.Context) {
models := []api.ProcessModelResponse{}
@@ -2111,10 +2097,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
c.Header("Content-Type", contentType)
fn := func(resp api.ChatResponse) error {
if resp.Done {
s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
}
resp.Model = origModel
resp.RemoteModel = m.Config.RemoteModel
resp.RemoteHost = m.Config.RemoteHost
@@ -2335,8 +2317,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
res.DoneReason = r.DoneReason.String()
res.TotalDuration = time.Since(checkpointStart)
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
s.usage.Record(req.Model, r.PromptEvalCount, r.EvalCount)
}
if builtinParser != nil {

View File

@@ -30,7 +30,6 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -225,7 +224,6 @@ func TestChatDebugRenderOnly(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -35,7 +35,6 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -221,7 +220,6 @@ func TestGenerateWithDebugRenderOnly(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -88,39 +88,19 @@ func TestGenerateChatRemote(t *testing.T) {
if r.Method != http.MethodPost {
t.Errorf("Expected POST request, got %s", r.Method)
}
if r.URL.Path != "/api/chat" {
t.Errorf("Expected path '/api/chat', got %s", r.URL.Path)
}
w.WriteHeader(http.StatusOK)
w.Header().Set("Content-Type", "application/json")
switch r.URL.Path {
case "/api/chat":
resp := api.ChatResponse{
Model: "test",
Done: true,
DoneReason: "load",
Metrics: api.Metrics{
PromptEvalCount: 10,
EvalCount: 20,
},
}
if err := json.NewEncoder(w).Encode(&resp); err != nil {
t.Fatal(err)
}
case "/api/generate":
resp := api.GenerateResponse{
Model: "test",
Done: true,
DoneReason: "stop",
Metrics: api.Metrics{
PromptEvalCount: 5,
EvalCount: 15,
},
}
if err := json.NewEncoder(w).Encode(&resp); err != nil {
t.Fatal(err)
}
default:
t.Errorf("unexpected path %s", r.URL.Path)
resp := api.ChatResponse{
Model: "test",
Done: true,
DoneReason: "load",
}
if err := json.NewEncoder(w).Encode(&resp); err != nil {
t.Fatal(err)
}
}))
defer rs.Close()
@@ -131,7 +111,7 @@ func TestGenerateChatRemote(t *testing.T) {
}
t.Setenv("OLLAMA_REMOTES", p.Hostname())
- s := Server{usage: NewUsageTracker()}
+ s := Server{}
w := createRequest(t, s.CreateHandler, api.CreateRequest{
Model: "test-cloud",
RemoteHost: rs.URL,
@@ -179,61 +159,6 @@ func TestGenerateChatRemote(t *testing.T) {
t.Errorf("expected done reason load, got %s", actual.DoneReason)
}
})
t.Run("remote chat usage tracking", func(t *testing.T) {
stats := s.usage.Stats()
found := false
for _, m := range stats.Usage {
if m.Model == "test-cloud" {
found = true
if m.Requests != 1 {
t.Errorf("expected 1 request, got %d", m.Requests)
}
if m.PromptTokens != 10 {
t.Errorf("expected 10 prompt tokens, got %d", m.PromptTokens)
}
if m.CompletionTokens != 20 {
t.Errorf("expected 20 completion tokens, got %d", m.CompletionTokens)
}
}
}
if !found {
t.Error("expected usage entry for test-cloud")
}
})
t.Run("remote generate usage tracking", func(t *testing.T) {
// Reset the tracker for a clean test
s.usage = NewUsageTracker()
w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
Model: "test-cloud",
Prompt: "hello",
})
if w.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d", w.Code)
}
stats := s.usage.Stats()
found := false
for _, m := range stats.Usage {
if m.Model == "test-cloud" {
found = true
if m.Requests != 1 {
t.Errorf("expected 1 request, got %d", m.Requests)
}
if m.PromptTokens != 5 {
t.Errorf("expected 5 prompt tokens, got %d", m.PromptTokens)
}
if m.CompletionTokens != 15 {
t.Errorf("expected 15 completion tokens, got %d", m.CompletionTokens)
}
}
}
if !found {
t.Error("expected usage entry for test-cloud")
}
})
}
func TestGenerateChat(t *testing.T) {
@@ -252,7 +177,6 @@ func TestGenerateChat(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -970,7 +894,6 @@ func TestGenerate(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -1455,7 +1378,6 @@ func TestGenerateLogprobs(t *testing.T) {
}
s := &Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -1636,7 +1558,6 @@ func TestChatLogprobs(t *testing.T) {
}
s := &Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -1747,7 +1668,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
}
s := &Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -2194,7 +2114,6 @@ func TestGenerateUnload(t *testing.T) {
var loadFnCalled bool
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -2296,7 +2215,6 @@ func TestGenerateWithImages(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -2475,7 +2393,6 @@ func TestImageGenerateStreamFalse(t *testing.T) {
opts := api.DefaultOptions()
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -255,7 +255,6 @@ func TestChatHarmonyParserStreamingRealtime(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -407,7 +406,6 @@ func TestChatHarmonyParserStreamingSimple(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),
@@ -590,7 +588,6 @@ func TestChatHarmonyParserStreaming(t *testing.T) {
}
s := Server{
usage: NewUsageTracker(),
sched: &Scheduler{
pendingReqCh: make(chan *LlmRequest, 1),
finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -1,62 +0,0 @@
package server
import (
"sync"
"time"
"github.com/ollama/ollama/api"
)
type ModelUsage struct {
Requests int64
PromptTokens int64
CompletionTokens int64
}
type UsageTracker struct {
mu sync.Mutex
start time.Time
models map[string]*ModelUsage
}
func NewUsageTracker() *UsageTracker {
return &UsageTracker{
start: time.Now().UTC(),
models: make(map[string]*ModelUsage),
}
}
func (u *UsageTracker) Record(model string, promptTokens, completionTokens int) {
u.mu.Lock()
defer u.mu.Unlock()
m, ok := u.models[model]
if !ok {
m = &ModelUsage{}
u.models[model] = m
}
m.Requests++
m.PromptTokens += int64(promptTokens)
m.CompletionTokens += int64(completionTokens)
}
func (u *UsageTracker) Stats() api.UsageResponse {
u.mu.Lock()
defer u.mu.Unlock()
byModel := make([]api.ModelUsageData, 0, len(u.models))
for model, usage := range u.models {
byModel = append(byModel, api.ModelUsageData{
Model: model,
Requests: usage.Requests,
PromptTokens: usage.PromptTokens,
CompletionTokens: usage.CompletionTokens,
})
}
return api.UsageResponse{
Start: u.start,
Usage: byModel,
}
}

View File

@@ -1,136 +0,0 @@
package server
import (
"encoding/json"
"net/http"
"net/http/httptest"
"sync"
"testing"
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/api"
)
func TestUsageTrackerRecord(t *testing.T) {
tracker := NewUsageTracker()
tracker.Record("model-a", 10, 20)
tracker.Record("model-a", 5, 15)
tracker.Record("model-b", 100, 200)
stats := tracker.Stats()
if len(stats.Usage) != 2 {
t.Fatalf("expected 2 models, got %d", len(stats.Usage))
}
lookup := make(map[string]api.ModelUsageData)
for _, m := range stats.Usage {
lookup[m.Model] = m
}
a := lookup["model-a"]
if a.Requests != 2 {
t.Errorf("model-a requests: expected 2, got %d", a.Requests)
}
if a.PromptTokens != 15 {
t.Errorf("model-a prompt tokens: expected 15, got %d", a.PromptTokens)
}
if a.CompletionTokens != 35 {
t.Errorf("model-a completion tokens: expected 35, got %d", a.CompletionTokens)
}
b := lookup["model-b"]
if b.Requests != 1 {
t.Errorf("model-b requests: expected 1, got %d", b.Requests)
}
if b.PromptTokens != 100 {
t.Errorf("model-b prompt tokens: expected 100, got %d", b.PromptTokens)
}
if b.CompletionTokens != 200 {
t.Errorf("model-b completion tokens: expected 200, got %d", b.CompletionTokens)
}
}
func TestUsageTrackerConcurrent(t *testing.T) {
tracker := NewUsageTracker()
var wg sync.WaitGroup
for range 100 {
wg.Add(1)
go func() {
defer wg.Done()
tracker.Record("model-a", 1, 2)
}()
}
wg.Wait()
stats := tracker.Stats()
if len(stats.Usage) != 1 {
t.Fatalf("expected 1 model, got %d", len(stats.Usage))
}
m := stats.Usage[0]
if m.Requests != 100 {
t.Errorf("requests: expected 100, got %d", m.Requests)
}
if m.PromptTokens != 100 {
t.Errorf("prompt tokens: expected 100, got %d", m.PromptTokens)
}
if m.CompletionTokens != 200 {
t.Errorf("completion tokens: expected 200, got %d", m.CompletionTokens)
}
}
func TestUsageTrackerStart(t *testing.T) {
tracker := NewUsageTracker()
stats := tracker.Stats()
if stats.Start.IsZero() {
t.Error("expected non-zero start time")
}
}
func TestUsageHandler(t *testing.T) {
gin.SetMode(gin.TestMode)
s := &Server{
usage: NewUsageTracker(),
}
s.usage.Record("llama3", 50, 100)
s.usage.Record("llama3", 25, 50)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest(http.MethodGet, "/api/usage", nil)
s.UsageHandler(c)
if w.Code != http.StatusOK {
t.Fatalf("expected status 200, got %d", w.Code)
}
var resp api.UsageResponse
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to unmarshal response: %v", err)
}
if len(resp.Usage) != 1 {
t.Fatalf("expected 1 model, got %d", len(resp.Usage))
}
m := resp.Usage[0]
if m.Model != "llama3" {
t.Errorf("expected model llama3, got %s", m.Model)
}
if m.Requests != 2 {
t.Errorf("expected 2 requests, got %d", m.Requests)
}
if m.PromptTokens != 75 {
t.Errorf("expected 75 prompt tokens, got %d", m.PromptTokens)
}
if m.CompletionTokens != 150 {
t.Errorf("expected 150 completion tokens, got %d", m.CompletionTokens)
}
}