Compare commits

..

1 Commits

Author SHA1 Message Date
Bruce MacDonald
e6f5a982d3 cmd: add usage cmd to chat to see token consumption
Adds a `/usage` command to interactive CLI chat sessions that displays the tokens used in the current session. This can be used alongside the model's context window to understand when a context shift is going to happen.
2026-01-27 17:14:25 -08:00
6 changed files with 38 additions and 54 deletions

View File

@@ -1419,10 +1419,10 @@ func thinkingOutputClosingText(plainText bool) string {
return readline.ColorGrey + readline.ColorBold + text + readline.ColorDefault
}
func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
func chat(cmd *cobra.Command, opts runOptions) (*api.Message, *api.Metrics, error) {
client, err := api.ClientFromEnvironment()
if err != nil {
return nil, err
return nil, nil, err
}
p := progress.NewProgress(os.Stderr)
@@ -1515,7 +1515,7 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil, nil
return nil, nil, nil
}
// this error should ideally be wrapped properly by the client
@@ -1523,9 +1523,9 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
p.StopAndClear()
fmt.Println("An error occurred while processing your message. Please try again.")
fmt.Println()
return nil, nil
return nil, nil, nil
}
return nil, err
return nil, nil, err
}
if len(opts.Messages) > 0 {
@@ -1535,14 +1535,14 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
verbose, err := cmd.Flags().GetBool("verbose")
if err != nil {
return nil, err
return nil, nil, err
}
if verbose {
latest.Summary()
}
return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, nil
return &api.Message{Role: role, Thinking: thinkingContent.String(), Content: fullResponse.String()}, &latest.Metrics, nil
}
func generate(cmd *cobra.Command, opts runOptions) error {
@@ -1888,7 +1888,7 @@ func NewCLI() *cobra.Command {
serveCmd := &cobra.Command{
Use: "serve",
Aliases: []string{"start"},
Short: "Start Ollama",
Short: "Start ollama",
Args: cobra.ExactArgs(0),
RunE: RunServer,
}

View File

@@ -275,11 +275,7 @@ func parseInput(r io.Reader) (inputEvent, byte, error) {
func renderSelect(w io.Writer, prompt string, s *selectState) int {
filtered := s.filtered()
if s.filter == "" {
fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
} else {
fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
}
fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
lineCount := 1
if len(filtered) == 0 {
@@ -318,11 +314,7 @@ func renderSelect(w io.Writer, prompt string, s *selectState) int {
func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
filtered := s.filtered()
if s.filter == "" {
fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
} else {
fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
}
fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
lineCount := 1
if len(filtered) == 0 {

View File

@@ -30,6 +30,9 @@ const (
)
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
var sessionPromptTokens int64
var sessionCompletionTokens int64
usage := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set Set session variables")
@@ -37,6 +40,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
fmt.Fprintln(os.Stderr, " /clear Clear session context")
fmt.Fprintln(os.Stderr, " /usage Show session token usage")
fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
@@ -445,6 +449,9 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
} else {
usageShow()
}
case strings.HasPrefix(line, "/usage"):
fmt.Printf("prompt tokens: %d\n", sessionPromptTokens)
fmt.Printf("completion tokens: %d\n", sessionCompletionTokens)
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
args := strings.Fields(line)
if len(args) > 1 {
@@ -499,7 +506,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts.Messages = append(opts.Messages, newMessage)
assistant, err := chat(cmd, opts)
assistant, metrics, err := chat(cmd, opts)
if err != nil {
if strings.Contains(err.Error(), "does not support thinking") ||
strings.Contains(err.Error(), "invalid think value") {
@@ -509,6 +516,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
}
return err
}
if metrics != nil {
sessionPromptTokens += int64(metrics.PromptEvalCount)
sessionCompletionTokens += int64(metrics.EvalCount)
}
if assistant != nil {
opts.Messages = append(opts.Messages, *assistant)
}

View File

@@ -9,7 +9,7 @@ OpenCode is an open-source AI coding assistant that runs in your terminal.
Install the [OpenCode CLI](https://opencode.ai):
```bash
curl -fsSL https://opencode.ai/install | bash
curl -fsSL https://opencode.ai/install.sh | bash
```
<Note>OpenCode requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>

View File

@@ -19,9 +19,8 @@ import (
type qwenParserState int
const (
toolOpenTag = "<tool_call>"
toolCloseTag = "</tool_call>"
functionOpenStart = "<function=" // qwen3-coder sometimes omits <tool_call> but starts with this
toolOpenTag = "<tool_call>"
toolCloseTag = "</tool_call>"
)
const (
@@ -139,26 +138,11 @@ func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
p.acc.WriteString(after)
p.state = qwenParserState_CollectingToolContent
return events, true
} else if idx := strings.Index(p.acc.String(), functionOpenStart); idx != -1 {
// qwen3-coder sometimes omits <tool_call> but starts with <function=
// we treat this as the start of a tool call, keeping the <function= prefix
// since it's part of the raw tool call content
before := p.acc.String()[:idx]
before = strings.TrimRightFunc(before, unicode.IsSpace)
if len(before) > 0 {
events = append(events, qwenEventContent{content: before})
}
after := p.acc.String()[idx:]
p.acc.Reset()
p.acc.WriteString(after)
p.state = qwenParserState_CollectingToolContent
return events, true
} else if toolOverlap, funcOverlap := overlap(p.acc.String(), toolOpenTag), overlap(p.acc.String(), functionOpenStart); toolOverlap > 0 || funcOverlap > 0 {
} else if overlap := overlap(p.acc.String(), toolOpenTag); overlap > 0 {
// we found a partial tool open tag, so we can emit the unambiguous part,
// which is the (trailing-whitespace trimmed) content before the partial
// tool open tag
maxOverlap := max(toolOverlap, funcOverlap)
beforePartialTag := p.acc.String()[:len(p.acc.String())-maxOverlap]
beforePartialTag := p.acc.String()[:len(p.acc.String())-overlap]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.acc.String()[:ambiguousStart]

View File

@@ -343,23 +343,20 @@ func TestQwenParserStreaming(t *testing.T) {
},
},
},
// qwen3-coder:30b occasionally leaves off opening <tool_call> tags, but we
// want to parse it anyway
{
desc: "missing <tool_call> opening tag still parses",
steps: []step{
{
input: "before tool call<function=get_current_temperature>some tool content here</function></tool_call>",
wantEvents: []qwenEvent{
qwenEventContent{content: "before tool call"},
qwenEventRawToolCall{raw: "<function=get_current_temperature>some tool content here</function>"},
},
},
},
},
}
anyOnlies := false
for _, tc := range cases {
if tc.only {
anyOnlies = true
}
}
for _, tc := range cases {
if anyOnlies && !tc.only {
continue
}
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3CoderParser{}