diff --git a/build.zig.zon b/build.zig.zon index b3490632..575582fd 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -35,8 +35,8 @@ .hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr", }, .zenai = .{ - .url = "git+https://github.com/lightpanda-io/zenai.git#f2031a854aa0533a8bf3dfa7d0ede6cd77dc59df", - .hash = "zenai-0.0.0-iOY_VOhjBAB5sEZuzUdokns-GYzvrbPcanneYdpJRW1V", + .url = "git+https://github.com/lightpanda-io/zenai.git#c6a4c11de53cc5c39c825a57513b3071bda154f6", + .hash = "zenai-0.0.0-iOY_VDVqBAA3mH3_90Lg7Vz3JK8oD0gIhcete_M9c1UC", }, .isocline = .{ .url = "git+https://github.com/arrufat/isocline?ref=lightpanda#75f26db298f679e8aec82d8fc50c7115158a63e8", diff --git a/docs/agent.md b/docs/agent.md index 99cf416f..37e2599f 100644 --- a/docs/agent.md +++ b/docs/agent.md @@ -304,9 +304,10 @@ See [agent-script.md](agent-script.md) for the full script format reference. the provider's fetched model list, and bare `/provider`/`/model` print the current selection — `/save [file.js]` writes the session to a script and `/load ` runs one from disk (Tab completes file paths), `/quit` exits - the REPL, `/verbosity ` tunes the log level, and + the REPL, `/verbosity ` tunes the log level, `/effort ` sets the per-turn reasoning - budget (saved to `.lp-agent.zon`). These are REPL-only and never recorded. + budget (saved to `.lp-agent.zon`), and `/usage` prints cumulative token usage + and the cache hit rate for the session. These are REPL-only and never recorded. ``` > /goto https://example.com > /findElement role=button name=Submit diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig index aa248d57..75722d10 100644 --- a/src/agent/Agent.zig +++ b/src/agent/Agent.zig @@ -633,6 +633,7 @@ fn handleMeta(self: *Agent, arena: std.mem.Allocator, meta: *const SlashCommand. 
.help => self.printSlashHelp(arena, rest), .verbosity => self.handleVerbosity(rest), .effort => self.handleEffort(rest), + .usage => self.handleUsage(), .save => self.handleSave(arena, rest), .load => self.handleLoad(rest), .model => self.handleModel(arena, rest), @@ -668,6 +669,27 @@ fn handleEffort(self: *Agent, rest: []const u8) void { self.reportSaved("effort", @tagName(level)); }
+/// `/usage` meta command: show the session's cumulative token counts with the
+/// input split into fresh, cache-read and cache-write tokens. This is how the
+/// REPL exposes what `--task` prints as its `$usage` line. `total_usage` is
+/// summed per turn by `processUserMessage`; see `Usage` for the split's meaning.
+fn handleUsage(self: *Agent) void {
+    const totals = self.total_usage;
+    const input_tokens = totals.inputTokens();
+    const output_tokens = totals.completion_tokens orelse 0;
+    if (input_tokens == 0 and output_tokens == 0) {
+        self.terminal.printInfo("usage: no model turns yet this session", .{});
+        return;
+    }
+    self.terminal.printInfo(
+        "usage: input={d} (fresh={d} · cache read={d} · cache write={d}), output={d}",
+        .{ input_tokens, totals.prompt_tokens orelse 0, totals.cached_tokens orelse 0, totals.cache_creation_tokens orelse 0, output_tokens },
+    );
+    // The hit rate is reported only when some input tokens were counted.
+    if (input_tokens > 0)
+        self.terminal.printInfo("cache: {d}% of input served from cache", .{totals.cacheHitPercent()});
+}
+
 fn handleLoad(self: *Agent, rest: []const u8) void { const path = std.mem.trim(u8, rest, &std.ascii.whitespace); if (path.len == 0) { @@ -1147,6 +1169,10 @@ fn printSlashHelp(self: *Agent, arena: std.mem.Allocator, target: []const u8) vo "/effort — set per-turn reasoning effort (currently: {s}); saved to {s}. Bare /effort prints the level.", .{ @tagName(self.effort), settings.remembered_path }, ), + .usage => self.terminal.printInfo( + "/usage — show cumulative token usage and cache hit rate for this session", + .{}, + ), .save => self.terminal.printInfo( "/save [filename.js] [prompt] — save the session to [filename.js] (a random session-*.js if omitted). 
With an LLM, synthesizes an idiomatic script from the session and the optional prompt; with --no-llm, dumps the recorded actions verbatim.", .{}, diff --git a/src/agent/SlashCommand.zig b/src/agent/SlashCommand.zig index 0d336ff1..51c2f5cc 100644 --- a/src/agent/SlashCommand.zig +++ b/src/agent/SlashCommand.zig @@ -17,7 +17,7 @@ // along with this program. If not, see . //! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/effort`, -//! `/model`, `/provider`). Meta commands aren't tool slash commands — they're handled +//! `/usage`, `/model`, `/provider`). Meta commands aren't tool slash commands — they're handled //! by `Agent.handleMeta` and never reach the recorder. Tool slash-command //! schema primitives live in `lp.Schema`; consumers should import that //! directly. @@ -47,7 +47,7 @@ pub const MetaCommand = struct { /// Dispatched by `Agent.handleMeta` via an exhaustive switch so adding /// a new meta command is a compile error until it's wired up there too. - const Tag = enum { help, quit, verbosity, effort, save, load, model, provider }; + const Tag = enum { help, quit, verbosity, effort, usage, save, load, model, provider }; }; pub const meta_commands = [_]MetaCommand{ @@ -55,6 +55,7 @@ pub const meta_commands = [_]MetaCommand{ .{ .tag = .quit, .name = "quit", .hint = "", .values = &.{}, .description = "Exit the REPL" }, .{ .tag = .verbosity, .name = "verbosity", .hint = "", .values = &.{ "low", "medium", "high" }, .description = "Set agent verbosity" }, .{ .tag = .effort, .name = "effort", .hint = "", .values = &.{ "none", "minimal", "low", "medium", "high", "xhigh" }, .description = "Set per-turn reasoning effort" }, + .{ .tag = .usage, .name = "usage", .hint = "", .values = &.{}, .description = "Show token usage and cache stats for this session" }, .{ .tag = .save, .name = "save", .hint = "[filename.js] [prompt]", .values = &.{}, .description = "Save this session to a file" }, .{ .tag = .load, .name = "load", .hint = "", .values = &.{}, 
.description = "Load and run a script from disk" }, .{ .tag = .model, .name = "model", .hint = "[name]", .values = &.{}, .description = "Change the model" },