agent: add reasoning effort configuration

Adds the `--effort` CLI flag and `/effort` REPL command to control the reasoning budget, persisting the setting in `.lp-agent.zon`.
2026-08-02 10:47:15 -04:00 · 2026-06-05 12:40:34 +02:00
parent 76504604ba
commit 5f2330bb2f
6 changed files with 96 additions and 35 deletions
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -35,8 +35,8 @@
            .hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr",
        },
        .zenai = .{
-            .url = "git+https://github.com/lightpanda-io/zenai.git#c8d6cfa13a93bcd41a8a34b26f9a49b1656f555a",
-            .hash = "zenai-0.0.0-iOY_VNNkBAAUsSB8OAR_pDsBcxMT8k_Kc0U3b5jSKfeC",
+            .url = "git+https://github.com/lightpanda-io/zenai.git#f2031a854aa0533a8bf3dfa7d0ede6cd77dc59df",
+            .hash = "zenai-0.0.0-iOY_VOhjBAB5sEZuzUdokns-GYzvrbPcanneYdpJRW1V",
        },
        .isocline = .{
            .url = "git+https://github.com/arrufat/isocline?ref=lightpanda#75f26db298f679e8aec82d8fc50c7115158a63e8",
--- a/docs/agent.md
+++ b/docs/agent.md
@@ -63,6 +63,15 @@ system prompt, and `--verbosity <low|medium|high>` to tune how much progress
 detail goes to stderr (`--task` defaults to `low`, or `high` when stderr is
 piped/redirected so harnesses capture the full `[tool/result]` trace).

+`--effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
+budget for thinking models (it maps to each provider's native thinking /
+reasoning-effort knob and is ignored by non-thinking models). With no flag and
+no remembered value, the interactive REPL defaults to `low` so turns stay
+snappy, while `--task` and script runs default to `medium`, where answer
+quality matters more than latency. Higher effort can cut tool calls through
+better planning, so it is a real tradeoff, not a pure slowdown. Change it live
+with `/effort`; the level is saved to `.lp-agent.zon` and reused on later runs.
+
 `--model` is validated against the provider's catalog up front: an unknown name
 fails fast with a pointer to `--list-models` rather than erroring mid-task. For
 Ollama, the default model is checked against what's actually pulled — if it's
@@ -72,12 +81,12 @@ missing, the agent falls back to the first installed model (an explicit
 ### Provider auto-detection

 When `--provider` is omitted, lightpanda picks one in this order. The REPL shows
-the resolved provider and model in its status bar; the multi-key picker and any
+the resolved model and effort level in its status bar; the multi-key picker and any
 fallback notices (e.g. an Ollama default that isn't installed) print to stderr:

 1. **Remembered** → the provider/model you last selected with `/provider` or
-   `/model`, persisted per-directory in `.lp-agent.zon`, as long as its key is
-   still set.
+   `/model` (plus the `/effort` level), persisted per-directory in
+   `.lp-agent.zon`, as long as its key is still set.
 2. **Auto-detected** → otherwise the first key found in priority order
   (`ANTHROPIC_API_KEY` → `GOOGLE_API_KEY`/`GEMINI_API_KEY` → `OPENAI_API_KEY`).
   If several keys are set and you're in an interactive REPL, the agent prompts
@@ -295,8 +304,9 @@ See [agent-script.md](agent-script.md) for the full script format reference.
  the provider's fetched model list, and bare `/provider`/`/model` print the
  current selection — `/save [file.js]` writes the session to a script and
  `/load <path>` runs one from disk (Tab completes file paths), `/quit` exits
-  the REPL, `/verbosity <low|medium|high>` tunes the log level. These are
-  REPL-only and never recorded.
+  the REPL, `/verbosity <low|medium|high>` tunes the log level, and
+  `/effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
+  budget (saved to `.lp-agent.zon`). These are REPL-only and never recorded.
  ```
  > /goto https://example.com
  > /findElement role=button name=Submit
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -123,6 +123,12 @@ fn dumpValidator(_: Allocator, args: *std.process.ArgIterator) !?DumpFormat {

 pub const AiProvider = std.meta.Tag(zenai.provider.Client);

+/// Per-turn reasoning budget for `agent` mode, mirroring Claude's effort
+/// levels. Maps to each provider's native thinking/reasoning knob. Resolved
+/// in `Agent.init` (explicit flag > remembered > mode default), so there is
+/// no Config-level accessor like `agentVerbosity`.
+pub const Effort = zenai.provider.Effort;
+
 /// Controls how chatty `agent` mode is on stderr.
 pub const AgentVerbosity = enum {
    /// REPL: spinner + per-turn summary. Non-REPL: final answer + errors only.
@@ -231,6 +237,7 @@ const Commands = cli.Builder(.{
            .{ .name = "task", .type = ?[]const u8 },
            .{ .name = "attach", .short = 'a', .type = []const u8, .multiple = true },
            .{ .name = "verbosity", .type = ?AgentVerbosity },
+            .{ .name = "effort", .type = ?Effort },
            .{ .name = "list_models", .type = bool },
            .{ .name = "no_llm", .type = bool },
        },
--- a/src/agent/Agent.zig
+++ b/src/agent/Agent.zig
@@ -109,6 +109,8 @@ active_script_runtime: ?*ScriptRuntime = null,
 messages: std.ArrayList(zenai.provider.Message),
 message_arena: std.heap.ArenaAllocator,
 model: []u8,
+/// Per-turn reasoning budget for LLM turns. Mutable at runtime via `/effort`.
+effort: Config.Effort,
 system_prompt: []const u8,
 script_file: ?[]const u8,
 one_shot_task: ?[]const u8,
@@ -140,6 +142,16 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re
    return "";
 }

+/// Precedence: explicit `--effort` flag > remembered `.lp-agent.zon` value >
+/// mode default. The interactive REPL defaults to `.low` so turns stay snappy;
+/// one-shot `--task` and script runs default to `.medium`, where answer
+/// quality matters more than per-turn latency.
+fn resolveEffort(opts: Config.Agent, remembered: ?settings.Remembered, will_repl: bool) Config.Effort {
+    if (opts.effort) |e| return e;
+    if (remembered) |r| if (r.effort) |e| return e;
+    return if (will_repl) .low else .medium;
+}
+
 const ReconciledModel = union(enum) {
    /// Owned by the allocator passed to reconcileModel.
    use: []u8,
@@ -258,9 +270,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
        }
    }

+    const effort = resolveEffort(opts, remembered, will_repl);
+
    if (resolved) |r| {
        if (r.source == .picked) {
-            settings.saveRemembered(r.credentials.provider, model) catch {};
+            settings.saveRemembered(.{ .provider = r.credentials.provider, .model = model, .effort = effort }) catch {};
        }
        // provider/model now live in the status bar; just space before the help
        std.debug.print("\n", .{});
@@ -291,6 +305,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
        .messages = .empty,
        .message_arena = .init(allocator),
        .model = model,
+        .effort = effort,
        .system_prompt = opts.system_prompt orelse default_system_prompt,
        .script_file = opts.script_file,
        .one_shot_task = opts.task,
@@ -617,6 +632,7 @@ fn handleMeta(self: *Agent, arena: std.mem.Allocator, meta: *const SlashCommand.
        .quit => return true,
        .help => self.printSlashHelp(arena, rest),
        .verbosity => self.handleVerbosity(rest),
+        .effort => self.handleEffort(rest),
        .save => self.handleSave(arena, rest),
        .load => self.handleLoad(rest),
        .model => self.handleModel(arena, rest),
@@ -638,6 +654,20 @@ fn handleVerbosity(self: *Agent, rest: []const u8) void {
    self.terminal.printInfo("verbosity: {s}", .{@tagName(level)});
 }

+fn handleEffort(self: *Agent, rest: []const u8) void {
+    if (rest.len == 0) {
+        self.terminal.printInfo("effort: {s}", .{@tagName(self.effort)});
+        return;
+    }
+    const level = std.meta.stringToEnum(Config.Effort, rest) orelse {
+        self.terminal.printError("usage: /effort <none|minimal|low|medium|high|xhigh> (got {s})", .{rest});
+        return;
+    };
+    self.effort = level;
+    self.updateStatusBar();
+    self.reportSaved("effort", @tagName(level));
+}
+
 fn handleLoad(self: *Agent, rest: []const u8) void {
    const path = std.mem.trim(u8, rest, &std.ascii.whitespace);
    if (path.len == 0) {
@@ -684,19 +714,26 @@ fn containsString(haystack: []const []const u8, needle: []const u8) bool {
    return false;
 }

+/// Persist the current provider/model/effort to `.lp-agent.zon` and report it
+/// as "<label>: <value>", appending "(saved to …)" when the write succeeds.
+/// Reports without saving when there are no model credentials (basic REPL).
+fn reportSaved(self: *Agent, label: []const u8, value: []const u8) void {
+    const c = self.model_credentials orelse {
+        self.terminal.printInfo("{s}: {s}", .{ label, value });
+        return;
+    };
+    if (settings.saveRemembered(.{ .provider = c.provider, .model = self.model, .effort = self.effort })) {
+        self.terminal.printInfo("{s}: {s} (saved to {s})", .{ label, value, settings.remembered_path });
+    } else |_| {
+        self.terminal.printInfo("{s}: {s}", .{ label, value });
+    }
+}
+
 fn setModel(self: *Agent, model: []const u8) !void {
    const new_model = try self.allocator.dupe(u8, model);
    self.allocator.free(self.model);
    self.model = new_model;
-    const c = self.model_credentials orelse {
-        self.terminal.printInfo("model: {s}", .{self.model});
-        return;
-    };
-    if (settings.saveRemembered(c.provider, self.model)) {
-        self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
-    } else |_| {
-        self.terminal.printInfo("model: {s}", .{self.model});
-    }
+    self.reportSaved("model", self.model);
 }

 fn updateStatusBar(self: *Agent) void {
@@ -707,8 +744,10 @@ fn updateStatusBar(self: *Agent) void {
        });
        return;
    }
+    var status_buf: [256]u8 = undefined;
+    const left_text = std.fmt.bufPrint(&status_buf, "{s} · {s}", .{ self.model, @tagName(self.effort) }) catch self.model;
    self.terminal.setStatus(&.{
-        .{ .text = self.model, .side = .left, .rank = 3 },
+        .{ .text = left_text, .side = .left, .rank = 3 },
        .{ .text = "! JS", .side = .right, .rank = 2 },
        .{ .text = "Tab completes", .side = .right, .rank = 1 },
        .{ .text = "/help", .side = .right, .rank = 4 },
@@ -755,11 +794,7 @@ fn setProvider(self: *Agent, credentials: Credentials) !void {
    self.allocator.free(self.model);
    self.model = new_model;
    self.terminal.printInfo("provider: {s}", .{@tagName(credentials.provider)});
-    if (settings.saveRemembered(credentials.provider, self.model)) {
-        self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
-    } else |_| {
-        self.terminal.printInfo("model: {s}", .{self.model});
-    }
+    self.reportSaved("model", self.model);
    _ = completionModels(self, self.allocator);
 }

@@ -960,7 +995,7 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
            .max_turns = 1,
            .max_tokens = 8192,
            .tool_choice = .none,
-            .thinking_level = .medium,
+            .effort = .medium,
            .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
        },
    ) catch |err| {
@@ -1108,6 +1143,10 @@ fn printSlashHelp(self: *Agent, arena: std.mem.Allocator, target: []const u8) vo
                "/verbosity <low|medium|high> — set REPL agent verbosity (currently: {s}). Bare /verbosity prints the level.",
                .{@tagName(self.terminal.verbosity)},
            ),
+            .effort => self.terminal.printInfo(
+                "/effort <none|minimal|low|medium|high|xhigh> — set per-turn reasoning effort (currently: {s}); saved to {s}. Bare /effort prints the level.",
+                .{ @tagName(self.effort), settings.remembered_path },
+            ),
            .save => self.terminal.printInfo(
                "/save [filename.js] [prompt] — save the session to [filename.js] (a random session-*.js if omitted). With an LLM, synthesizes an idiomatic script from the session and the optional prompt; with --no-llm, dumps the recorded actions verbatim.",
                .{},
@@ -1415,9 +1454,10 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
            .max_tool_calls = 200,
            .max_tokens = 4096,
            .tool_choice = .auto,
-            // Cap per-turn reasoning so thinking models don't burn
-            // minutes per turn. Ignored by non-thinking models.
-            .thinking_level = .medium,
+            // Per-turn reasoning budget; resolved from --effort / .lp-agent.zon
+            // / mode default and adjustable at runtime via /effort. Ignored by
+            // non-thinking models.
+            .effort = self.effort,
            .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
        },
    ) catch |err| {
@@ -1495,7 +1535,7 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
                .tool_choice = .none,
                // .low (≈512 tokens) so reasoning models still pick an answer
                // but can't burn the whole turn on thinking and emit nothing.
-                .thinking_level = .low,
+                .effort = .low,
                .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
            },
        ) catch |err| {
--- a/src/agent/SlashCommand.zig
+++ b/src/agent/SlashCommand.zig
@@ -16,8 +16,8 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.

-//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/model`,
-//! `/provider`). Meta commands aren't tool slash commands — they're handled
+//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/effort`,
+//! `/save`, `/load`, `/model`, `/provider`). Meta commands aren't tool slash commands — they're handled
 //! by `Agent.handleMeta` and never reach the recorder. Tool slash-command
 //! schema primitives live in `lp.Schema`; consumers should import that
 //! directly.
@@ -47,13 +47,14 @@ pub const MetaCommand = struct {

    /// Dispatched by `Agent.handleMeta` via an exhaustive switch so adding
    /// a new meta command is a compile error until it's wired up there too.
-    const Tag = enum { help, quit, verbosity, save, load, model, provider };
+    const Tag = enum { help, quit, verbosity, effort, save, load, model, provider };
 };

 pub const meta_commands = [_]MetaCommand{
    .{ .tag = .help, .name = "help", .hint = "[command]", .values = &.{}, .description = "List commands, or show help for one" },
    .{ .tag = .quit, .name = "quit", .hint = "", .values = &.{}, .description = "Exit the REPL" },
    .{ .tag = .verbosity, .name = "verbosity", .hint = "<low|medium|high>", .values = &.{ "low", "medium", "high" }, .description = "Set agent verbosity" },
+    .{ .tag = .effort, .name = "effort", .hint = "<none|minimal|low|medium|high|xhigh>", .values = &.{ "none", "minimal", "low", "medium", "high", "xhigh" }, .description = "Set per-turn reasoning effort" },
    .{ .tag = .save, .name = "save", .hint = "[filename.js] [prompt]", .values = &.{}, .description = "Save this session to a file" },
    .{ .tag = .load, .name = "load", .hint = "<path>", .values = &.{}, .description = "Load and run a script from disk" },
    .{ .tag = .model, .name = "model", .hint = "[name]", .values = &.{}, .description = "Change the model" },
--- a/src/agent/settings.zig
+++ b/src/agent/settings.zig
@@ -108,11 +108,14 @@ pub fn resolveCredentials(allocator: std.mem.Allocator, opts: Config.Agent, reme

 pub const remembered_path = ".lp-agent.zon";

-/// Last user-selected provider/model, persisted per-directory in `.lp-agent.zon`.
-/// `model` is owned by the caller.
+/// Last user-selected provider/model/effort, persisted per-directory in
+/// `.lp-agent.zon`. `model` is owned by the caller. `effort` is optional so
+/// files written before it existed still parse; null means "use the mode
+/// default" (see `Agent.resolveEffort`).
 pub const Remembered = struct {
    provider: Config.AiProvider,
    model: []const u8,
+    effort: ?Config.Effort = null,
 };

 pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
@@ -127,10 +130,10 @@ pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
 }

 /// Persist the current selection to `remembered_path`. Serialization and write errors are returned; callers treat the save as best-effort.
-pub fn saveRemembered(provider: Config.AiProvider, model: []const u8) !void {
+pub fn saveRemembered(remembered: Remembered) !void {
    var buf: [512]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
-    try std.zon.stringify.serialize(Remembered{ .provider = provider, .model = model }, .{}, &w);
+    try std.zon.stringify.serialize(remembered, .{}, &w);
    try std.fs.cwd().writeFile(.{ .sub_path = remembered_path, .data = w.buffered() });
 }