From 5f2330bb2fff3f9f0d3c9ce0c2af16705488fa00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= <adria.arrufat@gmail.com>
Date: Fri, 5 Jun 2026 12:40:34 +0200
Subject: [PATCH] agent: add reasoning effort configuration

Adds the `--effort` CLI flag and `/effort` REPL command to control
the reasoning budget, persisting the setting in `.lp-agent.zon`.
---
 build.zig.zon              |  4 +-
 docs/agent.md              | 20 +++++++---
 src/Config.zig             |  7 ++++
 src/agent/Agent.zig        | 82 ++++++++++++++++++++++++++++----------
 src/agent/SlashCommand.zig |  7 ++--
 src/agent/settings.zig     | 11 +++--
 6 files changed, 96 insertions(+), 35 deletions(-)
diff --git a/build.zig.zon b/build.zig.zon
index 07c019ff..b3490632 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -35,8 +35,8 @@
             .hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr",
         },
         .zenai = .{
-            .url = "git+https://github.com/lightpanda-io/zenai.git#c8d6cfa13a93bcd41a8a34b26f9a49b1656f555a",
-            .hash = "zenai-0.0.0-iOY_VNNkBAAUsSB8OAR_pDsBcxMT8k_Kc0U3b5jSKfeC",
+            .url = "git+https://github.com/lightpanda-io/zenai.git#f2031a854aa0533a8bf3dfa7d0ede6cd77dc59df",
+            .hash = "zenai-0.0.0-iOY_VOhjBAB5sEZuzUdokns-GYzvrbPcanneYdpJRW1V",
         },
         .isocline = .{
             .url = "git+https://github.com/arrufat/isocline?ref=lightpanda#75f26db298f679e8aec82d8fc50c7115158a63e8",
diff --git a/docs/agent.md b/docs/agent.md
index 19c6feae..99cf416f 100644
--- a/docs/agent.md
+++ b/docs/agent.md
@@ -63,6 +63,15 @@ system prompt, and `--verbosity <low|medium|high>` to tune how much progress
 detail goes to stderr (`--task` defaults to `low`, or `high` when stderr is
 piped/redirected so harnesses capture the full `[tool/result]` trace).
 
+`--effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
+budget for thinking models (it maps to each provider's native thinking /
+reasoning-effort knob and is ignored by non-thinking models). The interactive
+REPL defaults to `low` so turns stay snappy; `--task` and script runs default
+to `medium`, where answer quality matters more than per-turn latency. Higher
+effort can reduce the number of tool calls by planning better, so it's a real
+tradeoff rather than a pure slowdown. Change it live with `/effort`; the level
+is remembered in `.lp-agent.zon` and overrides these defaults on later runs.
+
 `--model` is validated against the provider's catalog up front: an unknown name
 fails fast with a pointer to `--list-models` rather than erroring mid-task. For
 Ollama, the default model is checked against what's actually pulled — if it's
@@ -72,12 +81,12 @@ missing, the agent falls back to the first installed model (an explicit
 ### Provider auto-detection
 
 When `--provider` is omitted, lightpanda picks one in this order. The REPL shows
-the resolved provider and model in its status bar; the multi-key picker and any
+the resolved model and effort level in its status bar; the multi-key picker and any
 fallback notices (e.g. an Ollama default that isn't installed) print to stderr:
 
 1. **Remembered** → the provider/model you last selected with `/provider` or
-   `/model`, persisted per-directory in `.lp-agent.zon`, as long as its key is
-   still set.
+   `/model` (plus the `/effort` level), persisted per-directory in
+   `.lp-agent.zon`, as long as its key is still set.
 2. **Auto-detected** → otherwise the first key found in priority order
    (`ANTHROPIC_API_KEY` → `GOOGLE_API_KEY`/`GEMINI_API_KEY` → `OPENAI_API_KEY`).
    If several keys are set and you're in an interactive REPL, the agent prompts
@@ -295,8 +304,9 @@ See [agent-script.md](agent-script.md) for the full script format reference.
   the provider's fetched model list, and bare `/provider`/`/model` print the
   current selection — `/save [file.js]` writes the session to a script and
   `/load <path>` runs one from disk (Tab completes file paths), `/quit` exits
-  the REPL, `/verbosity <low|medium|high>` tunes the log level. These are
-  REPL-only and never recorded.
+  the REPL, `/verbosity <low|medium|high>` tunes the log level, and
+  `/effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
+  budget (saved to `.lp-agent.zon`). These are REPL-only and never recorded.
   ```
   > /goto https://example.com
   > /findElement role=button name=Submit
diff --git a/src/Config.zig b/src/Config.zig
index 6a10feac..0920698a 100644
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -123,6 +123,12 @@ fn dumpValidator(_: Allocator, args: *std.process.ArgIterator) !?DumpFormat {
 
 pub const AiProvider = std.meta.Tag(zenai.provider.Client);
 
+/// Per-turn reasoning budget for `agent` mode, mirroring Claude's effort
+/// levels. Maps to each provider's native thinking/reasoning knob. Resolved
+/// in `Agent.init` (explicit flag > remembered > mode default), so there is
+/// no Config-level accessor like `agentVerbosity`.
+pub const Effort = zenai.provider.Effort;
+
 /// Controls how chatty `agent` mode is on stderr.
 pub const AgentVerbosity = enum {
     /// REPL: spinner + per-turn summary. Non-REPL: final answer + errors only.
@@ -231,6 +237,7 @@ const Commands = cli.Builder(.{
             .{ .name = "task", .type = ?[]const u8 },
             .{ .name = "attach", .short = 'a', .type = []const u8, .multiple = true },
             .{ .name = "verbosity", .type = ?AgentVerbosity },
+            .{ .name = "effort", .type = ?Effort },
             .{ .name = "list_models", .type = bool },
             .{ .name = "no_llm", .type = bool },
         },
diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig
index c66203c0..aa248d57 100644
--- a/src/agent/Agent.zig
+++ b/src/agent/Agent.zig
@@ -109,6 +109,8 @@ active_script_runtime: ?*ScriptRuntime = null,
 messages: std.ArrayList(zenai.provider.Message),
 message_arena: std.heap.ArenaAllocator,
 model: []u8,
+/// Per-turn reasoning budget for LLM turns. Mutable at runtime via `/effort`.
+effort: Config.Effort,
 system_prompt: []const u8,
 script_file: ?[]const u8,
 one_shot_task: ?[]const u8,
@@ -140,6 +142,16 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re
     return "";
 }
 
+/// Chooses the reasoning effort for this run. An explicit `--effort` flag
+/// wins, then the level remembered in `.lp-agent.zon`, then a mode default:
+/// `.low` for the interactive REPL (snappy turns) and `.medium` for one-shot
+/// `--task` and script runs (answer quality over per-turn latency).
+fn resolveEffort(opts: Config.Agent, remembered: ?settings.Remembered, will_repl: bool) Config.Effort {
+    const mode_default: Config.Effort = if (will_repl) .low else .medium;
+    const saved: ?Config.Effort = if (remembered) |r| r.effort else null;
+    return opts.effort orelse (saved orelse mode_default);
+}
+
 const ReconciledModel = union(enum) {
     /// Owned by the allocator passed to reconcileModel.
     use: []u8,
@@ -258,9 +270,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
         }
     }
 
+    const effort = resolveEffort(opts, remembered, will_repl);
+
     if (resolved) |r| {
         if (r.source == .picked) {
-            settings.saveRemembered(r.credentials.provider, model) catch {};
+            settings.saveRemembered(.{ .provider = r.credentials.provider, .model = model, .effort = effort }) catch {};
         }
         // provider/model now live in the status bar; just space before the help
         std.debug.print("\n", .{});
@@ -291,6 +305,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
         .messages = .empty,
         .message_arena = .init(allocator),
         .model = model,
+        .effort = effort,
         .system_prompt = opts.system_prompt orelse default_system_prompt,
         .script_file = opts.script_file,
         .one_shot_task = opts.task,
@@ -617,6 +632,7 @@ fn handleMeta(self: *Agent, arena: std.mem.Allocator, meta: *const SlashCommand.
         .quit => return true,
         .help => self.printSlashHelp(arena, rest),
         .verbosity => self.handleVerbosity(rest),
+        .effort => self.handleEffort(rest),
         .save => self.handleSave(arena, rest),
         .load => self.handleLoad(rest),
         .model => self.handleModel(arena, rest),
@@ -638,6 +654,20 @@ fn handleVerbosity(self: *Agent, rest: []const u8) void {
     self.terminal.printInfo("verbosity: {s}", .{@tagName(level)});
 }
 
+/// `/effort [level]`: bare prints the level; otherwise set, redraw, report.
+/// Input is trimmed (as in `handleLoad`) so a trailing space still parses.
+fn handleEffort(self: *Agent, rest: []const u8) void {
+    const arg = std.mem.trim(u8, rest, &std.ascii.whitespace);
+    if (arg.len == 0) return self.terminal.printInfo("effort: {s}", .{@tagName(self.effort)});
+    const level = std.meta.stringToEnum(Config.Effort, arg) orelse {
+        self.terminal.printError("usage: /effort <none|minimal|low|medium|high|xhigh> (got {s})", .{arg});
+        return;
+    };
+    self.effort = level;
+    self.updateStatusBar();
+    self.reportSaved("effort", @tagName(level));
+}
+
 fn handleLoad(self: *Agent, rest: []const u8) void {
     const path = std.mem.trim(u8, rest, &std.ascii.whitespace);
     if (path.len == 0) {
@@ -684,19 +714,26 @@ fn containsString(haystack: []const []const u8, needle: []const u8) bool {
     return false;
 }
 
+/// Prints "<label>: <value>" after trying to persist the current
+/// provider/model/effort to `.lp-agent.zon`; "(saved to …)" is appended only
+/// when the write succeeds. Without model credentials (basic REPL) nothing
+/// is written and the plain form is printed.
+/// NOTE(review): `self.effort` is stored even when it is just the mode
+/// default, so later runs treat it as a remembered choice — confirm intent.
+fn reportSaved(self: *Agent, label: []const u8, value: []const u8) void {
+    const saved = if (self.model_credentials) |c| blk: {
+        settings.saveRemembered(.{ .provider = c.provider, .model = self.model, .effort = self.effort }) catch break :blk false;
+        break :blk true;
+    } else false;
+    if (!saved) return self.terminal.printInfo("{s}: {s}", .{ label, value });
+    self.terminal.printInfo("{s}: {s} (saved to {s})", .{ label, value, settings.remembered_path });
+}
+
 fn setModel(self: *Agent, model: []const u8) !void {
     const new_model = try self.allocator.dupe(u8, model);
     self.allocator.free(self.model);
     self.model = new_model;
-    const c = self.model_credentials orelse {
-        self.terminal.printInfo("model: {s}", .{self.model});
-        return;
-    };
-    if (settings.saveRemembered(c.provider, self.model)) {
-        self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
-    } else |_| {
-        self.terminal.printInfo("model: {s}", .{self.model});
-    }
+    self.reportSaved("model", self.model);
 }
 
 fn updateStatusBar(self: *Agent) void {
@@ -707,8 +744,10 @@ fn updateStatusBar(self: *Agent) void {
         });
         return;
     }
+    var status_buf: [256]u8 = undefined;
+    const left_text = std.fmt.bufPrint(&status_buf, "{s} · {s}", .{ self.model, @tagName(self.effort) }) catch self.model;
     self.terminal.setStatus(&.{
-        .{ .text = self.model, .side = .left, .rank = 3 },
+        .{ .text = left_text, .side = .left, .rank = 3 },
         .{ .text = "! JS", .side = .right, .rank = 2 },
         .{ .text = "Tab completes", .side = .right, .rank = 1 },
         .{ .text = "/help", .side = .right, .rank = 4 },
@@ -755,11 +794,7 @@ fn setProvider(self: *Agent, credentials: Credentials) !void {
     self.allocator.free(self.model);
     self.model = new_model;
     self.terminal.printInfo("provider: {s}", .{@tagName(credentials.provider)});
-    if (settings.saveRemembered(credentials.provider, self.model)) {
-        self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
-    } else |_| {
-        self.terminal.printInfo("model: {s}", .{self.model});
-    }
+    self.reportSaved("model", self.model);
     _ = completionModels(self, self.allocator);
 }
 
@@ -960,7 +995,7 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
             .max_turns = 1,
             .max_tokens = 8192,
             .tool_choice = .none,
-            .thinking_level = .medium,
+            .effort = .medium,
             .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
         },
     ) catch |err| {
@@ -1108,6 +1143,10 @@ fn printSlashHelp(self: *Agent, arena: std.mem.Allocator, target: []const u8) vo
                 "/verbosity <low|medium|high> — set REPL agent verbosity (currently: {s}). Bare /verbosity prints the level.",
                 .{@tagName(self.terminal.verbosity)},
             ),
+            .effort => self.terminal.printInfo(
+                "/effort <none|minimal|low|medium|high|xhigh> — set per-turn reasoning effort (currently: {s}); saved to {s}. Bare /effort prints the level.",
+                .{ @tagName(self.effort), settings.remembered_path },
+            ),
             .save => self.terminal.printInfo(
                 "/save [filename.js] [prompt] — save the session to [filename.js] (a random session-*.js if omitted). With an LLM, synthesizes an idiomatic script from the session and the optional prompt; with --no-llm, dumps the recorded actions verbatim.",
                 .{},
@@ -1415,9 +1454,10 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
             .max_tool_calls = 200,
             .max_tokens = 4096,
             .tool_choice = .auto,
-            // Cap per-turn reasoning so thinking models don't burn
-            // minutes per turn. Ignored by non-thinking models.
-            .thinking_level = .medium,
+            // Per-turn reasoning budget; resolved from --effort / .lp-agent.zon
+            // / mode default and adjustable at runtime via /effort. Ignored by
+            // non-thinking models.
+            .effort = self.effort,
             .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
         },
     ) catch |err| {
@@ -1495,7 +1535,7 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
                 .tool_choice = .none,
                 // .low (≈512 tokens) so reasoning models still pick an answer
                 // but can't burn the whole turn on thinking and emit nothing.
-                .thinking_level = .low,
+                .effort = .low,
                 .cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
             },
         ) catch |err| {
diff --git a/src/agent/SlashCommand.zig b/src/agent/SlashCommand.zig
index 57a6ff73..0d336ff1 100644
--- a/src/agent/SlashCommand.zig
+++ b/src/agent/SlashCommand.zig
@@ -16,8 +16,8 @@
 // You should have received a copy of the GNU Affero General Public License
 // along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/model`,
-//! `/provider`). Meta commands aren't tool slash commands — they're handled
+//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/effort`,
+//! `/model`, `/provider`). Meta commands aren't tool slash commands — they're handled
 //! by `Agent.handleMeta` and never reach the recorder. Tool slash-command
 //! schema primitives live in `lp.Schema`; consumers should import that
 //! directly.
@@ -47,13 +47,14 @@ pub const MetaCommand = struct {
 
     /// Dispatched by `Agent.handleMeta` via an exhaustive switch so adding
     /// a new meta command is a compile error until it's wired up there too.
-    const Tag = enum { help, quit, verbosity, save, load, model, provider };
+    const Tag = enum { help, quit, verbosity, effort, save, load, model, provider };
 };
 
 pub const meta_commands = [_]MetaCommand{
     .{ .tag = .help, .name = "help", .hint = "[command]", .values = &.{}, .description = "List commands, or show help for one" },
     .{ .tag = .quit, .name = "quit", .hint = "", .values = &.{}, .description = "Exit the REPL" },
     .{ .tag = .verbosity, .name = "verbosity", .hint = "<low|medium|high>", .values = &.{ "low", "medium", "high" }, .description = "Set agent verbosity" },
+    .{ .tag = .effort, .name = "effort", .hint = "<none|minimal|low|medium|high|xhigh>", .values = &.{ "none", "minimal", "low", "medium", "high", "xhigh" }, .description = "Set per-turn reasoning effort" },
     .{ .tag = .save, .name = "save", .hint = "[filename.js] [prompt]", .values = &.{}, .description = "Save this session to a file" },
     .{ .tag = .load, .name = "load", .hint = "<path>", .values = &.{}, .description = "Load and run a script from disk" },
     .{ .tag = .model, .name = "model", .hint = "[name]", .values = &.{}, .description = "Change the model" },
diff --git a/src/agent/settings.zig b/src/agent/settings.zig
index 39dabc1b..d2aed8dc 100644
--- a/src/agent/settings.zig
+++ b/src/agent/settings.zig
@@ -108,11 +108,14 @@ pub fn resolveCredentials(allocator: std.mem.Allocator, opts: Config.Agent, reme
 
 pub const remembered_path = ".lp-agent.zon";
 
-/// Last user-selected provider/model, persisted per-directory in `.lp-agent.zon`.
-/// `model` is owned by the caller.
+/// Last user-selected provider/model/effort, persisted per-directory in
+/// `.lp-agent.zon`. `model` is owned by the caller. `effort` is optional so
+/// files written before it existed still parse; null means "use the mode
+/// default" (see `Agent.resolveEffort`).
 pub const Remembered = struct {
     provider: Config.AiProvider,
     model: []const u8,
+    effort: ?Config.Effort = null,
 };
 
 pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
@@ -127,10 +130,10 @@ pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
 }
 
 /// Best-effort persist of the current selection; failures are ignored.
-pub fn saveRemembered(provider: Config.AiProvider, model: []const u8) !void {
+pub fn saveRemembered(remembered: Remembered) !void {
     var buf: [512]u8 = undefined;
     var w: std.Io.Writer = .fixed(&buf);
-    try std.zon.stringify.serialize(Remembered{ .provider = provider, .model = model }, .{}, &w);
+    try std.zon.stringify.serialize(remembered, .{}, &w);
     try std.fs.cwd().writeFile(.{ .sub_path = remembered_path, .data = w.buffered() });
 }