From 5f2330bb2fff3f9f0d3c9ce0c2af16705488fa00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Fri, 5 Jun 2026 12:40:34 +0200 Subject: [PATCH] agent: add reasoning effort configuration Adds the `--effort` CLI flag and `/effort` REPL command to control the reasoning budget, persisting the setting in `.lp-agent.zon`. --- build.zig.zon | 4 +- docs/agent.md | 20 +++++++--- src/Config.zig | 7 ++++ src/agent/Agent.zig | 82 ++++++++++++++++++++++++++++---------- src/agent/SlashCommand.zig | 7 ++-- src/agent/settings.zig | 11 +++-- 6 files changed, 96 insertions(+), 35 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 07c019ff..b3490632 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -35,8 +35,8 @@ .hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr", }, .zenai = .{ - .url = "git+https://github.com/lightpanda-io/zenai.git#c8d6cfa13a93bcd41a8a34b26f9a49b1656f555a", - .hash = "zenai-0.0.0-iOY_VNNkBAAUsSB8OAR_pDsBcxMT8k_Kc0U3b5jSKfeC", + .url = "git+https://github.com/lightpanda-io/zenai.git#f2031a854aa0533a8bf3dfa7d0ede6cd77dc59df", + .hash = "zenai-0.0.0-iOY_VOhjBAB5sEZuzUdokns-GYzvrbPcanneYdpJRW1V", }, .isocline = .{ .url = "git+https://github.com/arrufat/isocline?ref=lightpanda#75f26db298f679e8aec82d8fc50c7115158a63e8", diff --git a/docs/agent.md b/docs/agent.md index 19c6feae..99cf416f 100644 --- a/docs/agent.md +++ b/docs/agent.md @@ -63,6 +63,15 @@ system prompt, and `--verbosity ` to tune how much progress detail goes to stderr (`--task` defaults to `low`, or `high` when stderr is piped/redirected so harnesses capture the full `[tool/result]` trace). +`--effort ` sets the per-turn reasoning +budget for thinking models (it maps to each provider's native thinking / +reasoning-effort knob and is ignored by non-thinking models). The interactive +REPL defaults to `low` so turns stay snappy; `--task` and script runs default +to `medium`, where answer quality matters more than per-turn latency. Higher +effort can reduce the number of tool calls by planning better, so it's a real +tradeoff rather than a pure slowdown. Change it live with `/effort`; the +selection is remembered in `.lp-agent.zon`. + `--model` is validated against the provider's catalog up front: an unknown name fails fast with a pointer to `--list-models` rather than erroring mid-task. For Ollama, the default model is checked against what's actually pulled — if it's @@ -72,12 +81,12 @@ missing, the agent falls back to the first installed model (an explicit ### Provider auto-detection When `--provider` is omitted, lightpanda picks one in this order. The REPL shows -the resolved provider and model in its status bar; the multi-key picker and any +the resolved model and effort level in its status bar; the multi-key picker and any fallback notices (e.g. an Ollama default that isn't installed) print to stderr: 1. **Remembered** → the provider/model you last selected with `/provider` or - `/model`, persisted per-directory in `.lp-agent.zon`, as long as its key is - still set. + `/model` (plus the `/effort` level), persisted per-directory in + `.lp-agent.zon`, as long as its key is still set. 2. **Auto-detected** → otherwise the first key found in priority order (`ANTHROPIC_API_KEY` → `GOOGLE_API_KEY`/`GEMINI_API_KEY` → `OPENAI_API_KEY`). If several keys are set and you're in an interactive REPL, the agent prompts @@ -295,8 +304,9 @@ See [agent-script.md](agent-script.md) for the full script format reference. the provider's fetched model list, and bare `/provider`/`/model` print the current selection — `/save [file.js]` writes the session to a script and `/load ` runs one from disk (Tab completes file paths), `/quit` exits - the REPL, `/verbosity ` tunes the log level. These are - REPL-only and never recorded. + the REPL, `/verbosity ` tunes the log level, and + `/effort ` sets the per-turn reasoning + budget (saved to `.lp-agent.zon`). These are REPL-only and never recorded. ``` > /goto https://example.com > /findElement role=button name=Submit diff --git a/src/Config.zig b/src/Config.zig index 6a10feac..0920698a 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -123,6 +123,12 @@ fn dumpValidator(_: Allocator, args: *std.process.ArgIterator) !?DumpFormat { pub const AiProvider = std.meta.Tag(zenai.provider.Client); +/// Per-turn reasoning budget for `agent` mode, mirroring Claude's effort +/// levels. Maps to each provider's native thinking/reasoning knob. Resolved +/// in `Agent.init` (explicit flag > remembered > mode default), so there is +/// no Config-level accessor like `agentVerbosity`. +pub const Effort = zenai.provider.Effort; + /// Controls how chatty `agent` mode is on stderr. pub const AgentVerbosity = enum { /// REPL: spinner + per-turn summary. Non-REPL: final answer + errors only. @@ -231,6 +237,7 @@ const Commands = cli.Builder(.{ .{ .name = "task", .type = ?[]const u8 }, .{ .name = "attach", .short = 'a', .type = []const u8, .multiple = true }, .{ .name = "verbosity", .type = ?AgentVerbosity }, + .{ .name = "effort", .type = ?Effort }, .{ .name = "list_models", .type = bool }, .{ .name = "no_llm", .type = bool }, }, diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig index c66203c0..aa248d57 100644 --- a/src/agent/Agent.zig +++ b/src/agent/Agent.zig @@ -109,6 +109,8 @@ active_script_runtime: ?*ScriptRuntime = null, messages: std.ArrayList(zenai.provider.Message), message_arena: std.heap.ArenaAllocator, model: []u8, +/// Per-turn reasoning budget for LLM turns. Mutable at runtime via `/effort`. +effort: Config.Effort, system_prompt: []const u8, script_file: ?[]const u8, one_shot_task: ?[]const u8, @@ -140,6 +142,16 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re return ""; } +/// Precedence: explicit `--effort` flag > remembered `.lp-agent.zon` value > +/// mode default. The interactive REPL defaults to `.low` so turns stay snappy; +/// one-shot `--task` and script runs default to `.medium`, where answer +/// quality matters more than per-turn latency. +fn resolveEffort(opts: Config.Agent, remembered: ?settings.Remembered, will_repl: bool) Config.Effort { + if (opts.effort) |e| return e; + if (remembered) |r| if (r.effort) |e| return e; + return if (will_repl) .low else .medium; +} + const ReconciledModel = union(enum) { /// Owned by the allocator passed to reconcileModel. use: []u8, @@ -258,9 +270,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent } } + const effort = resolveEffort(opts, remembered, will_repl); + if (resolved) |r| { if (r.source == .picked) { - settings.saveRemembered(r.credentials.provider, model) catch {}; + settings.saveRemembered(.{ .provider = r.credentials.provider, .model = model, .effort = effort }) catch {}; } // provider/model now live in the status bar; just space before the help std.debug.print("\n", .{}); @@ -291,6 +305,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent .messages = .empty, .message_arena = .init(allocator), .model = model, + .effort = effort, .system_prompt = opts.system_prompt orelse default_system_prompt, .script_file = opts.script_file, .one_shot_task = opts.task, @@ -617,6 +632,7 @@ fn handleMeta(self: *Agent, arena: std.mem.Allocator, meta: *const SlashCommand. .quit => return true, .help => self.printSlashHelp(arena, rest), .verbosity => self.handleVerbosity(rest), + .effort => self.handleEffort(rest), .save => self.handleSave(arena, rest), .load => self.handleLoad(rest), .model => self.handleModel(arena, rest), @@ -638,6 +654,20 @@ fn handleVerbosity(self: *Agent, rest: []const u8) void { self.terminal.printInfo("verbosity: {s}", .{@tagName(level)}); } +fn handleEffort(self: *Agent, rest: []const u8) void { + if (rest.len == 0) { + self.terminal.printInfo("effort: {s}", .{@tagName(self.effort)}); + return; + } + const level = std.meta.stringToEnum(Config.Effort, rest) orelse { + self.terminal.printError("usage: /effort (got {s})", .{rest}); + return; + }; + self.effort = level; + self.updateStatusBar(); + self.reportSaved("effort", @tagName(level)); +} + fn handleLoad(self: *Agent, rest: []const u8) void { const path = std.mem.trim(u8, rest, &std.ascii.whitespace); if (path.len == 0) { @@ -684,19 +714,26 @@ fn containsString(haystack: []const []const u8, needle: []const u8) bool { return false; } +/// Persist the current provider/model/effort to `.lp-agent.zon` and report it +/// as "