agent: add reasoning effort configuration

Adds the `--effort` CLI flag and `/effort` REPL command to control
the reasoning budget, persisting the setting in `.lp-agent.zon`.
This commit is contained in:
Adrià Arrufat
2026-06-05 12:40:34 +02:00
parent 76504604ba
commit 5f2330bb2f
6 changed files with 96 additions and 35 deletions

View File

@@ -35,8 +35,8 @@
.hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr",
},
.zenai = .{
.url = "git+https://github.com/lightpanda-io/zenai.git#c8d6cfa13a93bcd41a8a34b26f9a49b1656f555a",
.hash = "zenai-0.0.0-iOY_VNNkBAAUsSB8OAR_pDsBcxMT8k_Kc0U3b5jSKfeC",
.url = "git+https://github.com/lightpanda-io/zenai.git#f2031a854aa0533a8bf3dfa7d0ede6cd77dc59df",
.hash = "zenai-0.0.0-iOY_VOhjBAB5sEZuzUdokns-GYzvrbPcanneYdpJRW1V",
},
.isocline = .{
.url = "git+https://github.com/arrufat/isocline?ref=lightpanda#75f26db298f679e8aec82d8fc50c7115158a63e8",

View File

@@ -63,6 +63,15 @@ system prompt, and `--verbosity <low|medium|high>` to tune how much progress
detail goes to stderr (`--task` defaults to `low`, or `high` when stderr is
piped/redirected so harnesses capture the full `[tool/result]` trace).
`--effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
budget for thinking models (it maps to each provider's native thinking /
reasoning-effort knob and is ignored by non-thinking models). The interactive
REPL defaults to `low` so turns stay snappy; `--task` and script runs default
to `medium`, where answer quality matters more than per-turn latency. Higher
effort can reduce the number of tool calls by planning better, so it's a real
tradeoff rather than a pure slowdown. Change it live with `/effort`; the
selection is remembered in `.lp-agent.zon`.
`--model` is validated against the provider's catalog up front: an unknown name
fails fast with a pointer to `--list-models` rather than erroring mid-task. For
Ollama, the default model is checked against what's actually pulled — if it's
@@ -72,12 +81,12 @@ missing, the agent falls back to the first installed model (an explicit
### Provider auto-detection
When `--provider` is omitted, lightpanda picks one in this order. The REPL shows
the resolved provider and model in its status bar; the multi-key picker and any
the resolved model and effort level in its status bar; the multi-key picker and any
fallback notices (e.g. an Ollama default that isn't installed) print to stderr:
1. **Remembered** → the provider/model you last selected with `/provider` or
`/model`, persisted per-directory in `.lp-agent.zon`, as long as its key is
still set.
`/model` (plus the `/effort` level), persisted per-directory in
`.lp-agent.zon`, as long as its key is still set.
2. **Auto-detected** → otherwise the first key found in priority order
(`ANTHROPIC_API_KEY` → `GOOGLE_API_KEY`/`GEMINI_API_KEY` → `OPENAI_API_KEY`).
If several keys are set and you're in an interactive REPL, the agent prompts
@@ -295,8 +304,9 @@ See [agent-script.md](agent-script.md) for the full script format reference.
the provider's fetched model list, and bare `/provider`/`/model` print the
current selection — `/save [file.js]` writes the session to a script and
`/load <path>` runs one from disk (Tab completes file paths), `/quit` exits
the REPL, `/verbosity <low|medium|high>` tunes the log level. These are
REPL-only and never recorded.
the REPL, `/verbosity <low|medium|high>` tunes the log level, and
`/effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
budget (saved to `.lp-agent.zon`). These are REPL-only and never recorded.
```
> /goto https://example.com
> /findElement role=button name=Submit

View File

@@ -123,6 +123,12 @@ fn dumpValidator(_: Allocator, args: *std.process.ArgIterator) !?DumpFormat {
pub const AiProvider = std.meta.Tag(zenai.provider.Client);
/// Per-turn reasoning budget for `agent` mode, mirroring Claude's effort
/// levels. Maps to each provider's native thinking/reasoning knob. Resolved
/// in `Agent.init` (explicit flag > remembered > mode default), so there is
/// no Config-level accessor like `agentVerbosity`.
pub const Effort = zenai.provider.Effort;
/// Controls how chatty `agent` mode is on stderr.
pub const AgentVerbosity = enum {
/// REPL: spinner + per-turn summary. Non-REPL: final answer + errors only.
@@ -231,6 +237,7 @@ const Commands = cli.Builder(.{
.{ .name = "task", .type = ?[]const u8 },
.{ .name = "attach", .short = 'a', .type = []const u8, .multiple = true },
.{ .name = "verbosity", .type = ?AgentVerbosity },
.{ .name = "effort", .type = ?Effort },
.{ .name = "list_models", .type = bool },
.{ .name = "no_llm", .type = bool },
},

View File

@@ -109,6 +109,8 @@ active_script_runtime: ?*ScriptRuntime = null,
messages: std.ArrayList(zenai.provider.Message),
message_arena: std.heap.ArenaAllocator,
model: []u8,
/// Per-turn reasoning budget for LLM turns. Mutable at runtime via `/effort`.
effort: Config.Effort,
system_prompt: []const u8,
script_file: ?[]const u8,
one_shot_task: ?[]const u8,
@@ -140,6 +142,16 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re
return "";
}
/// Pick the reasoning effort for this run.
///
/// Sources are consulted in order and the first one that yields a value wins:
///   1. `opts.effort` — the explicit `--effort` flag.
///   2. `remembered.effort` — the level stored in `.lp-agent.zon`, if any.
///   3. The mode default: `.low` when the interactive REPL will run (keeps
///      turns snappy), `.medium` otherwise (one-shot `--task` and script runs,
///      where answer quality matters more than per-turn latency).
fn resolveEffort(opts: Config.Agent, remembered: ?settings.Remembered, will_repl: bool) Config.Effort {
    // 1. An explicit flag always takes priority.
    if (opts.effort) |explicit| return explicit;

    // 2. Fall back to the remembered level when one was stored.
    if (remembered) |saved| {
        if (saved.effort) |stored| return stored;
    }

    // 3. Nothing was specified anywhere: use the default for this mode.
    const mode_default: Config.Effort = if (will_repl) .low else .medium;
    return mode_default;
}
const ReconciledModel = union(enum) {
/// Owned by the allocator passed to reconcileModel.
use: []u8,
@@ -258,9 +270,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
}
}
const effort = resolveEffort(opts, remembered, will_repl);
if (resolved) |r| {
if (r.source == .picked) {
settings.saveRemembered(r.credentials.provider, model) catch {};
settings.saveRemembered(.{ .provider = r.credentials.provider, .model = model, .effort = effort }) catch {};
}
// provider/model now live in the status bar; just space before the help
std.debug.print("\n", .{});
@@ -291,6 +305,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
.messages = .empty,
.message_arena = .init(allocator),
.model = model,
.effort = effort,
.system_prompt = opts.system_prompt orelse default_system_prompt,
.script_file = opts.script_file,
.one_shot_task = opts.task,
@@ -617,6 +632,7 @@ fn handleMeta(self: *Agent, arena: std.mem.Allocator, meta: *const SlashCommand.
.quit => return true,
.help => self.printSlashHelp(arena, rest),
.verbosity => self.handleVerbosity(rest),
.effort => self.handleEffort(rest),
.save => self.handleSave(arena, rest),
.load => self.handleLoad(rest),
.model => self.handleModel(arena, rest),
@@ -638,6 +654,20 @@ fn handleVerbosity(self: *Agent, rest: []const u8) void {
self.terminal.printInfo("verbosity: {s}", .{@tagName(level)});
}
/// Handle the `/effort` meta command.
///
/// With no argument, prints the current effort level. Otherwise the argument
/// is parsed as a `Config.Effort` tag name; on success the level is stored on
/// the agent, the status bar is refreshed, and the new level is reported via
/// `reportSaved`. An unrecognized level prints a usage error and leaves the
/// current level unchanged.
fn handleEffort(self: *Agent, rest: []const u8) void {
    // Trim surrounding whitespace (as `handleLoad` does) so input such as
    // "/effort high " still matches a tag name and a whitespace-only argument
    // is treated like the bare command.
    const arg = std.mem.trim(u8, rest, &std.ascii.whitespace);
    if (arg.len == 0) {
        self.terminal.printInfo("effort: {s}", .{@tagName(self.effort)});
        return;
    }
    const level = std.meta.stringToEnum(Config.Effort, arg) orelse {
        self.terminal.printError("usage: /effort <none|minimal|low|medium|high|xhigh> (got {s})", .{arg});
        return;
    };
    self.effort = level;
    // The status bar shows the effort level, so refresh it before reporting.
    self.updateStatusBar();
    self.reportSaved("effort", @tagName(level));
}
fn handleLoad(self: *Agent, rest: []const u8) void {
const path = std.mem.trim(u8, rest, &std.ascii.whitespace);
if (path.len == 0) {
@@ -684,19 +714,26 @@ fn containsString(haystack: []const []const u8, needle: []const u8) bool {
return false;
}
/// Print "<label>: <value>" and, when possible, persist the agent's current
/// provider/model/effort to `.lp-agent.zon`.
///
/// The message gains a "(saved to …)" suffix only when the write succeeded.
/// Nothing is written when the agent has no model credentials (basic REPL),
/// and a failed write is not reported as an error — the plain message is
/// printed instead.
fn reportSaved(self: *Agent, label: []const u8, value: []const u8) void {
    // True only when credentials exist and the settings file was written.
    const persisted = blk: {
        const creds = self.model_credentials orelse break :blk false;
        settings.saveRemembered(.{
            .provider = creds.provider,
            .model = self.model,
            .effort = self.effort,
        }) catch break :blk false;
        break :blk true;
    };

    if (persisted) {
        self.terminal.printInfo("{s}: {s} (saved to {s})", .{ label, value, settings.remembered_path });
    } else {
        self.terminal.printInfo("{s}: {s}", .{ label, value });
    }
}
/// Switch the active model to `model`.
///
/// Stores an owned copy of the name in `self.model`, then reports the new
/// model to the terminal, noting `settings.remembered_path` when the selection
/// was persisted. The only error returned is an allocation failure while
/// copying the name; in that case `self.model` is left untouched.
fn setModel(self: *Agent, model: []const u8) !void {
// Copy before freeing: the old name stays valid until the duplicate exists,
// so `model` may alias `self.model`.
const new_model = try self.allocator.dupe(u8, model);
self.allocator.free(self.model);
self.model = new_model;
const c = self.model_credentials orelse {
self.terminal.printInfo("model: {s}", .{self.model});
return;
};
if (settings.saveRemembered(c.provider, self.model)) {
self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
} else |_| {
self.terminal.printInfo("model: {s}", .{self.model});
}
self.reportSaved("model", self.model);
}
fn updateStatusBar(self: *Agent) void {
@@ -707,8 +744,10 @@ fn updateStatusBar(self: *Agent) void {
});
return;
}
var status_buf: [256]u8 = undefined;
const left_text = std.fmt.bufPrint(&status_buf, "{s} · {s}", .{ self.model, @tagName(self.effort) }) catch self.model;
self.terminal.setStatus(&.{
.{ .text = self.model, .side = .left, .rank = 3 },
.{ .text = left_text, .side = .left, .rank = 3 },
.{ .text = "! JS", .side = .right, .rank = 2 },
.{ .text = "Tab completes", .side = .right, .rank = 1 },
.{ .text = "/help", .side = .right, .rank = 4 },
@@ -755,11 +794,7 @@ fn setProvider(self: *Agent, credentials: Credentials) !void {
self.allocator.free(self.model);
self.model = new_model;
self.terminal.printInfo("provider: {s}", .{@tagName(credentials.provider)});
if (settings.saveRemembered(credentials.provider, self.model)) {
self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
} else |_| {
self.terminal.printInfo("model: {s}", .{self.model});
}
self.reportSaved("model", self.model);
_ = completionModels(self, self.allocator);
}
@@ -960,7 +995,7 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
.max_turns = 1,
.max_tokens = 8192,
.tool_choice = .none,
.thinking_level = .medium,
.effort = .medium,
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
},
) catch |err| {
@@ -1108,6 +1143,10 @@ fn printSlashHelp(self: *Agent, arena: std.mem.Allocator, target: []const u8) vo
"/verbosity <low|medium|high> — set REPL agent verbosity (currently: {s}). Bare /verbosity prints the level.",
.{@tagName(self.terminal.verbosity)},
),
.effort => self.terminal.printInfo(
"/effort <none|minimal|low|medium|high|xhigh> — set per-turn reasoning effort (currently: {s}); saved to {s}. Bare /effort prints the level.",
.{ @tagName(self.effort), settings.remembered_path },
),
.save => self.terminal.printInfo(
"/save [filename.js] [prompt] — save the session to [filename.js] (a random session-*.js if omitted). With an LLM, synthesizes an idiomatic script from the session and the optional prompt; with --no-llm, dumps the recorded actions verbatim.",
.{},
@@ -1415,9 +1454,10 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
.max_tool_calls = 200,
.max_tokens = 4096,
.tool_choice = .auto,
// Cap per-turn reasoning so thinking models don't burn
// minutes per turn. Ignored by non-thinking models.
.thinking_level = .medium,
// Per-turn reasoning budget; resolved from --effort / .lp-agent.zon
// / mode default and adjustable at runtime via /effort. Ignored by
// non-thinking models.
.effort = self.effort,
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
},
) catch |err| {
@@ -1495,7 +1535,7 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
.tool_choice = .none,
// .low (≈512 tokens) so reasoning models still pick an answer
// but can't burn the whole turn on thinking and emit nothing.
.thinking_level = .low,
.effort = .low,
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
},
) catch |err| {

View File

@@ -16,8 +16,8 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/model`,
//! `/provider`). Meta commands aren't tool slash commands — they're handled
//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/effort`,
//! `/model`, `/provider`). Meta commands aren't tool slash commands — they're handled
//! by `Agent.handleMeta` and never reach the recorder. Tool slash-command
//! schema primitives live in `lp.Schema`; consumers should import that
//! directly.
@@ -47,13 +47,14 @@ pub const MetaCommand = struct {
/// Dispatched by `Agent.handleMeta` via an exhaustive switch so adding
/// a new meta command is a compile error until it's wired up there too.
const Tag = enum { help, quit, verbosity, save, load, model, provider };
const Tag = enum { help, quit, verbosity, effort, save, load, model, provider };
};
pub const meta_commands = [_]MetaCommand{
.{ .tag = .help, .name = "help", .hint = "[command]", .values = &.{}, .description = "List commands, or show help for one" },
.{ .tag = .quit, .name = "quit", .hint = "", .values = &.{}, .description = "Exit the REPL" },
.{ .tag = .verbosity, .name = "verbosity", .hint = "<low|medium|high>", .values = &.{ "low", "medium", "high" }, .description = "Set agent verbosity" },
.{ .tag = .effort, .name = "effort", .hint = "<none|minimal|low|medium|high|xhigh>", .values = &.{ "none", "minimal", "low", "medium", "high", "xhigh" }, .description = "Set per-turn reasoning effort" },
.{ .tag = .save, .name = "save", .hint = "[filename.js] [prompt]", .values = &.{}, .description = "Save this session to a file" },
.{ .tag = .load, .name = "load", .hint = "<path>", .values = &.{}, .description = "Load and run a script from disk" },
.{ .tag = .model, .name = "model", .hint = "[name]", .values = &.{}, .description = "Change the model" },

View File

@@ -108,11 +108,14 @@ pub fn resolveCredentials(allocator: std.mem.Allocator, opts: Config.Agent, reme
pub const remembered_path = ".lp-agent.zon";
/// Last user-selected provider/model, persisted per-directory in `.lp-agent.zon`.
/// `model` is owned by the caller.
/// Last user-selected provider/model/effort, persisted per-directory in
/// `.lp-agent.zon`. `model` is owned by the caller. `effort` is optional so
/// files written before it existed still parse; null means "use the mode
/// default" (see `Agent.resolveEffort`).
pub const Remembered = struct {
/// Provider the remembered selection belongs to.
provider: Config.AiProvider,
/// Model name. The slice is owned by the caller, not by this struct.
model: []const u8,
/// Reasoning effort level. Defaults to null so a file without this field
/// still parses; null means "fall back to the mode default".
effort: ?Config.Effort = null,
};
pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
@@ -127,10 +130,10 @@ pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
}
/// Persist the current selection to `remembered_path`. Errors are returned to
/// the caller; persistence is best-effort, so callers may choose to ignore them.
pub fn saveRemembered(provider: Config.AiProvider, model: []const u8) !void {
pub fn saveRemembered(remembered: Remembered) !void {
var buf: [512]u8 = undefined;
var w: std.Io.Writer = .fixed(&buf);
try std.zon.stringify.serialize(Remembered{ .provider = provider, .model = model }, .{}, &w);
try std.zon.stringify.serialize(remembered, .{}, &w);
try std.fs.cwd().writeFile(.{ .sub_path = remembered_path, .data = w.buffered() });
}