mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
agent: add reasoning effort configuration
Adds the `--effort` CLI flag and `/effort` REPL command to control the reasoning budget, persisting the setting in `.lp-agent.zon`.
This commit is contained in:
@@ -35,8 +35,8 @@
|
||||
.hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr",
|
||||
},
|
||||
.zenai = .{
|
||||
.url = "git+https://github.com/lightpanda-io/zenai.git#c8d6cfa13a93bcd41a8a34b26f9a49b1656f555a",
|
||||
.hash = "zenai-0.0.0-iOY_VNNkBAAUsSB8OAR_pDsBcxMT8k_Kc0U3b5jSKfeC",
|
||||
.url = "git+https://github.com/lightpanda-io/zenai.git#f2031a854aa0533a8bf3dfa7d0ede6cd77dc59df",
|
||||
.hash = "zenai-0.0.0-iOY_VOhjBAB5sEZuzUdokns-GYzvrbPcanneYdpJRW1V",
|
||||
},
|
||||
.isocline = .{
|
||||
.url = "git+https://github.com/arrufat/isocline?ref=lightpanda#75f26db298f679e8aec82d8fc50c7115158a63e8",
|
||||
|
||||
@@ -63,6 +63,15 @@ system prompt, and `--verbosity <low|medium|high>` to tune how much progress
|
||||
detail goes to stderr (`--task` defaults to `low`, or `high` when stderr is
|
||||
piped/redirected so harnesses capture the full `[tool/result]` trace).
|
||||
|
||||
`--effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
|
||||
budget for thinking models (it maps to each provider's native thinking /
|
||||
reasoning-effort knob and is ignored by non-thinking models). The interactive
|
||||
REPL defaults to `low` so turns stay snappy; `--task` and script runs default
|
||||
to `medium`, where answer quality matters more than per-turn latency. Higher
|
||||
effort can reduce the number of tool calls by planning better, so it's a real
|
||||
tradeoff rather than a pure slowdown. Change it live with `/effort`; the
|
||||
selection is remembered in `.lp-agent.zon`.
|
||||
|
||||
`--model` is validated against the provider's catalog up front: an unknown name
|
||||
fails fast with a pointer to `--list-models` rather than erroring mid-task. For
|
||||
Ollama, the default model is checked against what's actually pulled — if it's
|
||||
@@ -72,12 +81,12 @@ missing, the agent falls back to the first installed model (an explicit
|
||||
### Provider auto-detection
|
||||
|
||||
When `--provider` is omitted, lightpanda picks one in this order. The REPL shows
|
||||
the resolved provider and model in its status bar; the multi-key picker and any
|
||||
the resolved model and effort level in its status bar; the multi-key picker and any
|
||||
fallback notices (e.g. an Ollama default that isn't installed) print to stderr:
|
||||
|
||||
1. **Remembered** → the provider/model you last selected with `/provider` or
|
||||
`/model`, persisted per-directory in `.lp-agent.zon`, as long as its key is
|
||||
still set.
|
||||
`/model` (plus the `/effort` level), persisted per-directory in
|
||||
`.lp-agent.zon`, as long as its key is still set.
|
||||
2. **Auto-detected** → otherwise the first key found in priority order
|
||||
(`ANTHROPIC_API_KEY` → `GOOGLE_API_KEY`/`GEMINI_API_KEY` → `OPENAI_API_KEY`).
|
||||
If several keys are set and you're in an interactive REPL, the agent prompts
|
||||
@@ -295,8 +304,9 @@ See [agent-script.md](agent-script.md) for the full script format reference.
|
||||
the provider's fetched model list, and bare `/provider`/`/model` print the
|
||||
current selection — `/save [file.js]` writes the session to a script and
|
||||
`/load <path>` runs one from disk (Tab completes file paths), `/quit` exits
|
||||
the REPL, `/verbosity <low|medium|high>` tunes the log level. These are
|
||||
REPL-only and never recorded.
|
||||
the REPL, `/verbosity <low|medium|high>` tunes the log level, and
|
||||
`/effort <none|minimal|low|medium|high|xhigh>` sets the per-turn reasoning
|
||||
budget (saved to `.lp-agent.zon`). These are REPL-only and never recorded.
|
||||
```
|
||||
> /goto https://example.com
|
||||
> /findElement role=button name=Submit
|
||||
|
||||
@@ -123,6 +123,12 @@ fn dumpValidator(_: Allocator, args: *std.process.ArgIterator) !?DumpFormat {
|
||||
|
||||
pub const AiProvider = std.meta.Tag(zenai.provider.Client);
|
||||
|
||||
/// Per-turn reasoning budget for `agent` mode, mirroring Claude's effort
|
||||
/// levels. Maps to each provider's native thinking/reasoning knob. Resolved
|
||||
/// in `Agent.init` (explicit flag > remembered > mode default), so there is
|
||||
/// no Config-level accessor like `agentVerbosity`.
|
||||
pub const Effort = zenai.provider.Effort;
|
||||
|
||||
/// Controls how chatty `agent` mode is on stderr.
|
||||
pub const AgentVerbosity = enum {
|
||||
/// REPL: spinner + per-turn summary. Non-REPL: final answer + errors only.
|
||||
@@ -231,6 +237,7 @@ const Commands = cli.Builder(.{
|
||||
.{ .name = "task", .type = ?[]const u8 },
|
||||
.{ .name = "attach", .short = 'a', .type = []const u8, .multiple = true },
|
||||
.{ .name = "verbosity", .type = ?AgentVerbosity },
|
||||
.{ .name = "effort", .type = ?Effort },
|
||||
.{ .name = "list_models", .type = bool },
|
||||
.{ .name = "no_llm", .type = bool },
|
||||
},
|
||||
|
||||
@@ -109,6 +109,8 @@ active_script_runtime: ?*ScriptRuntime = null,
|
||||
messages: std.ArrayList(zenai.provider.Message),
|
||||
message_arena: std.heap.ArenaAllocator,
|
||||
model: []u8,
|
||||
/// Per-turn reasoning budget for LLM turns. Mutable at runtime via `/effort`.
|
||||
effort: Config.Effort,
|
||||
system_prompt: []const u8,
|
||||
script_file: ?[]const u8,
|
||||
one_shot_task: ?[]const u8,
|
||||
@@ -140,6 +142,16 @@ fn resolveModelName(opts: Config.Agent, resolved: ?settings.ResolvedProvider, re
|
||||
return "";
|
||||
}
|
||||
|
||||
/// Pick the reasoning effort for this run.
///
/// Sources are consulted in order and the first one that has a value wins:
///   1. the explicit `--effort` flag (`opts.effort`),
///   2. the level stored in `.lp-agent.zon` (`remembered.effort`),
///   3. the mode default: `.low` when `will_repl` is true, `.medium` otherwise.
fn resolveEffort(opts: Config.Agent, remembered: ?settings.Remembered, will_repl: bool) Config.Effort {
    // 1. An explicit flag always wins.
    if (opts.effort) |from_flag| return from_flag;

    // 2. A remembered selection only counts when it actually carries a level.
    if (remembered) |saved| {
        if (saved.effort) |from_file| return from_file;
    }

    // 3. Nothing was chosen explicitly: fall back to the mode default.
    const mode_default: Config.Effort = if (will_repl) .low else .medium;
    return mode_default;
}
|
||||
|
||||
const ReconciledModel = union(enum) {
|
||||
/// Owned by the allocator passed to reconcileModel.
|
||||
use: []u8,
|
||||
@@ -258,9 +270,11 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
|
||||
}
|
||||
}
|
||||
|
||||
const effort = resolveEffort(opts, remembered, will_repl);
|
||||
|
||||
if (resolved) |r| {
|
||||
if (r.source == .picked) {
|
||||
settings.saveRemembered(r.credentials.provider, model) catch {};
|
||||
settings.saveRemembered(.{ .provider = r.credentials.provider, .model = model, .effort = effort }) catch {};
|
||||
}
|
||||
// provider/model now live in the status bar; just space before the help
|
||||
std.debug.print("\n", .{});
|
||||
@@ -291,6 +305,7 @@ pub fn init(allocator: std.mem.Allocator, app: *App, opts: Config.Agent) !*Agent
|
||||
.messages = .empty,
|
||||
.message_arena = .init(allocator),
|
||||
.model = model,
|
||||
.effort = effort,
|
||||
.system_prompt = opts.system_prompt orelse default_system_prompt,
|
||||
.script_file = opts.script_file,
|
||||
.one_shot_task = opts.task,
|
||||
@@ -617,6 +632,7 @@ fn handleMeta(self: *Agent, arena: std.mem.Allocator, meta: *const SlashCommand.
|
||||
.quit => return true,
|
||||
.help => self.printSlashHelp(arena, rest),
|
||||
.verbosity => self.handleVerbosity(rest),
|
||||
.effort => self.handleEffort(rest),
|
||||
.save => self.handleSave(arena, rest),
|
||||
.load => self.handleLoad(rest),
|
||||
.model => self.handleModel(arena, rest),
|
||||
@@ -638,6 +654,20 @@ fn handleVerbosity(self: *Agent, rest: []const u8) void {
|
||||
self.terminal.printInfo("verbosity: {s}", .{@tagName(level)});
|
||||
}
|
||||
|
||||
/// Handle the `/effort` REPL command.
///
/// With no argument, prints the current effort level. Otherwise the argument
/// is parsed as a `Config.Effort` tag; on success the level is stored on the
/// agent, the status bar is refreshed, and the new level is reported through
/// `reportSaved`. An unrecognized level prints a usage error and leaves the
/// current level unchanged.
fn handleEffort(self: *Agent, rest: []const u8) void {
    // Trim surrounding whitespace (as `handleLoad` does) so input such as
    // "/effort high " still parses and a whitespace-only argument is treated
    // as the bare command.
    const arg = std.mem.trim(u8, rest, &std.ascii.whitespace);
    if (arg.len == 0) {
        self.terminal.printInfo("effort: {s}", .{@tagName(self.effort)});
        return;
    }
    const level = std.meta.stringToEnum(Config.Effort, arg) orelse {
        self.terminal.printError("usage: /effort <none|minimal|low|medium|high|xhigh> (got {s})", .{arg});
        return;
    };
    self.effort = level;
    // The status bar shows the effort level, so refresh it before reporting.
    self.updateStatusBar();
    self.reportSaved("effort", @tagName(level));
}
|
||||
|
||||
fn handleLoad(self: *Agent, rest: []const u8) void {
|
||||
const path = std.mem.trim(u8, rest, &std.ascii.whitespace);
|
||||
if (path.len == 0) {
|
||||
@@ -684,19 +714,26 @@ fn containsString(haystack: []const []const u8, needle: []const u8) bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Print "<label>: <value>" and, when possible, persist the agent's current
/// provider/model/effort to `.lp-agent.zon`.
///
/// The "(saved to …)" suffix is appended only when the write succeeded. With
/// no model credentials nothing is written, and a failed write is reported
/// the same way as no write at all.
fn reportSaved(self: *Agent, label: []const u8, value: []const u8) void {
    // True only when credentials exist and the settings file was written.
    const persisted = blk: {
        const creds = self.model_credentials orelse break :blk false;
        settings.saveRemembered(.{
            .provider = creds.provider,
            .model = self.model,
            .effort = self.effort,
        }) catch break :blk false;
        break :blk true;
    };

    if (persisted) {
        self.terminal.printInfo("{s}: {s} (saved to {s})", .{ label, value, settings.remembered_path });
    } else {
        self.terminal.printInfo("{s}: {s}", .{ label, value });
    }
}
|
||||
|
||||
/// Switch the agent to `model` and report the new selection.
///
/// The name is copied with the agent's allocator; the previous model string
/// is freed only after the copy succeeds, so an allocation failure leaves the
/// current model in place (and `model` may safely alias `self.model`).
/// Reporting and persistence go through `reportSaved`, which replaces the
/// earlier inline call to the two-argument `saveRemembered`.
fn setModel(self: *Agent, model: []const u8) !void {
    const new_model = try self.allocator.dupe(u8, model);
    self.allocator.free(self.model);
    self.model = new_model;
    self.reportSaved("model", self.model);
}
|
||||
|
||||
fn updateStatusBar(self: *Agent) void {
|
||||
@@ -707,8 +744,10 @@ fn updateStatusBar(self: *Agent) void {
|
||||
});
|
||||
return;
|
||||
}
|
||||
var status_buf: [256]u8 = undefined;
|
||||
const left_text = std.fmt.bufPrint(&status_buf, "{s} · {s}", .{ self.model, @tagName(self.effort) }) catch self.model;
|
||||
self.terminal.setStatus(&.{
|
||||
.{ .text = self.model, .side = .left, .rank = 3 },
|
||||
.{ .text = left_text, .side = .left, .rank = 3 },
|
||||
.{ .text = "! JS", .side = .right, .rank = 2 },
|
||||
.{ .text = "Tab completes", .side = .right, .rank = 1 },
|
||||
.{ .text = "/help", .side = .right, .rank = 4 },
|
||||
@@ -755,11 +794,7 @@ fn setProvider(self: *Agent, credentials: Credentials) !void {
|
||||
self.allocator.free(self.model);
|
||||
self.model = new_model;
|
||||
self.terminal.printInfo("provider: {s}", .{@tagName(credentials.provider)});
|
||||
if (settings.saveRemembered(credentials.provider, self.model)) {
|
||||
self.terminal.printInfo("model: {s} (saved to {s})", .{ self.model, settings.remembered_path });
|
||||
} else |_| {
|
||||
self.terminal.printInfo("model: {s}", .{self.model});
|
||||
}
|
||||
self.reportSaved("model", self.model);
|
||||
_ = completionModels(self, self.allocator);
|
||||
}
|
||||
|
||||
@@ -960,7 +995,7 @@ fn synthesizeSave(self: *Agent, arena: std.mem.Allocator, filename: ?[]const u8,
|
||||
.max_turns = 1,
|
||||
.max_tokens = 8192,
|
||||
.tool_choice = .none,
|
||||
.thinking_level = .medium,
|
||||
.effort = .medium,
|
||||
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
|
||||
},
|
||||
) catch |err| {
|
||||
@@ -1108,6 +1143,10 @@ fn printSlashHelp(self: *Agent, arena: std.mem.Allocator, target: []const u8) vo
|
||||
"/verbosity <low|medium|high> — set REPL agent verbosity (currently: {s}). Bare /verbosity prints the level.",
|
||||
.{@tagName(self.terminal.verbosity)},
|
||||
),
|
||||
.effort => self.terminal.printInfo(
|
||||
"/effort <none|minimal|low|medium|high|xhigh> — set per-turn reasoning effort (currently: {s}); saved to {s}. Bare /effort prints the level.",
|
||||
.{ @tagName(self.effort), settings.remembered_path },
|
||||
),
|
||||
.save => self.terminal.printInfo(
|
||||
"/save [filename.js] [prompt] — save the session to [filename.js] (a random session-*.js if omitted). With an LLM, synthesizes an idiomatic script from the session and the optional prompt; with --no-llm, dumps the recorded actions verbatim.",
|
||||
.{},
|
||||
@@ -1415,9 +1454,10 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
|
||||
.max_tool_calls = 200,
|
||||
.max_tokens = 4096,
|
||||
.tool_choice = .auto,
|
||||
// Cap per-turn reasoning so thinking models don't burn
|
||||
// minutes per turn. Ignored by non-thinking models.
|
||||
.thinking_level = .medium,
|
||||
// Per-turn reasoning budget; resolved from --effort / .lp-agent.zon
|
||||
// / mode default and adjustable at runtime via /effort. Ignored by
|
||||
// non-thinking models.
|
||||
.effort = self.effort,
|
||||
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
|
||||
},
|
||||
) catch |err| {
|
||||
@@ -1495,7 +1535,7 @@ fn processUserMessage(self: *Agent, input: TurnInput) !?[]const u8 {
|
||||
.tool_choice = .none,
|
||||
// .low (≈512 tokens) so reasoning models still pick an answer
|
||||
// but can't burn the whole turn on thinking and emit nothing.
|
||||
.thinking_level = .low,
|
||||
.effort = .low,
|
||||
.cancel = .{ .context = @ptrCast(self), .checkFn = checkCancel },
|
||||
},
|
||||
) catch |err| {
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/model`,
|
||||
//! `/provider`). Meta commands aren't tool slash commands — they're handled
|
||||
//! REPL-only meta slash commands (`/help`, `/quit`, `/verbosity`, `/effort`,
|
||||
//! `/model`, `/provider`). Meta commands aren't tool slash commands — they're handled
|
||||
//! by `Agent.handleMeta` and never reach the recorder. Tool slash-command
|
||||
//! schema primitives live in `lp.Schema`; consumers should import that
|
||||
//! directly.
|
||||
@@ -47,13 +47,14 @@ pub const MetaCommand = struct {
|
||||
|
||||
/// Dispatched by `Agent.handleMeta` via an exhaustive switch so adding
|
||||
/// a new meta command is a compile error until it's wired up there too.
|
||||
const Tag = enum { help, quit, verbosity, save, load, model, provider };
|
||||
const Tag = enum { help, quit, verbosity, effort, save, load, model, provider };
|
||||
};
|
||||
|
||||
pub const meta_commands = [_]MetaCommand{
|
||||
.{ .tag = .help, .name = "help", .hint = "[command]", .values = &.{}, .description = "List commands, or show help for one" },
|
||||
.{ .tag = .quit, .name = "quit", .hint = "", .values = &.{}, .description = "Exit the REPL" },
|
||||
.{ .tag = .verbosity, .name = "verbosity", .hint = "<low|medium|high>", .values = &.{ "low", "medium", "high" }, .description = "Set agent verbosity" },
|
||||
.{ .tag = .effort, .name = "effort", .hint = "<none|minimal|low|medium|high|xhigh>", .values = &.{ "none", "minimal", "low", "medium", "high", "xhigh" }, .description = "Set per-turn reasoning effort" },
|
||||
.{ .tag = .save, .name = "save", .hint = "[filename.js] [prompt]", .values = &.{}, .description = "Save this session to a file" },
|
||||
.{ .tag = .load, .name = "load", .hint = "<path>", .values = &.{}, .description = "Load and run a script from disk" },
|
||||
.{ .tag = .model, .name = "model", .hint = "[name]", .values = &.{}, .description = "Change the model" },
|
||||
|
||||
@@ -108,11 +108,14 @@ pub fn resolveCredentials(allocator: std.mem.Allocator, opts: Config.Agent, reme
|
||||
|
||||
pub const remembered_path = ".lp-agent.zon";
|
||||
|
||||
/// Last user-selected provider/model, persisted per-directory in `.lp-agent.zon`.
|
||||
/// `model` is owned by the caller.
|
||||
/// Last user-selected provider/model/effort, persisted per-directory in
|
||||
/// `.lp-agent.zon`. `model` is owned by the caller. `effort` is optional so
|
||||
/// files written before it existed still parse; null means "use the mode
|
||||
/// default" (see `Agent.resolveEffort`).
|
||||
pub const Remembered = struct {
    /// Provider the remembered selection belongs to.
    provider: Config.AiProvider,
    /// Model name; the backing memory is owned by the caller.
    model: []const u8,
    /// Reasoning effort level. Defaults to null, which means "use the mode
    /// default".
    effort: ?Config.Effort = null,
};
|
||||
|
||||
pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
|
||||
@@ -127,10 +130,10 @@ pub fn loadRemembered(allocator: std.mem.Allocator) ?Remembered {
|
||||
}
|
||||
|
||||
/// Write `remembered` to `remembered_path` (`.lp-agent.zon`) in the current
/// working directory, serialized as ZON.
///
/// Errors from serialization or from the file write are returned to the
/// caller; nothing is swallowed here. Callers that want best-effort
/// persistence must discard the error themselves.
pub fn saveRemembered(remembered: Remembered) !void {
    // Serialize into a fixed stack buffer so no allocator is needed.
    // NOTE(review): a selection larger than 512 bytes is expected to surface
    // as a serialization error rather than being truncated — confirm.
    var buf: [512]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf);
    try std.zon.stringify.serialize(remembered, .{}, &w);
    try std.fs.cwd().writeFile(.{ .sub_path = remembered_path, .data = w.buffered() });
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user