Improve extraction (#2577)

* tools: add session-scoped bridge store

Exposes `globalThis.lp` to `/eval` calls, allowing state to persist
across evaluations and page navigations. Adds a `save` parameter to
both `/eval` and `/extract` to store results in the bridge.

* browser: await promises in eval and support inline args

- Await JS Promises in `eval` tool with a 30s timeout
- Support inline arguments in multi-line slash commands
- Silence output on successful `save=`
- Add `limit` option to extract schema walker

* eval: return empty text for undefined async IIFE

* extract: support limit on simple string arrays

Treats `["<sel>"]` as sugar for `[{"selector": "<sel>"}]` in the schema
walker. This enables the `"limit"` option on simple string arrays.
Also updates agent documentation to cover cross-call state with `lp.*`.

* refactor: optimize bridge store and schema lookup

- Introduce `bridgeStorePut` to skip redundant JSON validation for
  trusted stringified values in `bridgeSync`.
- Store the schema pointer in `BlockOpener` to avoid re-parsing and
  looking up the schema in `Iterator.next`.
- Clean up error handling and optional unwrapping in `execEval`.
This commit is contained in:
Adrià Arrufat
2026-05-29 17:15:21 +02:00
committed by GitHub
parent 0a107e07a2
commit 9689aa0412
8 changed files with 666 additions and 14 deletions

View File

@@ -131,10 +131,14 @@ as a single JSON object. Supported value forms:
- `"<sel>"` — `textContent.trim()` of the first match (string or `null`).
- `""` — the matched element's own text (only inside a `fields` block).
- `["<sel>"]` — text of every match (string array).
- `["<sel>"]` — text of every match (string array). Sugar for
`[{"selector": "<sel>"}]`.
- `{"selector": "<sel>", "attr": "<name>"}` — attribute of the first match.
- `[{"selector": "<sel>", "fields": {…}}]` — array of records, each
`fields` value resolved relative to the matched element.
- Add `"limit": N` inside any array's object spec to cap matches at N
(works for text, attribute, and `fields` shapes — e.g.
`[{"selector": ".story .title", "limit": 5}]` for top 5 titles).
Use `/extract '''…'''` (or `"""…"""`) to spread a schema across multiple
lines. The schema is parsed in Zig before the page-side walker runs,
@@ -142,6 +146,79 @@ so a malformed schema fails with `Error: invalid /extract schema JSON`
rather than a V8 stack trace. See [agent-tutorial.md](agent-tutorial.md)
section 3 for a worked example against Hacker News.
### Cross-call state with `lp.*`
`/extract` and `/eval` each return one value per call, but real scrapes
often need to carry data forward — capture a list on one page, then walk
it across navigations. Two primitives keep that simple.
**`save=<name>`** on `/extract` or `/eval` stashes the result in a
Session-scoped store keyed by `<name>` instead of dumping it to stdout.
The stored value is then exposed to every subsequent `/eval` as
`globalThis.lp.<name>`:
```pandascript
/goto 'https://news.ycombinator.com/'
/extract save=front '''
{
"stories": [{
"selector": "tr.athing",
"limit": 5,
"fields": {
"id": {"attr": "id"},
"title": ".titleline > a"
}
}]
}
'''
/eval '''
console.log(lp.front.stories[0].title);
'''
```
Commands run with `save=` print nothing on success, so script output pipes cleanly.
**Auto-sync.** Any mutation of `lp.*` inside an `/eval` is persisted at
the end of the call. Adding a key (`lp.x = …`), updating a nested value
(`lp.front.stories[0].comments = […]`), or removing a key
(`delete lp.x`) all propagate to the store. The next `/eval` sees the
update — even after a navigation, because the store lives Session-side,
not on the page. The sync serializes `lp` with `JSON.stringify`, so only
JSON-serializable values are carried over.
**Async eval.** If your `/eval` body returns a Promise, `runEval`
pumps the event loop until it settles — giving up with an error after
30 seconds — then surfaces the resolved value (or the rejection as an
error). Combined with the bridge, this lets a single `/eval` run an
async `fetch` loop over `lp.*` data:
```pandascript
/eval '''
(async () => {
for (const s of lp.front.stories) {
const html = await fetch('/item?id=' + s.id).then(r => r.text());
const doc = new DOMParser().parseFromString(html, 'text/html');
s.comments = [...doc.querySelectorAll('tr.athing.comtr')].slice(0, 3)
.map(r => r.querySelector('.commtext')?.textContent.trim())
.filter(Boolean);
}
})()
'''
/eval '''
JSON.stringify(lp.front.stories)
'''
```
An async IIFE with no explicit `return` resolves to `undefined`, which
the eval treats as silent — so the loop above prints nothing, and only
the final `JSON.stringify` lands on stdout.
The store is **script-run scoped**: it's bound to the Session that runs
the script, and goes away when that Session does. There is no
cross-session persistence; if you need that, use `localStorage` (which
is now origin-scoped and persists across navigations within a session).
### Recording
Interactive sessions can write back to a `.lp` file:

View File

@@ -701,6 +701,7 @@ pub fn promptNumberedChoice(header: []const u8, items: []const []const u8, defau
}
pub fn printAssistant(_: *Terminal, text: []const u8) void {
if (text.len == 0) return;
const fd = std.posix.STDOUT_FILENO;
_ = std.posix.write(fd, text) catch {};
_ = std.posix.write(fd, "\n") catch {};

View File

@@ -55,6 +55,9 @@ arena: Allocator,
history: History,
navigation: Navigation,
storage_shed: storage.Shed,
// Backs `globalThis.lp.*`; values pre-stringified so the prelude splices
// them in without re-encoding.
bridge_store: std.StringHashMapUnmanaged([]const u8) = .empty,
notification: *Notification,
cookie_jar: storage.Cookie.Jar,
/// User-provided scripts to inject into header.
@@ -164,6 +167,15 @@ pub fn deinit(self: *Session) void {
self.browser.env.memoryPressureNotification(.critical);
self.storage_shed.deinit(self.browser.app.allocator);
{
const allocator = self.browser.app.allocator;
var it = self.bridge_store.iterator();
while (it.next()) |kv| {
allocator.free(kv.key_ptr.*);
allocator.free(kv.value_ptr.*);
}
self.bridge_store.deinit(allocator);
}
self._console_messages.deinit();
self.arena_pool.release(self.arena);
}

View File

@@ -49,6 +49,30 @@ pub fn thenAndCatch(self: Promise, on_fulfilled: js.Function, on_rejected: js.Fu
return error.PromiseChainFailed;
}
/// Settlement state of a promise. Each tag takes its integer value from the
/// matching `v8.kPending` / `v8.kFulfilled` / `v8.kRejected` constant.
pub const State = enum(u32) {
pending = v8.kPending,
fulfilled = v8.kFulfilled,
rejected = v8.kRejected,
};
/// Current settlement state of this promise, converted from the integer
/// that `v8__Promise__State` reports for the underlying handle.
pub fn state(self: Promise) State {
    const raw = v8.v8__Promise__State(self.handle);
    return @enumFromInt(raw);
}
/// Value the promise settled with — the fulfillment value or the rejection
/// reason. Caller must check `state` first.
pub fn result(self: Promise) js.Value {
    const settled = v8.v8__Promise__Result(self.handle).?;
    return .{ .local = self.local, .handle = settled };
}
/// Marks this promise as handled via `v8__Promise__MarkAsHandled`.
/// Use when the rejection is dealt with inline, so the global
/// unhandled-rejection callback is suppressed for it.
pub fn markAsHandled(self: Promise) void {
v8.v8__Promise__MarkAsHandled(self.handle);
}
/// Returns a `Global` for this promise by delegating to `_persist(true)`.
/// NOTE(review): the meaning of the `true` argument is defined by `_persist`,
/// which is not visible here — confirm before relying on it.
pub fn persist(self: Promise) !Global {
return self._persist(true);
}

View File

@@ -185,7 +185,7 @@ pub const Tool = enum {
.input_schema = url_params_schema,
},
.eval => .{
.description = "Evaluate JavaScript in the current page context. If a url is provided, it navigates to that url first.",
.description = "Evaluate JavaScript in the current page context. If a url is provided, it navigates to that url first. The `globalThis.lp` object exposes a Session-scoped bridge store: values written via `lp.foo = ...` auto-sync at end of eval, surviving navigation; values previously set via `/extract save=` or `/eval save=` appear as `lp.<name>`.",
.input_schema = minify(
\\{
\\ "type": "object",
@@ -193,7 +193,8 @@ pub const Tool = enum {
\\ "script": { "type": "string" },
\\ "url": { "type": "string", "description": "Optional URL to navigate to before evaluating." },
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." },
\\ "save": { "type": "string", "description": "Optional bridge-store key. The eval's return value is stored under this name and re-exposed as `lp.<name>` to subsequent evals. Value must be JSON; wrap non-strings with JSON.stringify(...)." }
\\ },
\\ "required": ["script"]
\\}
@@ -204,14 +205,16 @@ pub const Tool = enum {
\\Extract structured data via a JSON schema. The only tool whose result is recorded as an `/extract` PandaScript line (replay-friendly); answering from `markdown` content in chat is not. Schema is a JSON object literal passed as a string in `schema`. Each value picks what to lift:
\\ "<sel>" → first match's textContent.trim() (string|null)
\\ "" → element's own textContent.trim() (only meaningful inside `fields`)
\\ ["<sel>"] → every match's text (string[])
\\ ["<sel>"] → every match's text (string[]) — sugar for [{"selector":"<sel>"}]
\\ {"selector":"<sel>","attr":"<name>"} → first match's attribute (string|null)
\\ [{"selector":"<sel>","attr":"<name>"}] → every match's attribute (string[])
\\ [{"selector":"<sel>","fields":{…}}] → array of objects, fields resolved relative to each match
\\ add `"limit": N` inside any array's object spec to cap matches at N (works for text, attr, and fields shapes)
\\
\\Examples (schema → result):
\\ {"karma": "#karma"} → {"karma":"42"}
\\ {"items": [".story .title"]} → {"items":["Title 1","Title 2"]}
\\ {"top3": [{"selector":".story .title","limit":3}]} → {"top3":["A","B","C"]}
\\ {"links": [{"selector":"a.title","attr":"href"}]} → {"links":["/a","/b"]}
\\ {"stories": [{"selector":".athing","fields":{"title":".titleline","rank":".rank"}}]} → {"stories":[{"title":"Foo","rank":"1"}]}
,
@@ -219,7 +222,8 @@ pub const Tool = enum {
\\{
\\ "type": "object",
\\ "properties": {
\\ "schema": { "type": "string", "description": "JSON schema object (as a string) describing what to extract. Must be a JSON object literal." }
\\ "schema": { "type": "string", "description": "JSON schema object (as a string) describing what to extract. Must be a JSON object literal." },
\\ "save": { "type": "string", "description": "Optional bridge-store key. The extracted JSON is stored under this name and exposed as `lp.<name>` in subsequent /eval calls." }
\\ },
\\ "required": ["schema"]
\\}
@@ -669,11 +673,10 @@ const schema_walker_prefix =
\\ return m ? m.textContent.trim() : null;
\\ }
\\ if (Array.isArray(v)) {
\\ const inner = v[0];
\\ if (typeof inner === 'string') {
\\ return Array.from(el.querySelectorAll(inner)).map(function(m){ return m.textContent.trim(); });
\\ }
\\ return Array.from(el.querySelectorAll(inner.selector)).map(function(m){ return valueOf(m, inner); });
\\ const inner = typeof v[0] === 'string' ? { selector: v[0] } : v[0];
\\ let matches = Array.from(el.querySelectorAll(inner.selector));
\\ if (typeof inner.limit === 'number') matches = matches.slice(0, inner.limit);
\\ return matches.map(function(m){ return valueOf(m, inner); });
\\ }
\\ const t = v.selector ? el.querySelector(v.selector) : el;
\\ if (!t) return null;
@@ -918,10 +921,15 @@ fn execEval(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R
url: ?[:0]const u8 = null,
timeout: ?u32 = null,
waitUntil: ?lp.Config.WaitUntil = null,
save: ?[]const u8 = null,
};
const args = try parseArgs(Params, arena, arguments);
const page = try ensurePage(session, registry, args.url, args.timeout, args.waitUntil);
const before = session.currentFrame();
const app_allocator = session.browser.app.allocator;
const prelude = bridgePrelude(arena, &session.bridge_store) catch return ToolError.OutOfMemory;
_ = try runEval(arena, page, prelude);
// Block-scope so top-level `let`/`const` don't leak across calls.
const block_script = std.fmt.allocPrintSentinel(
@@ -944,12 +952,38 @@ fn execEval(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R
}
if (result.is_error == true) return result;
// Sync lp.* before any queued navigation tears down this JS context.
const postlude_result: ?ToolResult = runEval(arena, page, bridge_postlude) catch |err| switch (err) {
error.OutOfMemory => return ToolError.OutOfMemory,
else => null,
};
if (postlude_result) |pr| if (!pr.is_error) {
bridgeSync(app_allocator, &session.bridge_store, pr.text) catch |err| switch (err) {
error.OutOfMemory => return ToolError.OutOfMemory,
else => {},
};
};
// Silence on save= success so stdout pipes stay clean.
if (args.save) |name| {
bridgeStoreSet(app_allocator, &session.bridge_store, name, result.text) catch |err| switch (err) {
error.OutOfMemory => return ToolError.OutOfMemory,
error.InvalidJson => return .{
.text = "save= requires the eval to return JSON; wrap with JSON.stringify(...)",
.is_error = true,
},
};
result = .{ .text = "" };
}
// Script may have queued a navigation (e.g. `top.location = …`).
try awaitQueuedNavigation(session);
const after = session.currentFrame() orelse return result;
if (before == null or before.? == after) return result;
registry.reset();
if (result.text.len == 0) return result; // silenced save=; don't re-emit via nav suffix
const page_title = after.getTitle() catch null;
const text = std.fmt.allocPrint(arena, "{s}\n(Navigated to {s}, title: {s})", .{
result.text, after.url, page_title orelse "(none)",
@@ -958,11 +992,26 @@ fn execEval(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R
}
fn execExtract(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError!ToolResult {
const Params = struct { schema: []const u8 };
const Params = struct {
schema: []const u8,
save: ?[]const u8 = null,
};
const args = try parseArgs(Params, arena, arguments);
return extract(arena, session, registry, args.schema);
const result = try extract(arena, session, registry, args.schema);
if (!result.is_error) if (args.save) |name| {
bridgeStoreSet(session.browser.app.allocator, &session.bridge_store, name, result.text) catch |err| switch (err) {
error.OutOfMemory => return ToolError.OutOfMemory,
error.InvalidJson => return .{ .text = "extract: walker produced non-JSON output", .is_error = true },
};
return .{ .text = "" };
};
return result;
}
const eval_promise_timeout_ms: u32 = 30_000;
fn runEval(arena: std.mem.Allocator, page: *lp.Frame, script: [:0]const u8) ToolError!ToolResult {
var ls: lp.js.Local.Scope = undefined;
page.js.localScope(&ls);
@@ -975,6 +1024,36 @@ fn runEval(arena: std.mem.Allocator, page: *lp.Frame, script: [:0]const u8) Tool
const js_result = ls.local.compileAndRun(script, null) catch |err|
return .{ .text = try formatJsError(arena, &try_catch, err), .is_error = true };
if (js_result.isPromise()) {
const promise = js_result.toPromise();
promise.markAsHandled();
var runner = page._session.runner(.{}) catch {
return .{ .text = "promise: no runner available", .is_error = true };
};
var timer = std.time.Timer.start() catch unreachable;
while (promise.state() == .pending) {
const elapsed_ms: u32 = @intCast(timer.read() / std.time.ns_per_ms);
if (elapsed_ms >= eval_promise_timeout_ms) {
return .{ .text = "promise: timed out waiting for resolution", .is_error = true };
}
const budget = @min(eval_promise_timeout_ms - elapsed_ms, 50);
_ = runner.tick(.{ .ms = budget }) catch |err| switch (err) {
error.Cancelled => return .{ .text = "promise: cancelled", .is_error = true },
else => return .{ .text = "promise: tick failed", .is_error = true },
};
}
const settled = promise.result();
// No-return async IIFE → undefined → silence, so pipes stay clean.
if (promise.state() == .fulfilled and settled.isUndefined()) return .{ .text = "" };
const text = settled.toStringSliceWithAlloc(arena) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
else => return .{ .text = try formatJsError(arena, &try_catch, err), .is_error = true },
};
return .{ .text = text, .is_error = (promise.state() == .rejected) };
}
const text = js_result.toStringSliceWithAlloc(arena) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
else => return .{ .text = try formatJsError(arena, &try_catch, err), .is_error = true },
@@ -991,6 +1070,88 @@ fn formatJsError(arena: std.mem.Allocator, try_catch: *lp.js.TryCatch, err: anye
return aw.written();
}
const BridgeStore = std.StringHashMapUnmanaged([]const u8);

/// Builds the script that (re)creates `globalThis.lp` from the bridge store.
///
/// Stored values are already JSON, so they are spliced straight into the
/// object literal instead of round-tripping through `json.Value`. Keys are
/// JSON-escaped and emitted as computed property names (`["key"]:value`):
/// a plain `"__proto__": value` entry in an object literal sets the object's
/// prototype rather than defining an own property, which would hide the
/// entry from `JSON.stringify(globalThis.lp)` and drop it on the next sync.
///
/// Returns a NUL-terminated script allocated from `arena`.
fn bridgePrelude(arena: std.mem.Allocator, store: *const BridgeStore) ![:0]const u8 {
    var aw: std.Io.Writer.Allocating = .init(arena);
    try aw.writer.writeAll("globalThis.lp = {");
    var it = store.iterator();
    var first = true;
    while (it.next()) |kv| {
        if (!first) try aw.writer.writeByte(',');
        first = false;
        // Computed key: always defines an own property, even for "__proto__".
        try aw.writer.writeByte('[');
        try std.json.Stringify.value(kv.key_ptr.*, .{}, &aw.writer);
        try aw.writer.writeAll("]:");
        try aw.writer.writeAll(kv.value_ptr.*);
    }
    try aw.writer.writeAll("};");
    return arena.dupeZ(u8, aw.written());
}
const bridge_postlude: [:0]const u8 = "JSON.stringify(globalThis.lp)";

/// Reconciles `store` with the JSON text produced by `bridge_postlude`.
///
/// Keys missing from the postlude object are dropped so `delete lp.foo`
/// propagates; every key that is present is re-serialized and written back.
/// Syncing is best-effort: input that is not valid JSON, or whose top level
/// is not an object, leaves the store untouched. `error.OutOfMemory` is
/// never swallowed, so the caller can report it.
fn bridgeSync(allocator: std.mem.Allocator, store: *BridgeStore, postlude_json: []const u8) !void {
    var parsed = std.json.parseFromSlice(std.json.Value, allocator, postlude_json, .{}) catch |err| switch (err) {
        error.OutOfMemory => return error.OutOfMemory,
        // Malformed postlude output: nothing to sync.
        else => return,
    };
    defer parsed.deinit();
    if (parsed.value != .object) return;
    const new_obj = parsed.value.object;

    // Collect first, remove second: the map must not be mutated while iterating.
    var to_remove: std.ArrayList([]const u8) = .empty;
    defer to_remove.deinit(allocator);
    var key_it = store.keyIterator();
    while (key_it.next()) |k| {
        if (!new_obj.contains(k.*)) try to_remove.append(allocator, k.*);
    }
    for (to_remove.items) |k| {
        if (store.fetchRemove(k)) |kv| {
            allocator.free(kv.key);
            allocator.free(kv.value);
        }
    }

    // One scratch buffer reused for every entry; bridgeStorePut copies out of it.
    var val_aw: std.Io.Writer.Allocating = .init(allocator);
    defer val_aw.deinit();
    var it = new_obj.iterator();
    while (it.next()) |entry| {
        val_aw.clearRetainingCapacity();
        try std.json.Stringify.value(entry.value_ptr.*, .{}, &val_aw.writer);
        // Trusted JSON path: value was just stringified from a parsed Value.
        try bridgeStorePut(allocator, store, entry.key_ptr.*, val_aw.written());
    }
}
/// Validating write into the bridge store: stores `json_value` under `name`
/// after checking that it is well-formed JSON.
///
/// Returns `error.InvalidJson` when validation fails. A value byte-identical
/// to the one already stored is accepted without re-validation.
fn bridgeStoreSet(allocator: std.mem.Allocator, store: *BridgeStore, name: []const u8, json_value: []const u8) !void {
    // Identical value already stored: nothing to validate or copy.
    if (store.get(name)) |current| {
        if (std.mem.eql(u8, current, json_value)) return;
    }
    const is_json = try std.json.validate(allocator, json_value);
    if (!is_json) return error.InvalidJson;
    // Validated above, so the trusted path handles both replace and insert.
    try bridgeStorePut(allocator, store, name, json_value);
}
/// Unvalidated counterpart of `bridgeStoreSet`. Use only when the caller
/// already produced canonical JSON (e.g. via json.Stringify.value).
///
/// The store owns copies of both key and value, allocated with `allocator`.
fn bridgeStorePut(allocator: std.mem.Allocator, store: *BridgeStore, name: []const u8, json_value: []const u8) !void {
    const slot = store.getPtr(name) orelse {
        // New key: duplicate both halves, releasing them if insertion fails.
        const owned_key = try allocator.dupe(u8, name);
        errdefer allocator.free(owned_key);
        const owned_value = try allocator.dupe(u8, json_value);
        errdefer allocator.free(owned_value);
        try store.put(allocator, owned_key, owned_value);
        return;
    };

    if (std.mem.eql(u8, slot.*, json_value)) return;
    // Copy before freeing so a failed allocation leaves the old value intact.
    const replacement = try allocator.dupe(u8, json_value);
    allocator.free(slot.*);
    slot.* = replacement;
}
/// Resolve a target element from either a CSS selector or a backendNodeId.
fn resolveTarget(
session: *lp.Session,

View File

@@ -653,6 +653,350 @@ test "MCP - eval: localStorage persists across navigations and is origin-scoped"
} }, out.written());
}
// A value stored through `save=` must be visible as `lp.<name>` to the next eval.
test "MCP - eval: save= value is readable via lp.<name> in next eval" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    // Step 1: store a JSON string under the `greeting` key.
    const store_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "JSON.stringify('hello')", "save": "greeting" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, store_request);
    response.clearRetainingCapacity();

    // Step 2: read it back through the bridge object.
    const lookup_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.greeting" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, lookup_request);

    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "hello" }},
    } }, response.written());
}
// A property assigned on `lp` in one eval must be readable in the following eval.
test "MCP - eval: lp.* mutations auto-sync between evals" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    // Step 1: write a counter onto the bridge object.
    const write_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.counter = 7; lp.counter" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, write_request);
    response.clearRetainingCapacity();

    // Step 2: a fresh eval sees the synced value.
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.counter + 1" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);

    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "8" }},
    } }, response.written());
}
// Bridge values must outlive a `goto` to a different host (localhost → 127.0.0.1).
test "MCP - eval: lp.* survives navigation" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_actions.html", &response.writer);
    defer server.deinit();

    // Step 1: stash a token on the bridge object.
    const write_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.token = 'abc'" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, write_request);
    response.clearRetainingCapacity();

    // Step 2: navigate away.
    const goto_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "goto",
        \\ "arguments": { "url": "http://127.0.0.1:9582/src/browser/tests/mcp_actions.html" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, goto_request);
    response.clearRetainingCapacity();

    // Step 3: the token is still there on the new page.
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 3,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.token" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);

    try testing.expectJson(.{ .id = 3, .result = .{
        .content = &.{.{ .type = "text", .text = "abc" }},
    } }, response.written());
}
// `delete lp.<key>` in one eval must remove the key for later evals.
test "MCP - eval: delete lp.<key> removes from bridge store" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    // Step 1: create the key.
    const create_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.tmp = 1" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, create_request);
    response.clearRetainingCapacity();

    // Step 2: delete it.
    const delete_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "delete lp.tmp; 0" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, delete_request);
    response.clearRetainingCapacity();

    // Step 3: the key is gone in the next eval.
    const probe_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 3,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "typeof lp.tmp" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, probe_request);

    try testing.expectJson(.{ .id = 3, .result = .{
        .content = &.{.{ .type = "text", .text = "undefined" }},
    } }, response.written());
}
// An `/extract` result stored with `save=` must be reachable as `lp.<name>` from eval.
test "MCP - extract: save= exposes the result as lp.<name>" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_actions.html", &response.writer);
    defer server.deinit();

    // Step 1: extract the button text and save it under `page`.
    const extract_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "extract",
        \\ "arguments": {
        \\ "schema": "{\"btn\":\"#btn\"}",
        \\ "save": "page"
        \\ }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, extract_request);
    response.clearRetainingCapacity();

    // Step 2: read the saved record through the bridge object.
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.page.btn" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);

    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "Click Me" }},
    } }, response.written());
}
// An eval whose value is an already-resolved Promise reports the resolved value.
test "MCP - eval: Promise.resolve return value is awaited" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "Promise.resolve(7)" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);

    try testing.expectJson(.{ .id = 1, .result = .{
        .content = &.{.{ .type = "text", .text = "7" }},
    } }, response.written());
}
// An async IIFE that awaits in a loop reports the value it finally returns.
test "MCP - eval: async IIFE resolves to returned value" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { const xs = [1,2,3]; let s = 0; for (const x of xs) s += await Promise.resolve(x); return s; })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);

    try testing.expectJson(.{ .id = 1, .result = .{
        .content = &.{.{ .type = "text", .text = "6" }},
    } }, response.written());
}
// A rejected Promise must come back flagged as an error and carry the rejection message.
test "MCP - eval: rejected Promise surfaces as is_error" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { throw new Error('nope'); })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);

    const flagged = std.mem.indexOf(u8, response.written(), "\"isError\":true");
    try testing.expect(flagged != null);
    const message = std.mem.indexOf(u8, response.written(), "nope");
    try testing.expect(message != null);
}
// An async IIFE with no `return` must yield empty text rather than "undefined".
test "MCP - eval: async IIFE without explicit return resolves to empty text" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { lp.touched = true; })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);

    try testing.expectJson(.{ .id = 1, .result = .{
        .content = &.{.{ .type = "text", .text = "" }},
    } }, response.written());
}
// Writes to `lp` made after `await` points inside an async IIFE must still be synced.
test "MCP - eval: lp.* mutations inside async IIFE survive to the next eval" {
    defer testing.reset();
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    // Step 1: accumulate a total across awaited values.
    const accumulate_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { lp.total = 0; for (const n of [10, 20, 30]) lp.total += await Promise.resolve(n); })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, accumulate_request);
    response.clearRetainingCapacity();

    // Step 2: the final total is visible to the next eval.
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.total" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);

    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "60" }},
    } }, response.written());
}
test "MCP - indexLines: exact match returns line + trailing newline" {
var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
defer arena.deinit();

View File

@@ -74,7 +74,14 @@ pub fn next(self: *Iterator) command.ParseError!?Entry {
// it if any allocation between here and successful return fails.
errdefer self.allocator.free(body);
const span_end = self.lines.index orelse self.lines.buffer.len;
var obj: std.json.ObjectMap = .init(self.allocator);
if (opener.inline_args.len > 0) {
if (try opener.schema.parseInlineKv(self.allocator, opener.inline_args)) |v| if (v == .object) {
var it = v.object.iterator();
while (it.next()) |kv| try obj.put(kv.key_ptr.*, kv.value_ptr.*);
};
}
try obj.put(opener.field, .{ .string = body });
return .{
.line_num = start_line,
@@ -100,16 +107,24 @@ pub fn next(self: *Iterator) command.ParseError!?Entry {
/// Description of a line that opens a multi-line block, as returned by
/// `tryBlockOpener`.
const BlockOpener = struct {
/// Tool of the schema matched by the slash-command name.
tool: BrowserTool,
/// Schema matched by name; `Iterator.next` uses it to parse `inline_args`.
schema: *const Schema,
/// First required field of the schema; `Iterator.next` stores the block
/// body under this key.
field: []const u8,
/// Quote style parsed from the opener's triple-quote literal.
quote_type: Schema.QuoteType,
/// Slice between the tool name and the triple-quote, e.g.
/// `save=stories` in `/extract save=stories '''`.
inline_args: []const u8,
};
fn tryBlockOpener(line: []const u8) ?BlockOpener {
const split = Schema.parseSlashCommand(line) orelse return null;
const s = Schema.findByName(split.name) orelse return null;
if (!s.isMultiLineCapable()) return null;
const qt = Schema.QuoteType.fromLiteral(split.rest) orelse return null;
return .{ .tool = s.tool, .field = s.required[0], .quote_type = qt };
const rest = std.mem.trimRight(u8, split.rest, &std.ascii.whitespace);
if (rest.len < 3) return null;
const qt = Schema.QuoteType.fromLiteral(rest[rest.len - 3 ..]) orelse return null;
const inline_args = std.mem.trim(u8, rest[0 .. rest.len - 3], &std.ascii.whitespace);
return .{ .tool = s.tool, .schema = s, .field = s.required[0], .quote_type = qt, .inline_args = inline_args };
}
fn collectMultiLineBlock(self: *Iterator, quote_type: Schema.QuoteType) std.mem.Allocator.Error!?[]const u8 {

View File

@@ -222,6 +222,24 @@ pub fn parseValueDiag(self: Schema, arena: std.mem.Allocator, rest_raw: []const
return try self.buildValue(arena, list.items, diag);
}
/// Parses `key=value` tokens into a JSON value, like `parseValueDiag` but
/// without the required-field check: the multi-line body fills the required
/// field via a separate path.
///
/// Returns `null` when `rest_raw` is empty or whitespace only,
/// `error.MalformedKv` for a token with no `=`, an empty key, or an empty
/// value, and `error.UnknownField` for a key the schema does not define.
pub fn parseInlineKv(self: Schema, arena: std.mem.Allocator, rest_raw: []const u8) ParseError!?std.json.Value {
    const trimmed = std.mem.trim(u8, rest_raw, &std.ascii.whitespace);
    if (trimmed.len == 0) return null;

    const tokens = try tokenize(arena, trimmed);
    var pairs = try std.ArrayList(KvPair).initCapacity(arena, tokens.len);
    for (tokens) |token| {
        const eq_index = std.mem.indexOfScalar(u8, token, '=') orelse return error.MalformedKv;
        // Reject an empty key (`=v`) or an empty value (`k=`).
        if (eq_index == 0 or eq_index + 1 == token.len) return error.MalformedKv;
        const field = self.findField(token[0..eq_index]) orelse return error.UnknownField;
        const raw_value = token[eq_index + 1 ..];
        // Capacity was reserved for one pair per token above.
        pairs.appendAssumeCapacity(.{ .key = field.name, .value = stripQuotes(raw_value) });
    }
    return try self.buildValue(arena, pairs.items, null);
}
fn validateAndFillObject(self: Schema, obj: *std.json.ObjectMap) ParseError!void {
// Stricter than the LLM path: an unknown field is a user typo, not noise to drop.
var it = obj.iterator();