mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
Improve extraction (#2577)
* tools: add session-scoped bridge store
Exposes `globalThis.lp` to `/eval` calls, allowing state to persist
across evaluations and page navigations. Adds a `save` parameter to
both `/eval` and `/extract` to store results in the bridge.
* browser: await promises in eval and support inline args
- Await JS Promises in `eval` tool with a 30s timeout
- Support inline arguments in multi-line slash commands
- Silence output on successful `save=`
- Add `limit` option to extract schema walker
* eval: return empty text for undefined async IIFE
* extract: support limit on simple string arrays
Treats `["<sel>"]` as sugar for `[{"selector": "<sel>"}]` in the schema
walker. This enables the `"limit"` option on simple string arrays.
Also updates agent documentation to cover cross-call state with `lp.*`.
* refactor: optimize bridge store and schema lookup
- Introduce `bridgeStorePut` to skip redundant JSON validation for
trusted stringified values in `bridgeSync`.
- Store the schema pointer in `BlockOpener` to avoid re-parsing and
looking up the schema in `Iterator.next`.
- Clean up error handling and optional unwrapping in `execEval`.
This commit is contained in:
@@ -131,10 +131,14 @@ as a single JSON object. Supported value forms:
|
||||
|
||||
- `"<sel>"` — `textContent.trim()` of the first match (string or `null`).
|
||||
- `""` — the matched element's own text (only inside a `fields` block).
|
||||
- `["<sel>"]` — text of every match (string array).
|
||||
- `["<sel>"]` — text of every match (string array). Sugar for
|
||||
`[{"selector": "<sel>"}]`.
|
||||
- `{"selector": "<sel>", "attr": "<name>"}` — attribute of the first match.
|
||||
- `[{"selector": "<sel>", "fields": {…}}]` — array of records, each
|
||||
`fields` value resolved relative to the matched element.
|
||||
- Add `"limit": N` inside any array's object spec to cap matches at N
|
||||
(works for text, attribute, and `fields` shapes — e.g.
|
||||
`[{"selector": ".story .title", "limit": 5}]` for top 5 titles).
|
||||
|
||||
Use `/extract '''…'''` (or `"""…"""`) to spread a schema across multiple
|
||||
lines. The schema is parsed in Zig before the page-side walker runs,
|
||||
@@ -142,6 +146,79 @@ so a malformed schema fails with `Error: invalid /extract schema JSON`
|
||||
rather than a V8 stack trace. See [agent-tutorial.md](agent-tutorial.md)
|
||||
section 3 for a worked example against Hacker News.
|
||||
|
||||
### Cross-call state with `lp.*`
|
||||
|
||||
`/extract` and `/eval` each return one value per call, but real scrapes
|
||||
often need to carry data forward — capture a list on one page, then walk
|
||||
it across navigations. Two primitives keep that simple.
|
||||
|
||||
**`save=<name>`** on `/extract` or `/eval` stashes the result in a
|
||||
Session-scoped store keyed by `<name>` instead of dumping it to stdout.
|
||||
The stored value is then exposed to every subsequent `/eval` as
|
||||
`globalThis.lp.<name>`:
|
||||
|
||||
```pandascript
|
||||
/goto 'https://news.ycombinator.com/'
|
||||
|
||||
/extract save=front '''
|
||||
{
|
||||
"stories": [{
|
||||
"selector": "tr.athing",
|
||||
"limit": 5,
|
||||
"fields": {
|
||||
"id": {"attr": "id"},
|
||||
"title": ".titleline > a"
|
||||
}
|
||||
}]
|
||||
}
|
||||
'''
|
||||
|
||||
/eval '''
|
||||
console.log(lp.front.stories[0].title);
|
||||
'''
|
||||
```
|
||||
|
||||
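`save=`d commands print nothing on success so scripts pipe cleanly. `/eval save=` requires the script's result to be JSON text — wrap the value with `JSON.stringify(...)` — otherwise the call fails with an error and nothing is stored.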
|
||||
|
||||
**Auto-sync.** Any mutation of `lp.*` inside an `/eval` is persisted at
|
||||
the end of the call. Adding a key (`lp.x = …`), updating a nested value
|
||||
(`lp.front.stories[0].comments = […]`), or removing a key
|
||||
(`delete lp.x`) all propagate to the store. The next `/eval` sees the
|
||||
update — even after a navigation, because the store lives Session-side,
|
||||
not on the page.
|
||||
|
||||
**Async eval.** If your `/eval` body returns a Promise, `runEval`
|
||||
pumps the event loop until it settles (giving up with an error after 30 seconds), then surfaces the resolved value
|
||||
(or the rejection as an error). Combined with the bridge this lets a
|
||||
single `/eval` do an async `fetch` loop over `lp.*` data:
|
||||
|
||||
```pandascript
|
||||
/eval '''
|
||||
(async () => {
|
||||
for (const s of lp.front.stories) {
|
||||
const html = await fetch('/item?id=' + s.id).then(r => r.text());
|
||||
const doc = new DOMParser().parseFromString(html, 'text/html');
|
||||
s.comments = [...doc.querySelectorAll('tr.athing.comtr')].slice(0, 3)
|
||||
.map(r => r.querySelector('.commtext')?.textContent.trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
})()
|
||||
'''
|
||||
|
||||
/eval '''
|
||||
JSON.stringify(lp.front.stories)
|
||||
'''
|
||||
```
|
||||
|
||||
An async IIFE with no explicit `return` resolves to `undefined`, which
|
||||
the eval treats as silent — so the loop above prints nothing, and only
|
||||
the final `JSON.stringify` lands on stdout.
|
||||
|
||||
The store is **script-run scoped**: it's bound to the Session that runs
|
||||
the script, and goes away when that Session does. There is no
|
||||
cross-session persistence; if you need that, use `localStorage` (which
|
||||
is now origin-scoped and persists across navigations within a session).
|
||||
|
||||
### Recording
|
||||
|
||||
Interactive sessions can write back to a `.lp` file:
|
||||
|
||||
@@ -701,6 +701,7 @@ pub fn promptNumberedChoice(header: []const u8, items: []const []const u8, defau
|
||||
}
|
||||
|
||||
pub fn printAssistant(_: *Terminal, text: []const u8) void {
|
||||
if (text.len == 0) return;
|
||||
const fd = std.posix.STDOUT_FILENO;
|
||||
_ = std.posix.write(fd, text) catch {};
|
||||
_ = std.posix.write(fd, "\n") catch {};
|
||||
|
||||
@@ -55,6 +55,9 @@ arena: Allocator,
|
||||
history: History,
|
||||
navigation: Navigation,
|
||||
storage_shed: storage.Shed,
|
||||
// Backs `globalThis.lp.*`; values pre-stringified so the prelude splices
|
||||
// them in without re-encoding.
|
||||
bridge_store: std.StringHashMapUnmanaged([]const u8) = .empty,
|
||||
notification: *Notification,
|
||||
cookie_jar: storage.Cookie.Jar,
|
||||
/// User-provided scripts to inject into header.
|
||||
@@ -164,6 +167,15 @@ pub fn deinit(self: *Session) void {
|
||||
self.browser.env.memoryPressureNotification(.critical);
|
||||
|
||||
self.storage_shed.deinit(self.browser.app.allocator);
|
||||
{
|
||||
const allocator = self.browser.app.allocator;
|
||||
var it = self.bridge_store.iterator();
|
||||
while (it.next()) |kv| {
|
||||
allocator.free(kv.key_ptr.*);
|
||||
allocator.free(kv.value_ptr.*);
|
||||
}
|
||||
self.bridge_store.deinit(allocator);
|
||||
}
|
||||
self._console_messages.deinit();
|
||||
self.arena_pool.release(self.arena);
|
||||
}
|
||||
|
||||
@@ -49,6 +49,30 @@ pub fn thenAndCatch(self: Promise, on_fulfilled: js.Function, on_rejected: js.Fu
|
||||
return error.PromiseChainFailed;
|
||||
}
|
||||
|
||||
/// Settlement state of a promise. The tag values are V8's own state
/// constants, so a raw state integer from V8 converts directly.
pub const State = enum(u32) {
    /// Not settled yet.
    pending = v8.kPending,
    /// Settled with a fulfillment value.
    fulfilled = v8.kFulfilled,
    /// Settled with a rejection reason.
    rejected = v8.kRejected,
};
|
||||
|
||||
/// Current settlement state of this promise, as reported by
/// `v8__Promise__State` for the underlying handle.
pub fn state(self: Promise) State {
    const raw = v8.v8__Promise__State(self.handle);
    return @enumFromInt(raw);
}
|
||||
|
||||
/// Value the promise settled with: the fulfillment value or the rejection
/// reason. Caller must check `state` first; the handle returned by V8 is
/// unwrapped unconditionally.
pub fn result(self: Promise) js.Value {
    const settled = v8.v8__Promise__Result(self.handle).?;
    return .{ .local = self.local, .handle = settled };
}
|
||||
|
||||
/// Mark the promise as handled so that a rejection dealt with inline does
/// not also reach the global unhandled-rejection callback.
pub fn markAsHandled(self: Promise) void {
    v8.v8__Promise__MarkAsHandled(self.handle);
}
|
||||
|
||||
/// Produce a `Global` for this promise by delegating to `_persist(true)`.
// NOTE(review): the meaning of the boolean flag is defined by `_persist`,
// which is not visible in this hunk — confirm before changing it.
pub fn persist(self: Promise) !Global {
    return self._persist(true);
}
|
||||
|
||||
@@ -185,7 +185,7 @@ pub const Tool = enum {
|
||||
.input_schema = url_params_schema,
|
||||
},
|
||||
.eval => .{
|
||||
.description = "Evaluate JavaScript in the current page context. If a url is provided, it navigates to that url first.",
|
||||
.description = "Evaluate JavaScript in the current page context. If a url is provided, it navigates to that url first. The `globalThis.lp` object exposes a Session-scoped bridge store: values written via `lp.foo = ...` auto-sync at end of eval, surviving navigation; values previously set via `/extract save=` or `/eval save=` appear as `lp.<name>`.",
|
||||
.input_schema = minify(
|
||||
\\{
|
||||
\\ "type": "object",
|
||||
@@ -193,7 +193,8 @@ pub const Tool = enum {
|
||||
\\ "script": { "type": "string" },
|
||||
\\ "url": { "type": "string", "description": "Optional URL to navigate to before evaluating." },
|
||||
\\ "timeout": { "type": "integer", "description": "Optional timeout in milliseconds. Defaults to 10000." },
|
||||
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." }
|
||||
\\ "waitUntil": { "type": "string", "enum": ["load", "domcontentloaded", "networkidle", "done"], "description": "Optional wait strategy. Defaults to 'done'." },
|
||||
\\ "save": { "type": "string", "description": "Optional bridge-store key. The eval's return value is stored under this name and re-exposed as `lp.<name>` to subsequent evals. Value must be JSON; wrap non-strings with JSON.stringify(...)." }
|
||||
\\ },
|
||||
\\ "required": ["script"]
|
||||
\\}
|
||||
@@ -204,14 +205,16 @@ pub const Tool = enum {
|
||||
\\Extract structured data via a JSON schema. The only tool whose result is recorded as an `/extract` PandaScript line (replay-friendly); answering from `markdown` content in chat is not. Schema is a JSON object literal passed as a string in `schema`. Each value picks what to lift:
|
||||
\\ "<sel>" → first match's textContent.trim() (string|null)
|
||||
\\ "" → element's own textContent.trim() (only meaningful inside `fields`)
|
||||
\\ ["<sel>"] → every match's text (string[])
|
||||
\\ ["<sel>"] → every match's text (string[]) — sugar for [{"selector":"<sel>"}]
|
||||
\\ {"selector":"<sel>","attr":"<name>"} → first match's attribute (string|null)
|
||||
\\ [{"selector":"<sel>","attr":"<name>"}] → every match's attribute (string[])
|
||||
\\ [{"selector":"<sel>","fields":{…}}] → array of objects, fields resolved relative to each match
|
||||
\\ add `"limit": N` inside any array's object spec to cap matches at N (works for text, attr, and fields shapes)
|
||||
\\
|
||||
\\Examples (schema → result):
|
||||
\\ {"karma": "#karma"} → {"karma":"42"}
|
||||
\\ {"items": [".story .title"]} → {"items":["Title 1","Title 2"]}
|
||||
\\ {"top3": [{"selector":".story .title","limit":3}]} → {"top3":["A","B","C"]}
|
||||
\\ {"links": [{"selector":"a.title","attr":"href"}]} → {"links":["/a","/b"]}
|
||||
\\ {"stories": [{"selector":".athing","fields":{"title":".titleline","rank":".rank"}}]} → {"stories":[{"title":"Foo","rank":"1"}]}
|
||||
,
|
||||
@@ -219,7 +222,8 @@ pub const Tool = enum {
|
||||
\\{
|
||||
\\ "type": "object",
|
||||
\\ "properties": {
|
||||
\\ "schema": { "type": "string", "description": "JSON schema object (as a string) describing what to extract. Must be a JSON object literal." }
|
||||
\\ "schema": { "type": "string", "description": "JSON schema object (as a string) describing what to extract. Must be a JSON object literal." },
|
||||
\\ "save": { "type": "string", "description": "Optional bridge-store key. The extracted JSON is stored under this name and exposed as `lp.<name>` in subsequent /eval calls." }
|
||||
\\ },
|
||||
\\ "required": ["schema"]
|
||||
\\}
|
||||
@@ -669,11 +673,10 @@ const schema_walker_prefix =
|
||||
\\ return m ? m.textContent.trim() : null;
|
||||
\\ }
|
||||
\\ if (Array.isArray(v)) {
|
||||
\\ const inner = v[0];
|
||||
\\ if (typeof inner === 'string') {
|
||||
\\ return Array.from(el.querySelectorAll(inner)).map(function(m){ return m.textContent.trim(); });
|
||||
\\ }
|
||||
\\ return Array.from(el.querySelectorAll(inner.selector)).map(function(m){ return valueOf(m, inner); });
|
||||
\\ const inner = typeof v[0] === 'string' ? { selector: v[0] } : v[0];
|
||||
\\ let matches = Array.from(el.querySelectorAll(inner.selector));
|
||||
\\ if (typeof inner.limit === 'number') matches = matches.slice(0, inner.limit);
|
||||
\\ return matches.map(function(m){ return valueOf(m, inner); });
|
||||
\\ }
|
||||
\\ const t = v.selector ? el.querySelector(v.selector) : el;
|
||||
\\ if (!t) return null;
|
||||
@@ -918,10 +921,15 @@ fn execEval(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R
|
||||
url: ?[:0]const u8 = null,
|
||||
timeout: ?u32 = null,
|
||||
waitUntil: ?lp.Config.WaitUntil = null,
|
||||
save: ?[]const u8 = null,
|
||||
};
|
||||
const args = try parseArgs(Params, arena, arguments);
|
||||
const page = try ensurePage(session, registry, args.url, args.timeout, args.waitUntil);
|
||||
const before = session.currentFrame();
|
||||
const app_allocator = session.browser.app.allocator;
|
||||
|
||||
const prelude = bridgePrelude(arena, &session.bridge_store) catch return ToolError.OutOfMemory;
|
||||
_ = try runEval(arena, page, prelude);
|
||||
|
||||
// Block-scope so top-level `let`/`const` don't leak across calls.
|
||||
const block_script = std.fmt.allocPrintSentinel(
|
||||
@@ -944,12 +952,38 @@ fn execEval(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R
|
||||
}
|
||||
if (result.is_error == true) return result;
|
||||
|
||||
// Sync lp.* before any queued navigation tears down this JS context.
|
||||
const postlude_result: ?ToolResult = runEval(arena, page, bridge_postlude) catch |err| switch (err) {
|
||||
error.OutOfMemory => return ToolError.OutOfMemory,
|
||||
else => null,
|
||||
};
|
||||
if (postlude_result) |pr| if (!pr.is_error) {
|
||||
bridgeSync(app_allocator, &session.bridge_store, pr.text) catch |err| switch (err) {
|
||||
error.OutOfMemory => return ToolError.OutOfMemory,
|
||||
else => {},
|
||||
};
|
||||
};
|
||||
|
||||
// Silence on save= success so stdout pipes stay clean.
|
||||
if (args.save) |name| {
|
||||
bridgeStoreSet(app_allocator, &session.bridge_store, name, result.text) catch |err| switch (err) {
|
||||
error.OutOfMemory => return ToolError.OutOfMemory,
|
||||
error.InvalidJson => return .{
|
||||
.text = "save= requires the eval to return JSON; wrap with JSON.stringify(...)",
|
||||
.is_error = true,
|
||||
},
|
||||
};
|
||||
result = .{ .text = "" };
|
||||
}
|
||||
|
||||
// Script may have queued a navigation (e.g. `top.location = …`).
|
||||
try awaitQueuedNavigation(session);
|
||||
const after = session.currentFrame() orelse return result;
|
||||
if (before == null or before.? == after) return result;
|
||||
|
||||
registry.reset();
|
||||
if (result.text.len == 0) return result; // silenced save=; don't re-emit via nav suffix
|
||||
|
||||
const page_title = after.getTitle() catch null;
|
||||
const text = std.fmt.allocPrint(arena, "{s}\n(Navigated to {s}, title: {s})", .{
|
||||
result.text, after.url, page_title orelse "(none)",
|
||||
@@ -958,11 +992,26 @@ fn execEval(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.R
|
||||
}
|
||||
|
||||
/// Handle the `extract` tool call: run the schema walker and, when `save` is
/// given, stash the extracted JSON in the Session bridge store.
///
/// - Without `save`, or when the extraction itself failed, the extraction
///   result is returned unchanged.
/// - With `save` and a successful extraction, the JSON text is stored under
///   that key and an empty (silent) result is returned.
/// - If the walker output is not valid JSON, an error result is returned and
///   nothing is stored.
fn execExtract(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError!ToolResult {
    const Params = struct {
        schema: []const u8,
        save: ?[]const u8 = null,
    };
    const args = try parseArgs(Params, arena, arguments);
    const result = try extract(arena, session, registry, args.schema);

    // Failed extractions are reported as-is and never written to the store.
    if (result.is_error) return result;
    const name = args.save orelse return result;

    bridgeStoreSet(session.browser.app.allocator, &session.bridge_store, name, result.text) catch |err| switch (err) {
        error.OutOfMemory => return ToolError.OutOfMemory,
        error.InvalidJson => return .{ .text = "extract: walker produced non-JSON output", .is_error = true },
    };

    // Saved successfully: return empty text instead of echoing the JSON.
    return .{ .text = "" };
}
|
||||
|
||||
const eval_promise_timeout_ms: u32 = 30_000;
|
||||
|
||||
fn runEval(arena: std.mem.Allocator, page: *lp.Frame, script: [:0]const u8) ToolError!ToolResult {
|
||||
var ls: lp.js.Local.Scope = undefined;
|
||||
page.js.localScope(&ls);
|
||||
@@ -975,6 +1024,36 @@ fn runEval(arena: std.mem.Allocator, page: *lp.Frame, script: [:0]const u8) Tool
|
||||
const js_result = ls.local.compileAndRun(script, null) catch |err|
|
||||
return .{ .text = try formatJsError(arena, &try_catch, err), .is_error = true };
|
||||
|
||||
if (js_result.isPromise()) {
|
||||
const promise = js_result.toPromise();
|
||||
promise.markAsHandled();
|
||||
|
||||
var runner = page._session.runner(.{}) catch {
|
||||
return .{ .text = "promise: no runner available", .is_error = true };
|
||||
};
|
||||
var timer = std.time.Timer.start() catch unreachable;
|
||||
while (promise.state() == .pending) {
|
||||
const elapsed_ms: u32 = @intCast(timer.read() / std.time.ns_per_ms);
|
||||
if (elapsed_ms >= eval_promise_timeout_ms) {
|
||||
return .{ .text = "promise: timed out waiting for resolution", .is_error = true };
|
||||
}
|
||||
const budget = @min(eval_promise_timeout_ms - elapsed_ms, 50);
|
||||
_ = runner.tick(.{ .ms = budget }) catch |err| switch (err) {
|
||||
error.Cancelled => return .{ .text = "promise: cancelled", .is_error = true },
|
||||
else => return .{ .text = "promise: tick failed", .is_error = true },
|
||||
};
|
||||
}
|
||||
|
||||
const settled = promise.result();
|
||||
// No-return async IIFE → undefined → silence, so pipes stay clean.
|
||||
if (promise.state() == .fulfilled and settled.isUndefined()) return .{ .text = "" };
|
||||
const text = settled.toStringSliceWithAlloc(arena) catch |err| switch (err) {
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
else => return .{ .text = try formatJsError(arena, &try_catch, err), .is_error = true },
|
||||
};
|
||||
return .{ .text = text, .is_error = (promise.state() == .rejected) };
|
||||
}
|
||||
|
||||
const text = js_result.toStringSliceWithAlloc(arena) catch |err| switch (err) {
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
else => return .{ .text = try formatJsError(arena, &try_catch, err), .is_error = true },
|
||||
@@ -991,6 +1070,88 @@ fn formatJsError(arena: std.mem.Allocator, try_catch: *lp.js.TryCatch, err: anye
|
||||
return aw.written();
|
||||
}
|
||||
|
||||
const BridgeStore = std.StringHashMapUnmanaged([]const u8);
|
||||
|
||||
/// Build the script that seeds `globalThis.lp` from the bridge store.
///
/// The output has the form `globalThis.lp = {"key":<value>,...};`. Keys are
/// escaped with `std.json.Stringify.value`; values are written verbatim
/// because the store already holds them as JSON text, which avoids a round
/// trip through `json.Value`. The returned slice is NUL-terminated and
/// allocated from `arena`.
// NOTE(review): JS object-literal syntax treats a `"__proto__"` key as a
// prototype assignment rather than an own property — confirm whether such a
// store key needs special handling.
fn bridgePrelude(arena: std.mem.Allocator, store: *const BridgeStore) ![:0]const u8 {
    var script: std.Io.Writer.Allocating = .init(arena);
    const out = &script.writer;

    try out.writeAll("globalThis.lp = {");

    // Empty before the first entry, "," before every later one.
    var separator: []const u8 = "";
    var entries = store.iterator();
    while (entries.next()) |entry| {
        try out.writeAll(separator);
        separator = ",";
        try std.json.Stringify.value(entry.key_ptr.*, .{}, out);
        try out.writeByte(':');
        try out.writeAll(entry.value_ptr.*);
    }

    try out.writeAll("};");
    return arena.dupeZ(u8, script.written());
}
|
||||
|
||||
const bridge_postlude: [:0]const u8 = "JSON.stringify(globalThis.lp)";
|
||||
|
||||
/// Reconcile the bridge store with a snapshot of the page's `globalThis.lp`.
///
/// `postlude_json` is the text produced by evaluating `bridge_postlude`
/// (`JSON.stringify(globalThis.lp)`). Keys missing from the snapshot are
/// dropped so `delete lp.foo` propagates; every key present in the snapshot
/// is re-stringified and written back to the store.
///
/// A snapshot that is not valid JSON, or is not a JSON object, leaves the
/// store untouched. Allocation failure is always reported to the caller,
/// including when it happens while parsing the snapshot.
fn bridgeSync(allocator: std.mem.Allocator, store: *BridgeStore, postlude_json: []const u8) !void {
    var parsed = std.json.parseFromSlice(std.json.Value, allocator, postlude_json, .{}) catch |err| switch (err) {
        // Running out of memory is not "bad JSON"; callers handle it explicitly.
        error.OutOfMemory => return error.OutOfMemory,
        // Anything else means the snapshot is unusable; keep the store as it is.
        else => return,
    };
    defer parsed.deinit();

    const snapshot = switch (parsed.value) {
        .object => |obj| obj,
        else => return,
    };

    // Collect stale keys first and remove them afterwards, so the map is not
    // modified while its key iterator is live.
    var stale_keys: std.ArrayList([]const u8) = .empty;
    defer stale_keys.deinit(allocator);
    var keys = store.keyIterator();
    while (keys.next()) |key| {
        if (!snapshot.contains(key.*)) try stale_keys.append(allocator, key.*);
    }
    for (stale_keys.items) |key| {
        if (store.fetchRemove(key)) |removed| {
            allocator.free(removed.key);
            allocator.free(removed.value);
        }
    }

    // One scratch buffer reused for every entry; `bridgeStorePut` copies the
    // bytes it keeps, so the buffer can be cleared between entries.
    var value_buf: std.Io.Writer.Allocating = .init(allocator);
    defer value_buf.deinit();

    var entries = snapshot.iterator();
    while (entries.next()) |entry| {
        value_buf.clearRetainingCapacity();
        try std.json.Stringify.value(entry.value_ptr.*, .{}, &value_buf.writer);
        // Trusted JSON path: value was just stringified from a parsed Value.
        try bridgeStorePut(allocator, store, entry.key_ptr.*, value_buf.written());
    }
}
|
||||
|
||||
/// Store `json_value` under `name` after checking that it is valid JSON.
///
/// Returns `error.InvalidJson` when the text does not validate; the store is
/// left unchanged in that case. Re-saving the exact bytes already stored
/// under `name` returns immediately without validating or copying. Key and
/// value are copied with `allocator`.
fn bridgeStoreSet(allocator: std.mem.Allocator, store: *BridgeStore, name: []const u8, json_value: []const u8) !void {
    if (store.get(name)) |current| {
        if (std.mem.eql(u8, current, json_value)) return;
    }

    if (!try std.json.validate(allocator, json_value)) return error.InvalidJson;

    // Validated: the insert-or-replace logic is shared with the trusted path.
    try bridgeStorePut(allocator, store, name, json_value);
}
|
||||
|
||||
/// Insert or replace `name` in the store without validating `json_value`.
/// Use only when the caller already produced canonical JSON (e.g. via
/// `json.Stringify.value`); otherwise use `bridgeStoreSet`.
///
/// Both key and value are copied with `allocator`. Replacing an existing
/// entry frees the old value and keeps the existing key allocation.
fn bridgeStorePut(allocator: std.mem.Allocator, store: *BridgeStore, name: []const u8, json_value: []const u8) !void {
    const slot = store.getPtr(name) orelse {
        // New key: the map takes ownership of both copies once `put` succeeds.
        const key_owned = try allocator.dupe(u8, name);
        errdefer allocator.free(key_owned);
        const val_owned = try allocator.dupe(u8, json_value);
        errdefer allocator.free(val_owned);
        try store.put(allocator, key_owned, val_owned);
        return;
    };

    // Existing key with identical bytes: nothing to do.
    if (std.mem.eql(u8, slot.*, json_value)) return;

    // Copy the new value before freeing the old one, so a failed allocation
    // leaves the stored entry intact.
    const replacement = try allocator.dupe(u8, json_value);
    allocator.free(slot.*);
    slot.* = replacement;
}
|
||||
|
||||
/// Resolve a target element from either a CSS selector or a backendNodeId.
|
||||
fn resolveTarget(
|
||||
session: *lp.Session,
|
||||
|
||||
@@ -653,6 +653,350 @@ test "MCP - eval: localStorage persists across navigations and is origin-scoped"
|
||||
} }, out.written());
|
||||
}
|
||||
|
||||
test "MCP - eval: save= value is readable via lp.<name> in next eval" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    // First call stores the JSON-encoded string under `greeting`.
    const store_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "JSON.stringify('hello')", "save": "greeting" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, store_request);

    // Only the second response is checked.
    sink.clearRetainingCapacity();
    const fetch_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.greeting" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, fetch_request);
    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "hello" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: lp.* mutations auto-sync between evals" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    // Write through `lp` in one eval...
    const write_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.counter = 7; lp.counter" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, write_request);

    // ...and read the value back in a separate eval.
    sink.clearRetainingCapacity();
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.counter + 1" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);
    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "8" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: lp.* survives navigation" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_actions.html", &sink.writer);
    defer server.deinit();

    // Step 1: put a value on `lp` while on the first page.
    const assign_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.token = 'abc'" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, assign_request);

    // Step 2: navigate (note the different host spelling in the URL).
    sink.clearRetainingCapacity();
    const goto_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "goto",
        \\ "arguments": { "url": "http://127.0.0.1:9582/src/browser/tests/mcp_actions.html" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, goto_request);

    // Step 3: the value must still be visible after the navigation.
    sink.clearRetainingCapacity();
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 3,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.token" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);
    try testing.expectJson(.{ .id = 3, .result = .{
        .content = &.{.{ .type = "text", .text = "abc" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: delete lp.<key> removes from bridge store" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    // Create the key.
    const create_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.tmp = 1" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, create_request);

    // Delete it in a second eval.
    sink.clearRetainingCapacity();
    const delete_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "delete lp.tmp; 0" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, delete_request);

    // A third eval must no longer see it.
    sink.clearRetainingCapacity();
    const probe_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 3,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "typeof lp.tmp" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, probe_request);
    try testing.expectJson(.{ .id = 3, .result = .{
        .content = &.{.{ .type = "text", .text = "undefined" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - extract: save= exposes the result as lp.<name>" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_actions.html", &sink.writer);
    defer server.deinit();

    // Extract the button text and save the result object under `page`.
    const extract_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "extract",
        \\ "arguments": {
        \\ "schema": "{\"btn\":\"#btn\"}",
        \\ "save": "page"
        \\ }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, extract_request);

    // The saved object must be reachable from a later eval.
    sink.clearRetainingCapacity();
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.page.btn" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);
    try testing.expectJson(.{ .id = 2, .result = .{
        .content = &.{.{ .type = "text", .text = "Click Me" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: Promise.resolve return value is awaited" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    // The script's value is a Promise; the reply must carry what it resolves to.
    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "Promise.resolve(7)" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);
    try testing.expectJson(.{ .id = 1, .result = .{
        .content = &.{.{ .type = "text", .text = "7" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: async IIFE resolves to returned value" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    // Sums 1 + 2 + 3 with an `await` inside the loop.
    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { const xs = [1,2,3]; let s = 0; for (const x of xs) s += await Promise.resolve(x); return s; })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);
    try testing.expectJson(.{ .id = 1, .result = .{
        .content = &.{.{ .type = "text", .text = "6" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: rejected Promise surfaces as is_error" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { throw new Error('nope'); })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);

    // The reply must be flagged as an error and carry the thrown message.
    const reply = sink.written();
    try testing.expect(std.mem.indexOf(u8, reply, "\"isError\":true") != null);
    try testing.expect(std.mem.indexOf(u8, reply, "nope") != null);
}
|
||||
|
||||
test "MCP - eval: async IIFE without explicit return resolves to empty text" {
    defer testing.reset();
    var sink: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &sink.writer);
    defer server.deinit();

    // The IIFE has no `return`, so the reply text is expected to be empty.
    const request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { lp.touched = true; })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, request);
    try testing.expectJson(.{ .id = 1, .result = .{
        .content = &.{.{ .type = "text", .text = "" }},
    } }, sink.written());
}
|
||||
|
||||
test "MCP - eval: lp.* mutations inside async IIFE survive to the next eval" {
    defer testing.reset();

    // Everything the server writes back is captured in `response`.
    var response: std.io.Writer.Allocating = .init(testing.arena_allocator);
    const server = try testLoadPage("about:blank", &response.writer);
    defer server.deinit();

    // Call 1: accumulate 10 + 20 + 30 into `lp.total` across awaits.
    const write_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 1,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "(async () => { lp.total = 0; for (const n of [10, 20, 30]) lp.total += await Promise.resolve(n); })()" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, write_request);

    // Drop the first response so only the second one is compared below.
    response.clearRetainingCapacity();

    // Call 2: a separate eval reads the value back.
    const read_request =
        \\{
        \\ "jsonrpc": "2.0",
        \\ "id": 2,
        \\ "method": "tools/call",
        \\ "params": {
        \\ "name": "eval",
        \\ "arguments": { "script": "lp.total" }
        \\ }
        \\}
    ;
    try router.handleMessage(server, testing.arena_allocator, read_request);

    try testing.expectJson(.{
        .id = 2,
        .result = .{
            .content = &.{.{ .type = "text", .text = "60" }},
        },
    }, response.written());
}
|
||||
|
||||
test "MCP - indexLines: exact match returns line + trailing newline" {
|
||||
var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
@@ -74,7 +74,14 @@ pub fn next(self: *Iterator) command.ParseError!?Entry {
|
||||
// it if any allocation between here and successful return fails.
|
||||
errdefer self.allocator.free(body);
|
||||
const span_end = self.lines.index orelse self.lines.buffer.len;
|
||||
|
||||
var obj: std.json.ObjectMap = .init(self.allocator);
|
||||
if (opener.inline_args.len > 0) {
|
||||
if (try opener.schema.parseInlineKv(self.allocator, opener.inline_args)) |v| if (v == .object) {
|
||||
var it = v.object.iterator();
|
||||
while (it.next()) |kv| try obj.put(kv.key_ptr.*, kv.value_ptr.*);
|
||||
};
|
||||
}
|
||||
try obj.put(opener.field, .{ .string = body });
|
||||
return .{
|
||||
.line_num = start_line,
|
||||
@@ -100,16 +107,24 @@ pub fn next(self: *Iterator) command.ParseError!?Entry {
|
||||
|
||||
/// Description of a line that opens a multi-line block, as produced by
/// `tryBlockOpener`.
const BlockOpener = struct {
    /// Tool of the schema matched by the slash-command name.
    tool: BrowserTool,
    /// The matched schema itself, kept so inline arguments can be parsed
    /// against it without looking the schema up again.
    schema: *const Schema,
    /// Key under which the collected block body is stored.
    field: []const u8,
    /// Kind of triple-quote that opened the block.
    quote_type: Schema.QuoteType,
    /// Text found between the tool name and the opening triple-quote,
    /// e.g. `save=stories` in `/extract save=stories '''`. Empty when the
    /// opener carries no inline arguments.
    inline_args: []const u8,
};
|
||||
|
||||
/// Recognizes `line` as the opener of a multi-line block.
///
/// The line must be a slash command naming a known, multi-line-capable
/// schema, and (ignoring trailing whitespace) must end with a three-byte
/// quote literal accepted by `Schema.QuoteType.fromLiteral`. Whatever sits
/// between the tool name and that literal is returned, trimmed, as
/// `inline_args`.
///
/// Returns `null` when any of those conditions does not hold. The returned
/// slices point into `line`; nothing is allocated.
fn tryBlockOpener(line: []const u8) ?BlockOpener {
    const split = Schema.parseSlashCommand(line) orelse return null;
    const s = Schema.findByName(split.name) orelse return null;
    if (!s.isMultiLineCapable()) return null;

    // The quote literal is matched at the end of the line rather than against
    // the whole remainder, so `key=value` arguments may precede it.
    const rest = std.mem.trimRight(u8, split.rest, &std.ascii.whitespace);
    if (rest.len < 3) return null;
    const qt = Schema.QuoteType.fromLiteral(rest[rest.len - 3 ..]) orelse return null;
    const inline_args = std.mem.trim(u8, rest[0 .. rest.len - 3], &std.ascii.whitespace);

    // NOTE(review): `required[0]` assumes every multi-line-capable schema
    // declares at least one required field — confirm against `isMultiLineCapable`.
    return .{
        .tool = s.tool,
        .schema = s,
        .field = s.required[0],
        .quote_type = qt,
        .inline_args = inline_args,
    };
}
|
||||
|
||||
fn collectMultiLineBlock(self: *Iterator, quote_type: Schema.QuoteType) std.mem.Allocator.Error!?[]const u8 {
|
||||
|
||||
@@ -222,6 +222,24 @@ pub fn parseValueDiag(self: Schema, arena: std.mem.Allocator, rest_raw: []const
|
||||
return try self.buildValue(arena, list.items, diag);
|
||||
}
|
||||
|
||||
/// Parses the inline `key=value` tokens of a multi-line opener into a JSON
/// value. Works like `parseValueDiag` except that the required-field check is
/// skipped, because the multi-line body supplies the required field through a
/// separate path.
///
/// Returns `null` when `rest_raw` is empty or whitespace only. Fails with
/// `error.MalformedKv` for a token that has no `=`, an empty key, or an empty
/// value, and with `error.UnknownField` for a key the schema does not know.
pub fn parseInlineKv(self: Schema, arena: std.mem.Allocator, rest_raw: []const u8) ParseError!?std.json.Value {
    const trimmed = std.mem.trim(u8, rest_raw, &std.ascii.whitespace);
    if (trimmed.len == 0) return null;

    const tokens = try tokenize(arena, trimmed);
    // One pair per token at most, so the capacity reserved here is sufficient.
    var pairs = try std.ArrayList(KvPair).initCapacity(arena, tokens.len);
    for (tokens) |token| {
        const sep = std.mem.indexOfScalar(u8, token, '=') orelse return error.MalformedKv;
        // Reject both `=value` (no key) and `key=` (no value).
        if (sep == 0 or sep + 1 == token.len) return error.MalformedKv;

        const field = self.findField(token[0..sep]) orelse return error.UnknownField;
        pairs.appendAssumeCapacity(.{
            .key = field.name,
            .value = stripQuotes(token[sep + 1 ..]),
        });
    }
    return try self.buildValue(arena, pairs.items, null);
}
|
||||
|
||||
fn validateAndFillObject(self: Schema, obj: *std.json.ObjectMap) ParseError!void {
|
||||
// Stricter than the LLM path: an unknown field is a user typo, not noise to drop.
|
||||
var it = obj.iterator();
|
||||
|
||||
Reference in New Issue
Block a user