browser: normalize keys and support form submit in press

- Map common key shorthand aliases to canonical KeyboardEvent keys.
- Trigger implicit form submission when pressing Enter on inputs.
- Add CSS selector support to the press tool.
This commit is contained in:
Adrià Arrufat
2026-05-22 22:16:56 +02:00
parent 4e69be8543
commit 312276f968
4 changed files with 105 additions and 9 deletions

View File

@@ -80,16 +80,18 @@ pub fn hover(node: *DOMNode, frame: *Frame) !void {
}
pub fn press(node: ?*DOMNode, key: []const u8, frame: *Frame) !void {
const target = if (node) |n|
(n.is(Element) orelse return error.InvalidNodeType).asEventTarget()
const target_el: ?*Element = if (node) |n|
(n.is(Element) orelse return error.InvalidNodeType)
else
frame.document.asNode().asEventTarget();
null;
const target = if (target_el) |el| el.asEventTarget() else frame.document.asNode().asEventTarget();
const canonical = canonicalKey(key);
const keydown_event: *KeyboardEvent = try .initTrusted(comptime .wrap("keydown"), .{
.bubbles = true,
.cancelable = true,
.composed = true,
.key = key,
.key = canonical,
}, frame);
frame._event_manager.dispatch(target, keydown_event.asEvent()) catch |err| {
@@ -97,11 +99,15 @@ pub fn press(node: ?*DOMNode, key: []const u8, frame: *Frame) !void {
return error.ActionFailed;
};
if (std.mem.eql(u8, canonical, "Enter") and !keydown_event.asEvent().getDefaultPrevented()) {
if (target_el) |el| try implicitFormSubmit(el, frame);
}
const keyup_event: *KeyboardEvent = try .initTrusted(comptime .wrap("keyup"), .{
.bubbles = true,
.cancelable = true,
.composed = true,
.key = key,
.key = canonical,
}, frame);
frame._event_manager.dispatch(target, keyup_event.asEvent()) catch |err| {
@@ -110,6 +116,55 @@ pub fn press(node: ?*DOMNode, key: []const u8, frame: *Frame) !void {
};
}
/// Map common shorthand to the canonical `KeyboardEvent.key` string so users
/// can type "enter" instead of "Enter" without surprises.
///
/// Matching is ASCII case-insensitive. Besides the short aliases ("esc",
/// "up", "del", ...), canonical names whose spelling contains interior
/// capitals are accepted in any casing ("arrowup" -> "ArrowUp",
/// "pagedown" -> "PageDown"), and a carriage return is treated like a newline.
///
/// Keys without an alias are returned unchanged. The result therefore either
/// borrows from `key` or points at a string literal; nothing is allocated.
fn canonicalKey(key: []const u8) []const u8 {
    const Alias = struct { in: []const u8, out: []const u8 };
    const aliases = [_]Alias{
        .{ .in = "enter", .out = "Enter" },
        .{ .in = "return", .out = "Enter" },
        .{ .in = "\n", .out = "Enter" },
        .{ .in = "\r", .out = "Enter" },
        // A literal backslash followed by 'n', as produced by callers that
        // forgot to unescape their input.
        .{ .in = "\\n", .out = "Enter" },
        .{ .in = "esc", .out = "Escape" },
        .{ .in = "escape", .out = "Escape" },
        .{ .in = "tab", .out = "Tab" },
        .{ .in = "\t", .out = "Tab" },
        .{ .in = "space", .out = " " },
        .{ .in = "backspace", .out = "Backspace" },
        .{ .in = "delete", .out = "Delete" },
        .{ .in = "del", .out = "Delete" },
        .{ .in = "up", .out = "ArrowUp" },
        .{ .in = "down", .out = "ArrowDown" },
        .{ .in = "left", .out = "ArrowLeft" },
        .{ .in = "right", .out = "ArrowRight" },
        // Canonical names typed with the wrong casing: without these entries
        // "arrowup" would pass through untouched while "enter" is fixed up.
        .{ .in = "arrowup", .out = "ArrowUp" },
        .{ .in = "arrowdown", .out = "ArrowDown" },
        .{ .in = "arrowleft", .out = "ArrowLeft" },
        .{ .in = "arrowright", .out = "ArrowRight" },
        .{ .in = "pageup", .out = "PageUp" },
        .{ .in = "pagedown", .out = "PageDown" },
        .{ .in = "home", .out = "Home" },
        .{ .in = "end", .out = "End" },
    };
    for (aliases) |a| {
        if (std.ascii.eqlIgnoreCase(key, a.in)) return a.out;
    }
    // Unknown keys (single characters, already-canonical names, the empty
    // string) are forwarded as-is.
    return key;
}
/// Request submission of the form that owns `el`, if `el` is a control for
/// which this function submits on Enter.
///
/// - Inputs of type submit/image submit with themselves as the submitter.
/// - Text-like inputs submit without a submitter.
/// - Any other input type, a button whose type is not "submit", an element
///   that is neither an input nor a button, or a control without an owning
///   form is a silent no-op.
///
/// Errors returned by `requestSubmit` are propagated to the caller.
fn implicitFormSubmit(el: *Element, frame: *Frame) !void {
    if (el.is(Element.Html.Input)) |input_el| {
        const owner = input_el.getForm(frame) orelse return;
        const submitter: ?*Element = pick: {
            switch (input_el._input_type) {
                // Submit-like inputs act as their own submitter.
                .submit, .image => break :pick el,
                // The text-like family submits the form with no submitter.
                .text,
                .password,
                .email,
                .url,
                .tel,
                .search,
                .number,
                .date,
                .time,
                .@"datetime-local",
                .month,
                .week,
                => break :pick null,
                // Every remaining input type (checkbox, radio, file, ...)
                // does not trigger a submission here.
                else => return,
            }
        };
        return owner.requestSubmit(submitter, frame);
    }

    // Not an input: only a submit-type button can still trigger submission.
    const button_el = el.is(Element.Html.Button) orelse return;
    if (!std.mem.eql(u8, button_el.getType(), "submit")) return;
    const owner = button_el.getForm(frame) orelse return;
    return owner.requestSubmit(el, frame);
}
pub fn selectOption(node: *DOMNode, value: []const u8, frame: *Frame) !void {
const el = node.is(Element) orelse return error.InvalidNodeType;
const select = el.is(Element.Html.Select) orelse return error.InvalidNodeType;

View File

@@ -0,0 +1,9 @@
<!DOCTYPE html>
<html>
<body>
<!-- Fixture form: the onsubmit handler records that a submit happened (window.submitted) together with the current value of #q (window.submittedValue), then cancels the default action with preventDefault(). -->
<form id="f" onsubmit="window.submitted = true; window.submittedValue = document.getElementById('q').value; event.preventDefault();">
<input id="q" type="text" name="q">
<input type="submit" value="Go">
</form>
</body>
</html>

View File

@@ -302,13 +302,14 @@ pub const Tool = enum {
),
},
.press => .{
.description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'.",
.description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'. Common shorthand is normalized: 'enter'/'return' → 'Enter', 'esc' → 'Escape', 'up'/'down'/'left'/'right' → 'Arrow*', 'space' → ' '. Pressing 'Enter' on a form input or submit button triggers implicit form submission.",
.input_schema = minify(
\\{
\\ "type": "object",
\\ "properties": {
\\ "key": { "type": "string", "description": "The key to press (e.g. 'Enter', 'Tab', 'a')." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document." }
\\ "selector": { "type": "string", "description": "Optional CSS selector of the element to target. Preferred over backendNodeId." },
\\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document when neither selector nor backendNodeId is provided." }
\\ },
\\ "required": ["key"]
\\}
@@ -1026,12 +1027,21 @@ fn execHover(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.
fn execPress(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 {
const Params = struct {
key: []const u8,
selector: ?[]const u8 = null,
backendNodeId: ?CDPNode.Id = null,
};
const args = try parseArgs(Params, arena, arguments);
const page = try requireFrame(session);
const target_node = try resolveOptionalNode(registry, args.backendNodeId);
var page: *lp.Frame = undefined;
var target_node: ?*DOMNode = null;
if (args.selector) |sel| {
const resolved = try resolveBySelector(session, sel);
page = resolved.page;
target_node = resolved.node;
} else {
page = try requireFrame(session);
target_node = try resolveOptionalNode(registry, args.backendNodeId);
}
lp.actions.press(target_node, args.key, page) catch |err| return mapActionError(err);

View File

@@ -898,6 +898,28 @@ test "MCP - waitForSelector: timeout" {
}, out.written());
}
test "MCP - press Enter on form input triggers submit (lowercase alias)" {
    defer testing.reset();

    const arena = testing.arena_allocator;
    var response: std.io.Writer.Allocating = .init(arena);
    const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_press_form.html", &response.writer);
    defer server.deinit();

    // Step 1: type "hello" into #q so the submit handler has a value to snapshot.
    const fill_request = try arena.dupe(u8, "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/call\",\"params\":{\"name\":\"fill\",\"arguments\":{\"selector\":\"#q\",\"value\":\"hello\"}}}");
    try router.handleMessage(server, arena, fill_request);
    response.clearRetainingCapacity();

    // Step 2: press the lowercase alias "enter" on the input, targeted by selector.
    const press_request = try arena.dupe(u8, "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/call\",\"params\":{\"name\":\"press\",\"arguments\":{\"selector\":\"#q\",\"key\":\"enter\"}}}");
    try router.handleMessage(server, arena, press_request);
    response.clearRetainingCapacity();

    // Step 3: ask the page whether the submit handler ran and saw the filled value.
    const eval_request = try arena.dupe(u8, "{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"eval\",\"arguments\":{\"script\":\"window.submitted === true && window.submittedValue === 'hello'\"}}}");
    try router.handleMessage(server, arena, eval_request);

    // Only the eval response is left in the buffer; it must contain "true".
    const saw_true = std.mem.indexOf(u8, response.written(), "true") != null;
    try testing.expect(saw_true);
}
test "MCP - getCookies: defaults to current page, url filter, all flag" {
defer testing.reset();
var out: std.io.Writer.Allocating = .init(testing.arena_allocator);