diff --git a/src/browser/actions.zig b/src/browser/actions.zig index 54868951..d532f22a 100644 --- a/src/browser/actions.zig +++ b/src/browser/actions.zig @@ -80,16 +80,18 @@ pub fn hover(node: *DOMNode, frame: *Frame) !void { } pub fn press(node: ?*DOMNode, key: []const u8, frame: *Frame) !void { - const target = if (node) |n| - (n.is(Element) orelse return error.InvalidNodeType).asEventTarget() + const target_el: ?*Element = if (node) |n| + (n.is(Element) orelse return error.InvalidNodeType) else - frame.document.asNode().asEventTarget(); + null; + const target = if (target_el) |el| el.asEventTarget() else frame.document.asNode().asEventTarget(); + const canonical = canonicalKey(key); const keydown_event: *KeyboardEvent = try .initTrusted(comptime .wrap("keydown"), .{ .bubbles = true, .cancelable = true, .composed = true, - .key = key, + .key = canonical, }, frame); frame._event_manager.dispatch(target, keydown_event.asEvent()) catch |err| { @@ -97,11 +99,15 @@ pub fn press(node: ?*DOMNode, key: []const u8, frame: *Frame) !void { return error.ActionFailed; }; + if (std.mem.eql(u8, canonical, "Enter") and !keydown_event.asEvent().getDefaultPrevented()) { + if (target_el) |el| try implicitFormSubmit(el, frame); + } + const keyup_event: *KeyboardEvent = try .initTrusted(comptime .wrap("keyup"), .{ .bubbles = true, .cancelable = true, .composed = true, - .key = key, + .key = canonical, }, frame); frame._event_manager.dispatch(target, keyup_event.asEvent()) catch |err| { @@ -110,6 +116,55 @@ pub fn press(node: ?*DOMNode, key: []const u8, frame: *Frame) !void { }; } +/// Map common shorthand to the canonical KeyboardEvent.key string so users +/// can type "enter" instead of "Enter" without surprises. +fn canonicalKey(key: []const u8) []const u8 { + const aliases = [_]struct { in: []const u8, out: []const u8 }{ + .{ .in = "enter", .out = "Enter" }, + .{ .in = "return", .out = "Enter" }, + .{ .in = "\n", .out = "Enter" }, + .{ .in = "\\n", .out = "Enter" }, + .{ .in = "esc", .out = "Escape" }, + .{ .in = "escape", .out = "Escape" }, + .{ .in = "tab", .out = "Tab" }, + .{ .in = "\t", .out = "Tab" }, + .{ .in = "space", .out = " " }, + .{ .in = "backspace", .out = "Backspace" }, + .{ .in = "delete", .out = "Delete" }, + .{ .in = "del", .out = "Delete" }, + .{ .in = "up", .out = "ArrowUp" }, + .{ .in = "down", .out = "ArrowDown" }, + .{ .in = "left", .out = "ArrowLeft" }, + .{ .in = "right", .out = "ArrowRight" }, + }; + for (aliases) |a| { + if (std.ascii.eqlIgnoreCase(key, a.in)) return a.out; + } + return key; +} + +fn implicitFormSubmit(el: *Element, frame: *Frame) !void { + const Input = Element.Html.Input; + const Button = Element.Html.Button; + + if (el.is(Input)) |input| { + const form = input.getForm(frame) orelse return; + const submitter: ?*Element = switch (input._input_type) { + .submit, .image => el, + // Non-text controls (checkbox, radio, file, ...) don't trigger + // implicit submission; only the text-like family does. + .text, .password, .email, .url, .tel, .search, .number, .date, .time, .@"datetime-local", .month, .week => null, + else => return, + }; + return form.requestSubmit(submitter, frame); + } + if (el.is(Button)) |button| { + if (!std.mem.eql(u8, button.getType(), "submit")) return; + const form = button.getForm(frame) orelse return; + return form.requestSubmit(el, frame); + } +} + pub fn selectOption(node: *DOMNode, value: []const u8, frame: *Frame) !void { const el = node.is(Element) orelse return error.InvalidNodeType; const select = el.is(Element.Html.Select) orelse return error.InvalidNodeType; diff --git a/src/browser/tests/mcp_press_form.html b/src/browser/tests/mcp_press_form.html new file mode 100644 index 00000000..4ecc077b --- /dev/null +++ b/src/browser/tests/mcp_press_form.html @@ -0,0 +1,9 @@ + + +
+ + + diff --git a/src/browser/tools.zig b/src/browser/tools.zig index 3f57a1d9..69ed97d1 100644 --- a/src/browser/tools.zig +++ b/src/browser/tools.zig @@ -302,13 +302,14 @@ pub const Tool = enum { ), }, .press => .{ - .description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'.", + .description = "Press a keyboard key, dispatching keydown and keyup events. Use key names like 'Enter', 'Tab', 'Escape', 'ArrowDown', 'Backspace', or single characters like 'a', '1'. Common shorthand is normalized: 'enter'/'return' → 'Enter', 'esc' → 'Escape', 'up'/'down'/'left'/'right' → 'Arrow*', 'space' → ' '. Pressing 'Enter' on a form input or submit button triggers implicit form submission.", .input_schema = minify( \\{ \\ "type": "object", \\ "properties": { \\ "key": { "type": "string", "description": "The key to press (e.g. 'Enter', 'Tab', 'a')." }, - \\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document." } + \\ "selector": { "type": "string", "description": "Optional CSS selector of the element to target. Preferred over backendNodeId." }, + \\ "backendNodeId": { "type": "integer", "description": "Optional backend node ID of the element to target. Defaults to the document when neither selector nor backendNodeId is provided." } \\ }, \\ "required": ["key"] \\} @@ -1026,12 +1027,21 @@ fn execHover(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode. fn execPress(arena: std.mem.Allocator, session: *lp.Session, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 { const Params = struct { key: []const u8, + selector: ?[]const u8 = null, backendNodeId: ?CDPNode.Id = null, }; const args = try parseArgs(Params, arena, arguments); - const page = try requireFrame(session); - const target_node = try resolveOptionalNode(registry, args.backendNodeId); + var page: *lp.Frame = undefined; + var target_node: ?*DOMNode = null; + if (args.selector) |sel| { + const resolved = try resolveBySelector(session, sel); + page = resolved.page; + target_node = resolved.node; + } else { + page = try requireFrame(session); + target_node = try resolveOptionalNode(registry, args.backendNodeId); + } lp.actions.press(target_node, args.key, page) catch |err| return mapActionError(err); diff --git a/src/mcp/tools.zig b/src/mcp/tools.zig index 2ac84db6..4515945b 100644 --- a/src/mcp/tools.zig +++ b/src/mcp/tools.zig @@ -898,6 +898,28 @@ test "MCP - waitForSelector: timeout" { }, out.written()); } +test "MCP - press Enter on form input triggers submit (lowercase alias)" { + defer testing.reset(); + const aa = testing.arena_allocator; + var out: std.io.Writer.Allocating = .init(aa); + const server = try testLoadPage("http://localhost:9582/src/browser/tests/mcp_press_form.html", &out.writer); + defer server.deinit(); + + // Fill the input then press "enter" (lowercase alias) on it. The form's + // submit handler sets window.submitted and snapshots the input value. + const fill = try aa.dupe(u8, "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/call\",\"params\":{\"name\":\"fill\",\"arguments\":{\"selector\":\"#q\",\"value\":\"hello\"}}}"); + try router.handleMessage(server, aa, fill); + out.clearRetainingCapacity(); + + const press_msg = try aa.dupe(u8, "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/call\",\"params\":{\"name\":\"press\",\"arguments\":{\"selector\":\"#q\",\"key\":\"enter\"}}}"); + try router.handleMessage(server, aa, press_msg); + out.clearRetainingCapacity(); + + const eval_msg = try aa.dupe(u8, "{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"eval\",\"arguments\":{\"script\":\"window.submitted === true && window.submittedValue === 'hello'\"}}}"); + try router.handleMessage(server, aa, eval_msg); + try testing.expect(std.mem.indexOf(u8, out.written(), "true") != null); +} + test "MCP - getCookies: defaults to current page, url filter, all flag" { defer testing.reset(); var out: std.io.Writer.Allocating = .init(testing.arena_allocator);