diff --git a/src/Config.zig b/src/Config.zig
index 63aa3d94..f0fd8c3a 100644
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -140,6 +140,7 @@ const Commands = cli.Builder(.{
.{ .name = "wait_until", .type = ?WaitUntil },
.{ .name = "wait_script", .type = ?[:0]const u8 },
.{ .name = "wait_selector", .type = ?[:0]const u8 },
+ .{ .name = "terminate_ms", .type = ?u32 },
},
.shared_options = CommonOptions,
},
@@ -638,6 +639,12 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\--wait-script-file
\\ Like --wait-script, but reads the script from a file.
\\
+ \\--terminate-ms Hard deadline in milliseconds. After this time elapses,
+ \\ JavaScript execution is forcibly terminated (e.g. for
+ \\ pages with endless scripts). Unlike --wait-ms, which
+ \\ only stops waiting, --terminate-ms aborts the page.
+ \\ Defaults to no terminate.
+ \\
\\--cookie Path to a JSON file to load cookies from (read-only).
\\ Defaults to no cookie loading.
\\
diff --git a/src/Sighandler.zig b/src/Sighandler.zig
index 2b2d7f29..85a8d8e5 100644
--- a/src/Sighandler.zig
+++ b/src/Sighandler.zig
@@ -22,11 +22,11 @@
//! The structure does not clear the memory allocated in the arena,
//! clear the entire arena when exiting the program.
const std = @import("std");
-const assert = std.debug.assert;
-const Allocator = std.mem.Allocator;
const lp = @import("lightpanda");
const log = lp.log;
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
const SigHandler = @This();
@@ -44,17 +44,41 @@ pub const Listener = struct {
};
pub fn install(self: *SigHandler) !void {
- // Block SIGINT and SIGTERM for the current thread and all created from it
+ // Block these signals for the current thread and all created from it.
+ // SIGALRM is included so deadline() can wake the sighandler thread when its timer fires.
self.sigset = std.posix.sigemptyset();
std.posix.sigaddset(&self.sigset, std.posix.SIG.INT);
std.posix.sigaddset(&self.sigset, std.posix.SIG.TERM);
std.posix.sigaddset(&self.sigset, std.posix.SIG.QUIT);
+ std.posix.sigaddset(&self.sigset, std.posix.SIG.ALRM);
std.posix.sigprocmask(std.posix.SIG.BLOCK, &self.sigset, null);
self.handle_thread = try std.Thread.spawn(.{ .allocator = self.arena }, SigHandler.sighandle, .{self});
self.handle_thread.?.detach();
}
+/// Argument type handed to setitimer(); presumably mirrors libc's
+/// `struct itimerval` (it_interval, then it_value) — keep the field order.
+const itimerval = extern struct {
+    interval: std.c.timeval,
+    value: std.c.timeval,
+};
+/// `which` selector passed to setitimer(); assumed to equal the target libc's
+/// ITIMER_REAL — TODO confirm per platform.
+const ITIMER_REAL: c_int = 0;
+extern "c" fn setitimer(which: c_int, new_value: *const itimerval, old_value: ?*itimerval) c_int;
+
+/// Arm the ITIMER_REAL timer to expire `ms` milliseconds from now. Expiry is
+/// expected to raise SIGALRM, which install() blocks and sighandle() answers
+/// by running the registered listeners (this backs --terminate-ms).
+/// Returns error.SetItimerFailed when setitimer() reports a non-zero status.
+/// NOTE(review): POSIX treats a zero `value` as "disarm", so ms == 0 presumably
+/// schedules nothing rather than firing immediately — confirm that is intended.
+pub fn deadline(_: *SigHandler, ms: u32) !void {
+    const whole_seconds = ms / std.time.ms_per_s;
+    const leftover_ms = ms % std.time.ms_per_s;
+    const timer: itimerval = .{
+        // all-zero interval, exactly as before: no reload period is requested
+        .interval = .{ .sec = 0, .usec = 0 },
+        .value = .{
+            .sec = @intCast(whole_seconds),
+            .usec = @intCast(leftover_ms * std.time.us_per_ms),
+        },
+    };
+    const status = setitimer(ITIMER_REAL, &timer, null);
+    if (status != 0) return error.SetItimerFailed;
+}
+
pub fn on(self: *SigHandler, func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) !void {
assert(@typeInfo(@TypeOf(func)).@"fn".return_type.? == void);
@@ -101,6 +125,16 @@ fn sighandle(self: *SigHandler) noreturn {
}
continue;
},
+ std.posix.SIG.ALRM => {
+ // Deadline tripped (e.g. --terminate-ms). Run the same listeners,
+ // but don't bump `attempt` — a subsequent ctrl-c should still get
+ // the normal first-attempt graceful path before hard-exiting.
+ log.info(.app, "Deadline reached ", .{});
+ for (self.listeners.items) |*item| {
+ item.start(item.args.ptr);
+ }
+ continue;
+ },
else => continue,
}
}
diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig
index 872f2389..7faedb4b 100644
--- a/src/browser/Frame.zig
+++ b/src/browser/Frame.zig
@@ -1380,8 +1380,22 @@ pub fn removeElementIdWithMaps(self: *Frame, id_maps: ElementIdMaps, id: []const
pub fn getElementByIdFromNode(self: *Frame, node: *Node, id: []const u8) ?*Element {
if (node.isConnected() or node.isInShadowTree()) {
- const lookup = self.getElementIdMap(node).lookup;
- return lookup.get(id);
+ var current = node;
+ while (true) {
+ if (current.is(ShadowRoot)) |shadow_root| {
+ return shadow_root.getElementById(id, self);
+ }
+ const parent = current._parent orelse {
+ if (current._type == .document) {
+ return current._type.document.getElementById(id, self);
+ }
+ if (IS_DEBUG) {
+ std.debug.assert(false);
+ }
+ return null;
+ };
+ current = parent;
+ }
}
var tw = @import("webapi/TreeWalker.zig").Full.Elements.init(node, .{});
while (tw.next()) |el| {
@@ -3715,7 +3729,15 @@ pub fn submitForm(self: *Frame, submitter_: ?*Element, form_: ?*Element.Html.For
};
if (submit_opts.fire_event) {
- const submitter_html: ?*HtmlElement = if (submitter_) |s| s.is(HtmlElement) else null;
+ // Per HTML spec "submit a form element" algorithm: SubmitEvent.submitter
+ // must be null when the submitter is the form itself, which is what
+ // Form.requestSubmit() passes when called with no submitter argument.
+ // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#concept-form-submit
+ const submitter_html: ?*HtmlElement = blk: {
+ const s = submitter_ orelse break :blk null;
+ if (s == form_element) break :blk null;
+ break :blk s.is(HtmlElement);
+ };
const submit_event = (try SubmitEvent.initTrusted(comptime .wrap("submit"), .{ .bubbles = true, .cancelable = true, .submitter = submitter_html }, self)).asEvent();
// so submit_event is still valid when we check _prevent_default
diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig
index 7754d8a3..8ea3c9bc 100644
--- a/src/browser/js/Env.zig
+++ b/src/browser/js/Env.zig
@@ -362,6 +362,10 @@ pub fn runMicrotasks(self: *Env) void {
if (self.microtask_queues_are_running == false) {
const v8_isolate = self.isolate.handle;
+ if (v8.v8__Isolate__IsExecutionTerminating(v8_isolate)) {
+ return;
+ }
+
self.microtask_queues_are_running = true;
defer self.microtask_queues_are_running = false;
@@ -374,6 +378,10 @@ pub fn runMicrotasks(self: *Env) void {
}
pub fn runMacrotasks(self: *Env) !void {
+ if (v8.v8__Isolate__IsExecutionTerminating(self.isolate.handle)) {
+ return;
+ }
+
for (self.contexts[0..self.context_count]) |ctx| {
if (comptime builtin.is_test == false) {
// I hate this comptime check as much as you do. But we have tests
@@ -485,6 +493,13 @@ pub fn terminate(self: *const Env) void {
v8.v8__Isolate__TerminateExecution(self.isolate.handle);
}
+/// Counterpart to terminate(): asks V8 to cancel a pending TerminateExecution on
+/// this isolate, so V8 calls made afterwards (e.g. during cleanup) don't keep
+/// tripping terminating-state asserts. Intended to be safe when nothing is pending.
+pub fn cancelTerminate(self: *const Env) void {
+ v8.v8__Isolate__CancelTerminateExecution(self.isolate.handle);
+}
+
fn promiseRejectCallback(message_handle: v8.PromiseRejectMessage) callconv(.c) void {
const promise_event = v8.v8__PromiseRejectMessage__GetEvent(&message_handle);
if (promise_event != v8.kPromiseRejectWithNoHandler and promise_event != v8.kPromiseHandlerAddedAfterReject) {
diff --git a/src/browser/tests/element/duplicate_ids.html b/src/browser/tests/element/duplicate_ids.html
index 52ef7da6..2e8b5894 100644
--- a/src/browser/tests/element/duplicate_ids.html
+++ b/src/browser/tests/element/duplicate_ids.html
@@ -17,3 +17,24 @@
// testing.expectEqual(null, document.getElementById('test'));
+
+
first
+second
+
+
diff --git a/src/browser/tests/element/html/form.html b/src/browser/tests/element/html/form.html
index 31f5d648..8a92cb10 100644
--- a/src/browser/tests/element/html/form.html
+++ b/src/browser/tests/element/html/form.html
@@ -485,12 +485,13 @@
}
-
+
-
diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig
index c60aa7a7..ff5778e9 100644
--- a/src/cdp/domains/network.zig
+++ b/src/cdp/domains/network.zig
@@ -45,6 +45,7 @@ pub fn processMessage(cmd: *CDP.Command) !void {
setCookie,
setCookies,
getCookies,
+ getAllCookies,
getResponseBody,
}, cmd.input.action) orelse return error.UnknownMethod;
@@ -59,6 +60,7 @@ pub fn processMessage(cmd: *CDP.Command) !void {
.setCookie => return setCookie(cmd),
.setCookies => return setCookies(cmd),
.getCookies => return getCookies(cmd),
+ .getAllCookies => return getAllCookies(cmd),
.getResponseBody => return getResponseBody(cmd),
}
}
@@ -149,7 +151,10 @@ fn deleteCookies(cmd: *CDP.Command) !void {
}
fn clearBrowserCookies(cmd: *CDP.Command) !void {
- if (try cmd.params(struct {}) != null) return error.InvalidParams;
+ // Network.clearBrowserCookies takes no parameters per the CDP spec, but most
+ // CDP clients (chrome-remote-interface, chromedp, custom websocket clients)
+ // include an empty `"params":{}` object on every command for ergonomics.
+ // Chrome accepts that and clears the jar, so params are deliberately not inspected here.
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
bc.session.cookie_jar.clearRetainingCapacity();
return cmd.sendResult(null, .{});
@@ -205,6 +210,18 @@ fn getCookies(cmd: *CDP.Command) !void {
try cmd.sendResult(.{ .cookies = writer }, .{});
}
+/// Handles Network.getAllCookies: replies with every cookie held in the current
+/// browser context's jar; no URL/origin filter is applied in this function.
+/// Calls removeExpired(null) on the jar before building the reply. Fails with
+/// error.BrowserContextNotLoaded when the command has no browser context.
+fn getAllCookies(cmd: *CDP.Command) !void {
+    const browser_context = cmd.browser_context orelse return error.BrowserContextNotLoaded;
+    const cookie_jar = &browser_context.session.cookie_jar;
+    cookie_jar.removeExpired(null);
+    const all_cookies: CdpStorage.CookieWriter = .{ .cookies = cookie_jar.cookies.items };
+    return cmd.sendResult(.{ .cookies = all_cookies }, .{});
+}
+
fn getResponseBody(cmd: *CDP.Command) !void {
const params = (try cmd.params(struct {
requestId: []const u8, // "REQ-{d}" or "LID-{d}"
@@ -565,3 +582,83 @@ test "cdp.Network: cookies" {
});
try ctx.expectSentResult(.{ .cookies = &[_]ResCookie{} }, .{ .id = 10 });
}
+
+test "cdp.Network: clearBrowserCookies accepts empty params object" {
+ const CdpCookie = CdpStorage.CdpCookie;
+ const ResCookie = CdpStorage.ResCookie;
+
+ var ctx = try testing.context();
+ defer ctx.deinit();
+ _ = try ctx.loadBrowserContext(.{ .id = "BID-N1" });
+
+ try ctx.processMessage(.{
+ .id = 1,
+ .method = "Network.setCookie",
+ .params = CdpCookie{ .name = "foo", .value = "bar", .url = "https://example.com/" },
+ });
+ try ctx.expectSentResult(null, .{ .id = 1 }); // setCookie acknowledged with an empty result
+
+ // Regression check: clearBrowserCookies must accept an explicit, empty
+ // `"params":{}` object, which many CDP clients (chrome-remote-interface,
+ // chromedp, etc.) attach to every command. Sent as raw JSON because
+ // an empty Zig anonymous struct serializes as `[]`, not `{}`.
+ try ctx.processMessage(
+ \\{"id":2,"method":"Network.clearBrowserCookies","params":{}}
+ );
+ try ctx.expectSentResult(null, .{ .id = 2 });
+
+ try ctx.processMessage(.{
+ .id = 3,
+ .method = "Storage.getCookies",
+ .params = .{ .browserContextId = "BID-N1" },
+ });
+ try ctx.expectSentResult(.{ .cookies = &[_]ResCookie{} }, .{ .id = 3 }); // the jar must read back empty
+}
+
+test "cdp.Network: getAllCookies returns whole jar regardless of current origin" {
+ const CdpCookie = CdpStorage.CdpCookie;
+ const ResCookie = CdpStorage.ResCookie;
+
+ var ctx = try testing.context();
+ defer ctx.deinit();
+ _ = try ctx.loadBrowserContext(.{ .id = "BID-N2" });
+
+ // Seed two cookies on different origins. With no current frame URL,
+ // Network.getCookies (no `urls`) would return -32602 InvalidParams;
+ // Network.getAllCookies must still return both.
+ try ctx.processMessage(.{
+ .id = 1,
+ .method = "Network.setCookies",
+ .params = .{
+ .cookies = &[_]CdpCookie{
+ .{ .name = "a", .value = "1", .url = "https://example.com/" },
+ .{ .name = "b", .value = "2", .url = "https://other.test/" },
+ },
+ },
+ });
+ try ctx.expectSentResult(null, .{ .id = 1 }); // setCookies acknowledged with an empty result
+
+ // Case 1: explicit empty params object — sent as raw JSON because an empty
+ // Zig anonymous struct serializes as `[]`, not `{}`.
+ try ctx.processMessage(
+ \\{"id":2,"method":"Network.getAllCookies","params":{}}
+ );
+ try ctx.expectSentResult(.{
+ .cookies = &[_]ResCookie{
+ .{ .name = "a", .value = "1", .domain = "example.com", .path = "/", .size = 2, .secure = true },
+ .{ .name = "b", .value = "2", .domain = "other.test", .path = "/", .size = 2, .secure = true },
+ },
+ }, .{ .id = 2 });
+
+ // Case 2: no params field at all (CDP-spec literal "no params"); same reply expected.
+ try ctx.processMessage(.{
+ .id = 3,
+ .method = "Network.getAllCookies",
+ });
+ try ctx.expectSentResult(.{
+ .cookies = &[_]ResCookie{
+ .{ .name = "a", .value = "1", .domain = "example.com", .path = "/", .size = 2, .secure = true },
+ .{ .name = "b", .value = "2", .domain = "other.test", .path = "/", .size = 2, .secure = true },
+ },
+ }, .{ .id = 3 });
+}
diff --git a/src/lightpanda.zig b/src/lightpanda.zig
index 292ba44c..bdeac93f 100644
--- a/src/lightpanda.zig
+++ b/src/lightpanda.zig
@@ -62,16 +62,10 @@ pub const FetchOpts = struct {
dump_mode: ?Config.DumpFormat = null,
writer: ?*std.Io.Writer = null,
};
-pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void {
- const http_client = try HttpClient.init(app.allocator, &app.network);
- defer http_client.deinit();
-
+pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !void {
const notification = try Notification.init(app.allocator);
defer notification.deinit();
- var browser = try Browser.init(app, .{ .http_client = http_client });
- defer browser.deinit();
-
var session = try browser.newSession(notification);
if (app.config.cookieFile()) |cookie_path| {
diff --git a/src/main.zig b/src/main.zig
index 00940c5d..378af6e2 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -139,7 +139,18 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
fetch_opts.writer = &writer.interface;
}
- var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, url.?, fetch_opts });
+ // Browser owns a V8 isolate, which has thread affinity — it must
+ // be init/used/deinit on the same thread (fetchThread, below). So
+ // we can't treat Browser like the above serve path treats Server.
+ // We need Browser to be created in fetchThread and to get a reference
+ // to it here.
+ var ft: FetchTerminator = .{};
+ try sighandler.on(FetchTerminator.terminate, .{&ft});
+ if (opts.terminate_ms) |ms| {
+ try sighandler.deadline(ms);
+ }
+
+ var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, &ft, url.?, fetch_opts });
defer worker_thread.join();
app.network.run();
@@ -197,9 +208,55 @@ fn agentThread(allocator: std.mem.Allocator, app: *App, opts: Config.Agent, fail
}
}
-fn fetchThread(app: *App, url: [:0]const u8, fetch_opts: lp.FetchOpts) void {
+/// Lets the signal-handler thread interrupt the Browser owned by fetchThread.
+/// fetchThread publishes its Browser with storeBrowser() and withdraws it with
+/// releaseBrowser() before tearing it down; terminate() is the listener that
+/// gets registered with the sighandler. Every access to `browser` happens
+/// while holding `mutex`.
+const FetchTerminator = struct {
+    mutex: std.Thread.Mutex = .{},
+    // Browser that terminate() acts on; null while none is published.
+    browser: ?*lp.Browser = null,
+
+    /// Publish `browser` as the target of terminate().
+    fn storeBrowser(self: *FetchTerminator, browser: *lp.Browser) void {
+        self.mutex.lock();
+        defer self.mutex.unlock();
+        self.browser = browser;
+    }
+
+    /// Withdraw the published browser, first cancelling any termination that
+    /// terminate() requested so that the caller's cleanup does not run against
+    /// an isolate still flagged as terminating. Does nothing when no browser
+    /// is published.
+    fn releaseBrowser(self: *FetchTerminator) void {
+        self.mutex.lock();
+        defer self.mutex.unlock();
+        const b = self.browser orelse return;
+        b.env.cancelTerminate();
+        self.browser = null;
+    }
+
+    /// Ask the published browser's JS environment to terminate execution.
+    /// Does nothing when no browser is published. May be invoked more than
+    /// once (e.g. deadline followed by ctrl-c); each call simply re-requests
+    /// termination.
+    fn terminate(self: *FetchTerminator) void {
+        self.mutex.lock();
+        defer self.mutex.unlock();
+        const b = self.browser orelse return;
+        // Deliberately leave `browser` published: clearing it here made
+        // releaseBrowser() return early, so cancelTerminate() only ever ran
+        // when there was nothing to cancel. The pointer stays usable because
+        // fetchThread runs releaseBrowser() (under this mutex) before it
+        // deinits the browser.
+        b.env.terminate();
+    }
+};
+
+fn fetchThread(app: *App, ft: *FetchTerminator, url: [:0]const u8, fetch_opts: lp.FetchOpts) void {
defer app.network.stop();
- lp.fetch(app, url, fetch_opts) catch |err| {
+
+ const http_client = lp.HttpClient.init(app.allocator, &app.network) catch |err| {
+ log.fatal(.app, "http client init error", .{ .err = err });
+ return;
+ };
+ defer http_client.deinit();
+
+ var browser = lp.Browser.init(app, .{ .http_client = http_client }) catch |err| {
+ log.fatal(.app, "browser init error", .{ .err = err });
+ return;
+ };
+ defer browser.deinit();
+
+ ft.storeBrowser(&browser);
+ // if this exits normally, we want to disarm the FetchTerminator so that
+ // any subsequent sighandlers don't try to shutdown an already (or in-the-
+ // process-of) shutting down browser/env
+ defer ft.releaseBrowser();
+
+ lp.fetch(app, &browser, url, fetch_opts) catch |err| {
log.fatal(.app, "fetch error", .{ .err = err, .url = url });
};
}