mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
Merge branch 'main' into agent
This commit is contained in:
@@ -140,6 +140,7 @@ const Commands = cli.Builder(.{
|
||||
.{ .name = "wait_until", .type = ?WaitUntil },
|
||||
.{ .name = "wait_script", .type = ?[:0]const u8 },
|
||||
.{ .name = "wait_selector", .type = ?[:0]const u8 },
|
||||
.{ .name = "terminate_ms", .type = ?u32 },
|
||||
},
|
||||
.shared_options = CommonOptions,
|
||||
},
|
||||
@@ -638,6 +639,12 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
|
||||
\\--wait-script-file
|
||||
\\ Like --wait-script, but reads the script from a file.
|
||||
\\
|
||||
\\--terminate-ms Hard deadline in milliseconds. After this time elapses,
|
||||
\\ JavaScript execution is forcibly terminated (e.g. for
|
||||
\\ pages with endless scripts). Unlike --wait-ms, which
|
||||
\\ only stops waiting, --terminate-ms aborts the page.
|
||||
\\ Defaults to no terminate.
|
||||
\\
|
||||
\\--cookie Path to a JSON file to load cookies from (read-only).
|
||||
\\ Defaults to no cookie loading.
|
||||
\\
|
||||
|
||||
@@ -22,11 +22,11 @@
|
||||
//! The structure does not clear the memory allocated in the arena,
|
||||
//! clear the entire arena when exiting the program.
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const log = lp.log;
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const SigHandler = @This();
|
||||
|
||||
@@ -44,17 +44,41 @@ pub const Listener = struct {
|
||||
};
|
||||
|
||||
pub fn install(self: *SigHandler) !void {
|
||||
// Block SIGINT and SIGTERM for the current thread and all created from it
|
||||
// Block these signals for the current thread and all created from it.
|
||||
// SIGALRM is included so deadline() can wake the sighandler thread on a deadline.
|
||||
self.sigset = std.posix.sigemptyset();
|
||||
std.posix.sigaddset(&self.sigset, std.posix.SIG.INT);
|
||||
std.posix.sigaddset(&self.sigset, std.posix.SIG.TERM);
|
||||
std.posix.sigaddset(&self.sigset, std.posix.SIG.QUIT);
|
||||
std.posix.sigaddset(&self.sigset, std.posix.SIG.ALRM);
|
||||
std.posix.sigprocmask(std.posix.SIG.BLOCK, &self.sigset, null);
|
||||
|
||||
self.handle_thread = try std.Thread.spawn(.{ .allocator = self.arena }, SigHandler.sighandle, .{self});
|
||||
self.handle_thread.?.detach();
|
||||
}
|
||||
|
||||
const itimerval = extern struct {
|
||||
interval: std.c.timeval,
|
||||
value: std.c.timeval,
|
||||
};
|
||||
const ITIMER_REAL: c_int = 0;
|
||||
extern "c" fn setitimer(which: c_int, new_value: *const itimerval, old_value: ?*itimerval) c_int;
|
||||
|
||||
/// Schedule a SIGALRM after `ms` milliseconds, which wakes the sighandler
|
||||
/// thread and runs the registered listeners. Used to enforce --terminate-ms.
|
||||
pub fn deadline(_: *SigHandler, ms: u32) !void {
|
||||
const it = itimerval{
|
||||
.interval = .{ .sec = 0, .usec = 0 },
|
||||
.value = .{
|
||||
.sec = @intCast(ms / std.time.ms_per_s),
|
||||
.usec = @intCast((ms % std.time.ms_per_s) * std.time.us_per_ms),
|
||||
},
|
||||
};
|
||||
if (setitimer(ITIMER_REAL, &it, null) != 0) {
|
||||
return error.SetItimerFailed;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on(self: *SigHandler, func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) !void {
|
||||
assert(@typeInfo(@TypeOf(func)).@"fn".return_type.? == void);
|
||||
|
||||
@@ -101,6 +125,16 @@ fn sighandle(self: *SigHandler) noreturn {
|
||||
}
|
||||
continue;
|
||||
},
|
||||
std.posix.SIG.ALRM => {
|
||||
// Deadline tripped (e.g. --terminate-ms). Run the same listeners,
|
||||
// but don't bump `attempt` — a subsequent ctrl-c should still get
|
||||
// the normal first-attempt graceful path before hard-exiting.
|
||||
log.info(.app, "Deadline reached ", .{});
|
||||
for (self.listeners.items) |*item| {
|
||||
item.start(item.args.ptr);
|
||||
}
|
||||
continue;
|
||||
},
|
||||
else => continue,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1380,8 +1380,22 @@ pub fn removeElementIdWithMaps(self: *Frame, id_maps: ElementIdMaps, id: []const
|
||||
|
||||
pub fn getElementByIdFromNode(self: *Frame, node: *Node, id: []const u8) ?*Element {
|
||||
if (node.isConnected() or node.isInShadowTree()) {
|
||||
const lookup = self.getElementIdMap(node).lookup;
|
||||
return lookup.get(id);
|
||||
var current = node;
|
||||
while (true) {
|
||||
if (current.is(ShadowRoot)) |shadow_root| {
|
||||
return shadow_root.getElementById(id, self);
|
||||
}
|
||||
const parent = current._parent orelse {
|
||||
if (current._type == .document) {
|
||||
return current._type.document.getElementById(id, self);
|
||||
}
|
||||
if (IS_DEBUG) {
|
||||
std.debug.assert(false);
|
||||
}
|
||||
return null;
|
||||
};
|
||||
current = parent;
|
||||
}
|
||||
}
|
||||
var tw = @import("webapi/TreeWalker.zig").Full.Elements.init(node, .{});
|
||||
while (tw.next()) |el| {
|
||||
@@ -3715,7 +3729,15 @@ pub fn submitForm(self: *Frame, submitter_: ?*Element, form_: ?*Element.Html.For
|
||||
};
|
||||
|
||||
if (submit_opts.fire_event) {
|
||||
const submitter_html: ?*HtmlElement = if (submitter_) |s| s.is(HtmlElement) else null;
|
||||
// Per HTML spec "submit a form element" algorithm: SubmitEvent.submitter
|
||||
// must be null when the submitter is the form itself, which is what
|
||||
// Form.requestSubmit() passes when called with no submitter argument.
|
||||
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#concept-form-submit
|
||||
const submitter_html: ?*HtmlElement = blk: {
|
||||
const s = submitter_ orelse break :blk null;
|
||||
if (s == form_element) break :blk null;
|
||||
break :blk s.is(HtmlElement);
|
||||
};
|
||||
const submit_event = (try SubmitEvent.initTrusted(comptime .wrap("submit"), .{ .bubbles = true, .cancelable = true, .submitter = submitter_html }, self)).asEvent();
|
||||
|
||||
// so submit_event is still valid when we check _prevent_default
|
||||
|
||||
@@ -362,6 +362,10 @@ pub fn runMicrotasks(self: *Env) void {
|
||||
if (self.microtask_queues_are_running == false) {
|
||||
const v8_isolate = self.isolate.handle;
|
||||
|
||||
if (v8.v8__Isolate__IsExecutionTerminating(v8_isolate)) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.microtask_queues_are_running = true;
|
||||
defer self.microtask_queues_are_running = false;
|
||||
|
||||
@@ -374,6 +378,10 @@ pub fn runMicrotasks(self: *Env) void {
|
||||
}
|
||||
|
||||
pub fn runMacrotasks(self: *Env) !void {
|
||||
if (v8.v8__Isolate__IsExecutionTerminating(self.isolate.handle)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (self.contexts[0..self.context_count]) |ctx| {
|
||||
if (comptime builtin.is_test == false) {
|
||||
// I hate this comptime check as much as you do. But we have tests
|
||||
@@ -485,6 +493,13 @@ pub fn terminate(self: *const Env) void {
|
||||
v8.v8__Isolate__TerminateExecution(self.isolate.handle);
|
||||
}
|
||||
|
||||
/// Clears a pending termination so V8 calls (e.g. those made during cleanup)
|
||||
/// don't keep tripping over the terminating-state asserts. Safe to call
|
||||
/// unconditionally; a no-op if termination wasn't pending.
|
||||
pub fn cancelTerminate(self: *const Env) void {
|
||||
v8.v8__Isolate__CancelTerminateExecution(self.isolate.handle);
|
||||
}
|
||||
|
||||
fn promiseRejectCallback(message_handle: v8.PromiseRejectMessage) callconv(.c) void {
|
||||
const promise_event = v8.v8__PromiseRejectMessage__GetEvent(&message_handle);
|
||||
if (promise_event != v8.kPromiseRejectWithNoHandler and promise_event != v8.kPromiseHandlerAddedAfterReject) {
|
||||
|
||||
@@ -17,3 +17,24 @@
|
||||
|
||||
// testing.expectEqual(null, document.getElementById('test'));
|
||||
</script>
|
||||
|
||||
<div id="qs-test">first</div>
|
||||
<div id="qs-test">second</div>
|
||||
|
||||
<script id=duplicateIdsQuerySelector>
|
||||
{
|
||||
// Regression test: querySelector('#id') must agree with getElementById('id')
|
||||
// when the first duplicate is removed. Selector engine fast path goes through
|
||||
// Frame.getElementByIdFromNode, which previously only checked the lookup map
|
||||
// and missed the _removed_ids recovery, so it returned null.
|
||||
const first = document.querySelector('#qs-test');
|
||||
testing.expectEqual('first', first.textContent);
|
||||
|
||||
first.remove();
|
||||
|
||||
testing.expectEqual('second', document.querySelector('#qs-test').textContent);
|
||||
testing.expectEqual('second', document.body.querySelector('#qs-test').textContent);
|
||||
testing.expectEqual(1, document.querySelectorAll('#qs-test').length);
|
||||
testing.expectEqual('second', document.getElementById('qs-test').textContent);
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -485,12 +485,13 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<!-- Test: requestSubmit() without submitter sets submitter to the form element -->
|
||||
<!-- Test: requestSubmit() without submitter sets SubmitEvent.submitter to null
|
||||
per the HTML spec "submit a form element" algorithm step 4. -->
|
||||
<form id="test_form_submitter2" action="/should-not-navigate7" method="get">
|
||||
<input type="text" name="q" value="test">
|
||||
</form>
|
||||
|
||||
<script id="requestSubmit_default_submitter_is_form">
|
||||
<script id="requestSubmit_default_submitter_is_null">
|
||||
{
|
||||
const form = $('#test_form_submitter2');
|
||||
let capturedSubmitter = undefined;
|
||||
@@ -501,7 +502,7 @@
|
||||
});
|
||||
|
||||
form.requestSubmit();
|
||||
testing.expectEqual(form, capturedSubmitter);
|
||||
testing.expectEqual(null, capturedSubmitter);
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ pub fn processMessage(cmd: *CDP.Command) !void {
|
||||
setCookie,
|
||||
setCookies,
|
||||
getCookies,
|
||||
getAllCookies,
|
||||
getResponseBody,
|
||||
}, cmd.input.action) orelse return error.UnknownMethod;
|
||||
|
||||
@@ -59,6 +60,7 @@ pub fn processMessage(cmd: *CDP.Command) !void {
|
||||
.setCookie => return setCookie(cmd),
|
||||
.setCookies => return setCookies(cmd),
|
||||
.getCookies => return getCookies(cmd),
|
||||
.getAllCookies => return getAllCookies(cmd),
|
||||
.getResponseBody => return getResponseBody(cmd),
|
||||
}
|
||||
}
|
||||
@@ -149,7 +151,10 @@ fn deleteCookies(cmd: *CDP.Command) !void {
|
||||
}
|
||||
|
||||
fn clearBrowserCookies(cmd: *CDP.Command) !void {
|
||||
if (try cmd.params(struct {}) != null) return error.InvalidParams;
|
||||
// Network.clearBrowserCookies takes no parameters per the CDP spec, but most
|
||||
// CDP clients (chrome-remote-interface, chromedp, custom websocket clients)
|
||||
// include an empty `"params":{}` object on every command for ergonomics.
|
||||
// Chrome accepts that and clears the jar; reject only on truly malformed JSON.
|
||||
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
|
||||
bc.session.cookie_jar.clearRetainingCapacity();
|
||||
return cmd.sendResult(null, .{});
|
||||
@@ -205,6 +210,18 @@ fn getCookies(cmd: *CDP.Command) !void {
|
||||
try cmd.sendResult(.{ .cookies = writer }, .{});
|
||||
}
|
||||
|
||||
fn getAllCookies(cmd: *CDP.Command) !void {
|
||||
// Returns every cookie in the jar regardless of the current frame's origin.
|
||||
// Mirrors Chrome's Network.getAllCookies and Storage.getCookies (without
|
||||
// the latter's browserContextId filter, since Network commands are scoped
|
||||
// to the current browser context already).
|
||||
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
|
||||
var jar = &bc.session.cookie_jar;
|
||||
jar.removeExpired(null);
|
||||
const writer = CdpStorage.CookieWriter{ .cookies = jar.cookies.items };
|
||||
try cmd.sendResult(.{ .cookies = writer }, .{});
|
||||
}
|
||||
|
||||
fn getResponseBody(cmd: *CDP.Command) !void {
|
||||
const params = (try cmd.params(struct {
|
||||
requestId: []const u8, // "REQ-{d}" or "LID-{d}"
|
||||
@@ -565,3 +582,83 @@ test "cdp.Network: cookies" {
|
||||
});
|
||||
try ctx.expectSentResult(.{ .cookies = &[_]ResCookie{} }, .{ .id = 10 });
|
||||
}
|
||||
|
||||
test "cdp.Network: clearBrowserCookies accepts empty params object" {
|
||||
const CdpCookie = CdpStorage.CdpCookie;
|
||||
const ResCookie = CdpStorage.ResCookie;
|
||||
|
||||
var ctx = try testing.context();
|
||||
defer ctx.deinit();
|
||||
_ = try ctx.loadBrowserContext(.{ .id = "BID-N1" });
|
||||
|
||||
try ctx.processMessage(.{
|
||||
.id = 1,
|
||||
.method = "Network.setCookie",
|
||||
.params = CdpCookie{ .name = "foo", .value = "bar", .url = "https://example.com/" },
|
||||
});
|
||||
try ctx.expectSentResult(null, .{ .id = 1 });
|
||||
|
||||
// Most CDP clients (chrome-remote-interface, chromedp, etc.) always include
|
||||
// a `params` field on every command, even for methods that take none.
|
||||
// Chrome ignores the empty object; we should too. Sent as raw JSON because
|
||||
// an empty Zig anonymous struct serializes as `[]`, not `{}`.
|
||||
try ctx.processMessage(
|
||||
\\{"id":2,"method":"Network.clearBrowserCookies","params":{}}
|
||||
);
|
||||
try ctx.expectSentResult(null, .{ .id = 2 });
|
||||
|
||||
try ctx.processMessage(.{
|
||||
.id = 3,
|
||||
.method = "Storage.getCookies",
|
||||
.params = .{ .browserContextId = "BID-N1" },
|
||||
});
|
||||
try ctx.expectSentResult(.{ .cookies = &[_]ResCookie{} }, .{ .id = 3 });
|
||||
}
|
||||
|
||||
test "cdp.Network: getAllCookies returns whole jar regardless of current origin" {
|
||||
const CdpCookie = CdpStorage.CdpCookie;
|
||||
const ResCookie = CdpStorage.ResCookie;
|
||||
|
||||
var ctx = try testing.context();
|
||||
defer ctx.deinit();
|
||||
_ = try ctx.loadBrowserContext(.{ .id = "BID-N2" });
|
||||
|
||||
// Two cookies on different origins. With no current frame URL,
|
||||
// Network.getCookies (no `urls`) would return -32602 InvalidParams;
|
||||
// Network.getAllCookies must still return both.
|
||||
try ctx.processMessage(.{
|
||||
.id = 1,
|
||||
.method = "Network.setCookies",
|
||||
.params = .{
|
||||
.cookies = &[_]CdpCookie{
|
||||
.{ .name = "a", .value = "1", .url = "https://example.com/" },
|
||||
.{ .name = "b", .value = "2", .url = "https://other.test/" },
|
||||
},
|
||||
},
|
||||
});
|
||||
try ctx.expectSentResult(null, .{ .id = 1 });
|
||||
|
||||
// Empty params object — sent as raw JSON because an empty Zig anonymous
|
||||
// struct serializes as `[]`, not `{}`.
|
||||
try ctx.processMessage(
|
||||
\\{"id":2,"method":"Network.getAllCookies","params":{}}
|
||||
);
|
||||
try ctx.expectSentResult(.{
|
||||
.cookies = &[_]ResCookie{
|
||||
.{ .name = "a", .value = "1", .domain = "example.com", .path = "/", .size = 2, .secure = true },
|
||||
.{ .name = "b", .value = "2", .domain = "other.test", .path = "/", .size = 2, .secure = true },
|
||||
},
|
||||
}, .{ .id = 2 });
|
||||
|
||||
// Also works without any params field at all (CDP-spec literal "no params").
|
||||
try ctx.processMessage(.{
|
||||
.id = 3,
|
||||
.method = "Network.getAllCookies",
|
||||
});
|
||||
try ctx.expectSentResult(.{
|
||||
.cookies = &[_]ResCookie{
|
||||
.{ .name = "a", .value = "1", .domain = "example.com", .path = "/", .size = 2, .secure = true },
|
||||
.{ .name = "b", .value = "2", .domain = "other.test", .path = "/", .size = 2, .secure = true },
|
||||
},
|
||||
}, .{ .id = 3 });
|
||||
}
|
||||
|
||||
@@ -62,16 +62,10 @@ pub const FetchOpts = struct {
|
||||
dump_mode: ?Config.DumpFormat = null,
|
||||
writer: ?*std.Io.Writer = null,
|
||||
};
|
||||
pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void {
|
||||
const http_client = try HttpClient.init(app.allocator, &app.network);
|
||||
defer http_client.deinit();
|
||||
|
||||
pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !void {
|
||||
const notification = try Notification.init(app.allocator);
|
||||
defer notification.deinit();
|
||||
|
||||
var browser = try Browser.init(app, .{ .http_client = http_client });
|
||||
defer browser.deinit();
|
||||
|
||||
var session = try browser.newSession(notification);
|
||||
|
||||
if (app.config.cookieFile()) |cookie_path| {
|
||||
|
||||
63
src/main.zig
63
src/main.zig
@@ -139,7 +139,18 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
|
||||
fetch_opts.writer = &writer.interface;
|
||||
}
|
||||
|
||||
var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, url.?, fetch_opts });
|
||||
// Browser owns a V8 isolate, which has thread affinity — it must
|
||||
// be init/used/deinit on the same thread (fetchThread, below). So
|
||||
// we can't treat Browser like the above serve path treats Server.
|
||||
// We need Browser to be created in fetchThread and to get a reference
|
||||
// to it here.
|
||||
var ft: FetchTerminator = .{};
|
||||
try sighandler.on(FetchTerminator.terminate, .{&ft});
|
||||
if (opts.terminate_ms) |ms| {
|
||||
try sighandler.deadline(ms);
|
||||
}
|
||||
|
||||
var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, &ft, url.?, fetch_opts });
|
||||
defer worker_thread.join();
|
||||
|
||||
app.network.run();
|
||||
@@ -197,9 +208,55 @@ fn agentThread(allocator: std.mem.Allocator, app: *App, opts: Config.Agent, fail
|
||||
}
|
||||
}
|
||||
|
||||
fn fetchThread(app: *App, url: [:0]const u8, fetch_opts: lp.FetchOpts) void {
|
||||
const FetchTerminator = struct {
|
||||
mutex: std.Thread.Mutex = .{},
|
||||
browser: ?*lp.Browser = null,
|
||||
|
||||
fn storeBrowser(self: *FetchTerminator, browser: *lp.Browser) void {
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
self.browser = browser;
|
||||
}
|
||||
|
||||
fn releaseBrowser(self: *FetchTerminator) void {
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
const b = self.browser orelse return;
|
||||
b.env.cancelTerminate();
|
||||
self.browser = null;
|
||||
}
|
||||
|
||||
fn terminate(self: *FetchTerminator) void {
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
const b = self.browser orelse return;
|
||||
b.env.terminate();
|
||||
self.browser = null;
|
||||
}
|
||||
};
|
||||
|
||||
fn fetchThread(app: *App, ft: *FetchTerminator, url: [:0]const u8, fetch_opts: lp.FetchOpts) void {
|
||||
defer app.network.stop();
|
||||
lp.fetch(app, url, fetch_opts) catch |err| {
|
||||
|
||||
const http_client = lp.HttpClient.init(app.allocator, &app.network) catch |err| {
|
||||
log.fatal(.app, "http client init error", .{ .err = err });
|
||||
return;
|
||||
};
|
||||
defer http_client.deinit();
|
||||
|
||||
var browser = lp.Browser.init(app, .{ .http_client = http_client }) catch |err| {
|
||||
log.fatal(.app, "browser init error", .{ .err = err });
|
||||
return;
|
||||
};
|
||||
defer browser.deinit();
|
||||
|
||||
ft.storeBrowser(&browser);
|
||||
// if this exits normally, we want to disarm the FetchTerminator so that
|
||||
// any subsequent sighandlers don't try to shut down an already (or in-the-
|
||||
// process-of) shutting down browser/env
|
||||
defer ft.releaseBrowser();
|
||||
|
||||
lp.fetch(app, &browser, url, fetch_opts) catch |err| {
|
||||
log.fatal(.app, "fetch error", .{ .err = err, .url = url });
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user