diff --git a/src/Config.zig b/src/Config.zig index 52c541ff..5bc12f13 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -133,6 +133,7 @@ const Commands = cli.Builder(.{ .{ .name = "wait_until", .type = ?WaitUntil }, .{ .name = "wait_script", .type = ?[:0]const u8 }, .{ .name = "wait_selector", .type = ?[:0]const u8 }, + .{ .name = "terminate_ms", .type = ?u32 }, }, .shared_options = CommonOptions, }, @@ -610,6 +611,12 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\--wait-script-file \\ Like --wait-script, but reads the script from a file. \\ + \\--terminate-ms Hard deadline in milliseconds. After this time elapses, + \\ JavaScript execution is forcibly terminated (e.g. for + \\ pages with endless scripts). Unlike --wait-ms, which + \\ only stops waiting, --terminate-ms aborts the page. + \\ Defaults to no terminate. + \\ \\--cookie Path to a JSON file to load cookies from (read-only). \\ Defaults to no cookie loading. \\ diff --git a/src/Sighandler.zig b/src/Sighandler.zig index 2b2d7f29..85a8d8e5 100644 --- a/src/Sighandler.zig +++ b/src/Sighandler.zig @@ -22,11 +22,11 @@ //! The structure does not clear the memory allocated in the arena, //! clear the entire arena when exiting the program. const std = @import("std"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; const lp = @import("lightpanda"); const log = lp.log; +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; const SigHandler = @This(); @@ -44,17 +44,41 @@ pub const Listener = struct { }; pub fn install(self: *SigHandler) !void { - // Block SIGINT and SIGTERM for the current thread and all created from it + // Block these signals for the current thread and all created from it. + // SIGALRM is included so deadline() can wake the sighandler thread on a deadline. 
self.sigset = std.posix.sigemptyset(); std.posix.sigaddset(&self.sigset, std.posix.SIG.INT); std.posix.sigaddset(&self.sigset, std.posix.SIG.TERM); std.posix.sigaddset(&self.sigset, std.posix.SIG.QUIT); + std.posix.sigaddset(&self.sigset, std.posix.SIG.ALRM); std.posix.sigprocmask(std.posix.SIG.BLOCK, &self.sigset, null); self.handle_thread = try std.Thread.spawn(.{ .allocator = self.arena }, SigHandler.sighandle, .{self}); self.handle_thread.?.detach(); } +const itimerval = extern struct { + interval: std.c.timeval, + value: std.c.timeval, +}; +const ITIMER_REAL: c_int = 0; +extern "c" fn setitimer(which: c_int, new_value: *const itimerval, old_value: ?*itimerval) c_int; + +/// Schedule a SIGALRM after `ms` milliseconds, which wakes the sighandler +/// thread and runs the registered listeners. Used to enforce --terminate-ms. +pub fn deadline(_: *SigHandler, ms: u32) !void { + const it = itimerval{ + .interval = .{ .sec = 0, .usec = 0 }, + .value = .{ + .sec = @intCast(ms / std.time.ms_per_s), + .usec = @intCast((ms % std.time.ms_per_s) * std.time.us_per_ms), + }, + }; + if (setitimer(ITIMER_REAL, &it, null) != 0) { + return error.SetItimerFailed; + } +} + pub fn on(self: *SigHandler, func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) !void { assert(@typeInfo(@TypeOf(func)).@"fn".return_type.? == void); @@ -101,6 +125,16 @@ fn sighandle(self: *SigHandler) noreturn { } continue; }, + std.posix.SIG.ALRM => { + // Deadline tripped (e.g. --terminate-ms). Run the same listeners, + // but don't bump `attempt` — a subsequent ctrl-c should still get + // the normal first-attempt graceful path before hard-exiting. 
+ log.info(.app, "Deadline reached ", .{}); + for (self.listeners.items) |*item| { + item.start(item.args.ptr); + } + continue; + }, else => continue, } } diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 7754d8a3..8ea3c9bc 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -362,6 +362,10 @@ pub fn runMicrotasks(self: *Env) void { if (self.microtask_queues_are_running == false) { const v8_isolate = self.isolate.handle; + if (v8.v8__Isolate__IsExecutionTerminating(v8_isolate)) { + return; + } + self.microtask_queues_are_running = true; defer self.microtask_queues_are_running = false; @@ -374,6 +378,10 @@ pub fn runMicrotasks(self: *Env) void { } pub fn runMacrotasks(self: *Env) !void { + if (v8.v8__Isolate__IsExecutionTerminating(self.isolate.handle)) { + return; + } + for (self.contexts[0..self.context_count]) |ctx| { if (comptime builtin.is_test == false) { // I hate this comptime check as much as you do. But we have tests @@ -485,6 +493,13 @@ pub fn terminate(self: *const Env) void { v8.v8__Isolate__TerminateExecution(self.isolate.handle); } +/// Clears a pending termination so V8 calls (e.g. those made during cleanup) +/// don't keep tripping over the terminating-state asserts. Safe to call +/// unconditionally; a no-op if termination wasn't pending. 
+pub fn cancelTerminate(self: *const Env) void { + v8.v8__Isolate__CancelTerminateExecution(self.isolate.handle); +} + fn promiseRejectCallback(message_handle: v8.PromiseRejectMessage) callconv(.c) void { const promise_event = v8.v8__PromiseRejectMessage__GetEvent(&message_handle); if (promise_event != v8.kPromiseRejectWithNoHandler and promise_event != v8.kPromiseHandlerAddedAfterReject) { diff --git a/src/lightpanda.zig b/src/lightpanda.zig index 992388a1..f50e7f5d 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -60,16 +60,10 @@ pub const FetchOpts = struct { dump_mode: ?Config.DumpFormat = null, writer: ?*std.Io.Writer = null, }; -pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { - const http_client = try HttpClient.init(app.allocator, &app.network); - defer http_client.deinit(); - +pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !void { const notification = try Notification.init(app.allocator); defer notification.deinit(); - var browser = try Browser.init(app, .{ .http_client = http_client }); - defer browser.deinit(); - var session = try browser.newSession(notification); if (app.config.cookieFile()) |cookie_path| { diff --git a/src/main.zig b/src/main.zig index 9536001c..0a955839 100644 --- a/src/main.zig +++ b/src/main.zig @@ -139,7 +139,18 @@ fn run(allocator: Allocator, main_arena: Allocator) !void { fetch_opts.writer = &writer.interface; } - var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, url.?, fetch_opts }); + // Browser owns a V8 isolate, which has thread affinity — it must + // be init/used/deinit on the same thread (fetchThread, below). So + // we can't treat Browser like the above serve path treats Server. + // We need Browser to be created in fetchThread and to get a reference + // to it here. 
+ var ft: FetchTerminator = .{}; + try sighandler.on(FetchTerminator.terminate, .{&ft}); + if (opts.terminate_ms) |ms| { + try sighandler.deadline(ms); + } + + var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, &ft, url.?, fetch_opts }); defer worker_thread.join(); app.network.run(); @@ -169,9 +180,55 @@ fn run(allocator: Allocator, main_arena: Allocator) !void { } } -fn fetchThread(app: *App, url: [:0]const u8, fetch_opts: lp.FetchOpts) void { +const FetchTerminator = struct { + mutex: std.Thread.Mutex = .{}, + browser: ?*lp.Browser = null, + + fn storeBrowser(self: *FetchTerminator, browser: *lp.Browser) void { + self.mutex.lock(); + defer self.mutex.unlock(); + self.browser = browser; + } + + fn releaseBrowser(self: *FetchTerminator) void { + self.mutex.lock(); + defer self.mutex.unlock(); + const b = self.browser orelse return; + b.env.cancelTerminate(); + self.browser = null; + } + + fn terminate(self: *FetchTerminator) void { + self.mutex.lock(); + defer self.mutex.unlock(); + const b = self.browser orelse return; + b.env.terminate(); + self.browser = null; + } +}; + +fn fetchThread(app: *App, ft: *FetchTerminator, url: [:0]const u8, fetch_opts: lp.FetchOpts) void { defer app.network.stop(); - lp.fetch(app, url, fetch_opts) catch |err| { + + const http_client = lp.HttpClient.init(app.allocator, &app.network) catch |err| { + log.fatal(.app, "http client init error", .{ .err = err }); + return; + }; + defer http_client.deinit(); + + var browser = lp.Browser.init(app, .{ .http_client = http_client }) catch |err| { + log.fatal(.app, "browser init error", .{ .err = err }); + return; + }; + defer browser.deinit(); + + ft.storeBrowser(&browser); + // if this exits normally, we want to disarm the FetchTerminator so that + // any subsequent sighandlers don't try to shutdown an already (or in-the- + // process-of) shutting down browser/env + defer ft.releaseBrowser(); + + lp.fetch(app, &browser, url, fetch_opts) catch |err| { log.fatal(.app, "fetch 
error", .{ .err = err, .url = url }); }; }