From 12c2efb81171067ef479b26f43faa68f2a88fd2d Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Sat, 25 Apr 2026 12:34:06 +0800 Subject: [PATCH] Adds --terminate-ms command line argument + ctrl-c improvements in fetch The main.zig path for `fetch` now captures the *Browser so that browser.env.terminate() can be called. This is a bit more complex than the serve path because the Browser owns the Isolate and can't be moved from one thread to another. With main having access to the browser, two things are now possible: 1 - We can support a --terminate-ms flag (https://github.com/lightpanda-io/browser/issues/2206) 2 - ctrl-c can correctly stop blocked JavaScript processes 1 is implemented via setitimer to set a timer for SIGALRM, avoiding the need to add another "watcher" thread, or putting a timer in Network.run. --- src/Config.zig | 7 +++++ src/Sighandler.zig | 40 +++++++++++++++++++++++++-- src/browser/js/Env.zig | 15 ++++++++++ src/lightpanda.zig | 8 +----- src/main.zig | 63 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 120 insertions(+), 13 deletions(-) diff --git a/src/Config.zig b/src/Config.zig index 52c541ff..5bc12f13 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -133,6 +133,7 @@ const Commands = cli.Builder(.{ .{ .name = "wait_until", .type = ?WaitUntil }, .{ .name = "wait_script", .type = ?[:0]const u8 }, .{ .name = "wait_selector", .type = ?[:0]const u8 }, + .{ .name = "terminate_ms", .type = ?u32 }, }, .shared_options = CommonOptions, }, @@ -610,6 +611,12 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\--wait-script-file \\ Like --wait-script, but reads the script from a file. \\ + \\--terminate-ms Hard deadline in milliseconds. After this time elapses, + \\ JavaScript execution is forcibly terminated (e.g. for + \\ pages with endless scripts). Unlike --wait-ms, which + \\ only stops waiting, --terminate-ms aborts the page. + \\ Defaults to no terminate. 
+ \\ \\--cookie Path to a JSON file to load cookies from (read-only). \\ Defaults to no cookie loading. \\ diff --git a/src/Sighandler.zig b/src/Sighandler.zig index 2b2d7f29..85a8d8e5 100644 --- a/src/Sighandler.zig +++ b/src/Sighandler.zig @@ -22,11 +22,11 @@ //! The structure does not clear the memory allocated in the arena, //! clear the entire arena when exiting the program. const std = @import("std"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; const lp = @import("lightpanda"); const log = lp.log; +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; const SigHandler = @This(); @@ -44,17 +44,41 @@ pub const Listener = struct { }; pub fn install(self: *SigHandler) !void { - // Block SIGINT and SIGTERM for the current thread and all created from it + // Block these signals for the current thread and all created from it. + // SIGALRM is included so deadline() can wake the sighandler thread on a deadline. self.sigset = std.posix.sigemptyset(); std.posix.sigaddset(&self.sigset, std.posix.SIG.INT); std.posix.sigaddset(&self.sigset, std.posix.SIG.TERM); std.posix.sigaddset(&self.sigset, std.posix.SIG.QUIT); + std.posix.sigaddset(&self.sigset, std.posix.SIG.ALRM); std.posix.sigprocmask(std.posix.SIG.BLOCK, &self.sigset, null); self.handle_thread = try std.Thread.spawn(.{ .allocator = self.arena }, SigHandler.sighandle, .{self}); self.handle_thread.?.detach(); } +const itimerval = extern struct { + interval: std.c.timeval, + value: std.c.timeval, +}; +const ITIMER_REAL: c_int = 0; +extern "c" fn setitimer(which: c_int, new_value: *const itimerval, old_value: ?*itimerval) c_int; + +/// Schedule a SIGALRM after `ms` milliseconds, which wakes the sighandler +/// thread and runs the registered listeners. Used to enforce --terminate-ms. 
+pub fn deadline(_: *SigHandler, ms: u32) !void { + const it = itimerval{ + .interval = .{ .sec = 0, .usec = 0 }, + .value = .{ + .sec = @intCast(ms / std.time.ms_per_s), + .usec = @intCast((ms % std.time.ms_per_s) * std.time.us_per_ms), + }, + }; + if (setitimer(ITIMER_REAL, &it, null) != 0) { + return error.SetItimerFailed; + } +} + pub fn on(self: *SigHandler, func: anytype, args: std.meta.ArgsTuple(@TypeOf(func))) !void { assert(@typeInfo(@TypeOf(func)).@"fn".return_type.? == void); @@ -101,6 +125,16 @@ fn sighandle(self: *SigHandler) noreturn { } continue; }, + std.posix.SIG.ALRM => { + // Deadline tripped (e.g. --terminate-ms). Run the same listeners, + // but don't bump `attempt` — a subsequent ctrl-c should still get + // the normal first-attempt graceful path before hard-exiting. + log.info(.app, "Deadline reached ", .{}); + for (self.listeners.items) |*item| { + item.start(item.args.ptr); + } + continue; + }, else => continue, } } diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 7754d8a3..8ea3c9bc 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -362,6 +362,10 @@ pub fn runMicrotasks(self: *Env) void { if (self.microtask_queues_are_running == false) { const v8_isolate = self.isolate.handle; + if (v8.v8__Isolate__IsExecutionTerminating(v8_isolate)) { + return; + } + self.microtask_queues_are_running = true; defer self.microtask_queues_are_running = false; @@ -374,6 +378,10 @@ pub fn runMicrotasks(self: *Env) void { } pub fn runMacrotasks(self: *Env) !void { + if (v8.v8__Isolate__IsExecutionTerminating(self.isolate.handle)) { + return; + } + for (self.contexts[0..self.context_count]) |ctx| { if (comptime builtin.is_test == false) { // I hate this comptime check as much as you do. But we have tests @@ -485,6 +493,13 @@ pub fn terminate(self: *const Env) void { v8.v8__Isolate__TerminateExecution(self.isolate.handle); } +/// Clears a pending termination so V8 calls (e.g. 
those made during cleanup) +/// don't keep tripping over the terminating-state asserts. Safe to call +/// unconditionally; a no-op if termination wasn't pending. +pub fn cancelTerminate(self: *const Env) void { + v8.v8__Isolate__CancelTerminateExecution(self.isolate.handle); +} + fn promiseRejectCallback(message_handle: v8.PromiseRejectMessage) callconv(.c) void { const promise_event = v8.v8__PromiseRejectMessage__GetEvent(&message_handle); if (promise_event != v8.kPromiseRejectWithNoHandler and promise_event != v8.kPromiseHandlerAddedAfterReject) { diff --git a/src/lightpanda.zig b/src/lightpanda.zig index 992388a1..f50e7f5d 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -60,16 +60,10 @@ pub const FetchOpts = struct { dump_mode: ?Config.DumpFormat = null, writer: ?*std.Io.Writer = null, }; -pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { - const http_client = try HttpClient.init(app.allocator, &app.network); - defer http_client.deinit(); - +pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !void { const notification = try Notification.init(app.allocator); defer notification.deinit(); - var browser = try Browser.init(app, .{ .http_client = http_client }); - defer browser.deinit(); - var session = try browser.newSession(notification); if (app.config.cookieFile()) |cookie_path| { diff --git a/src/main.zig b/src/main.zig index 9536001c..0a955839 100644 --- a/src/main.zig +++ b/src/main.zig @@ -139,7 +139,18 @@ fn run(allocator: Allocator, main_arena: Allocator) !void { fetch_opts.writer = &writer.interface; } - var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, url.?, fetch_opts }); + // Browser owns a V8 isolate, which has thread affinity — it must + // be init/used/deinit on the same thread (fetchThread, below). So + // we can't treat Browser like the above serve path treats Server. + // We need Browser to be created in fetchThread and to get a reference + // to it here. 
+ var ft: FetchTerminator = .{}; + try sighandler.on(FetchTerminator.terminate, .{&ft}); + if (opts.terminate_ms) |ms| { + try sighandler.deadline(ms); + } + + var worker_thread = try std.Thread.spawn(.{}, fetchThread, .{ app, &ft, url.?, fetch_opts }); defer worker_thread.join(); app.network.run(); @@ -169,9 +180,55 @@ fn run(allocator: Allocator, main_arena: Allocator) !void { } } -fn fetchThread(app: *App, url: [:0]const u8, fetch_opts: lp.FetchOpts) void { +const FetchTerminator = struct { + mutex: std.Thread.Mutex = .{}, + browser: ?*lp.Browser = null, + + fn storeBrowser(self: *FetchTerminator, browser: *lp.Browser) void { + self.mutex.lock(); + defer self.mutex.unlock(); + self.browser = browser; + } + + fn releaseBrowser(self: *FetchTerminator) void { + self.mutex.lock(); + defer self.mutex.unlock(); + const b = self.browser orelse return; + b.env.cancelTerminate(); + self.browser = null; + } + + fn terminate(self: *FetchTerminator) void { + self.mutex.lock(); + defer self.mutex.unlock(); + const b = self.browser orelse return; + b.env.terminate(); + self.browser = null; + } +}; + +fn fetchThread(app: *App, ft: *FetchTerminator, url: [:0]const u8, fetch_opts: lp.FetchOpts) void { defer app.network.stop(); - lp.fetch(app, url, fetch_opts) catch |err| { + + const http_client = lp.HttpClient.init(app.allocator, &app.network) catch |err| { + log.fatal(.app, "http client init error", .{ .err = err }); + return; + }; + defer http_client.deinit(); + + var browser = lp.Browser.init(app, .{ .http_client = http_client }) catch |err| { + log.fatal(.app, "browser init error", .{ .err = err }); + return; + }; + defer browser.deinit(); + + ft.storeBrowser(&browser); + // if this exits normally, we want to disarm the FetchTerminator so that + // any subsequent sighandlers don't try to shutdown an already (or in-the- + // process-of) shutting down browser/env + defer ft.releaseBrowser(); + + lp.fetch(app, &browser, url, fetch_opts) catch |err| { log.fatal(.app, "fetch 
error", .{ .err = err, .url = url }); }; }