diff --git a/src/Config.zig b/src/Config.zig
index 0bec5b7a..393786fe 100644
--- a/src/Config.zig
+++ b/src/Config.zig
@@ -221,7 +221,7 @@ pub const WaitUntil = enum {
     load,
     domcontentloaded,
     networkidle,
-    fixed,
+    done,
 };
 
 pub const Fetch = struct {
@@ -400,8 +400,8 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
         \\ Defaults to 5000.
         \\
         \\--wait_until Wait until the specified event.
-        \\ Supported events: load, domcontentloaded, networkidle, fixed.
-        \\ Defaults to 'load'.
+        \\ Supported events: load, domcontentloaded, networkidle, done.
+        \\ Defaults to 'done'.
         \\
     ++ common_options ++
         \\
diff --git a/src/Server.zig b/src/Server.zig
index d172f6dd..ae2f7c3d 100644
--- a/src/Server.zig
+++ b/src/Server.zig
@@ -302,15 +302,8 @@ pub const Client = struct {
         var ms_remaining = self.ws.timeout_ms;
 
         while (true) {
-            switch (cdp.pageWait(ms_remaining)) {
-                .cdp_socket => {
-                    if (self.readSocket() == false) {
-                        return;
-                    }
-                    last_message = milliTimestamp(.monotonic);
-                    ms_remaining = self.ws.timeout_ms;
-                },
-                .no_page => {
+            const result = cdp.pageWait(ms_remaining) catch |wait_err| switch (wait_err) {
+                error.NoPage => {
                     const status = http.tick(ms_remaining) catch |err| {
                         log.err(.app, "http tick", .{ .err = err });
                         return;
@@ -324,6 +317,18 @@ pub const Client = struct {
                     }
                     last_message = milliTimestamp(.monotonic);
                     ms_remaining = self.ws.timeout_ms;
+                    continue;
+                },
+                else => return wait_err,
+            };
+
+            switch (result) {
+                .cdp_socket => {
+                    if (self.readSocket() == false) {
+                        return;
+                    }
+                    last_message = milliTimestamp(.monotonic);
+                    ms_remaining = self.ws.timeout_ms;
                 },
                 .done => {
                     const now = milliTimestamp(.monotonic);
diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig
new file mode 100644
index 00000000..7491a4f6
--- /dev/null
+++ b/src/browser/Runner.zig
@@ -0,0 +1,259 @@
+// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+const std = @import("std");
+const lp = @import("lightpanda");
+const builtin = @import("builtin");
+
+const log = @import("../log.zig");
+const App = @import("../App.zig");
+
+const Page = @import("Page.zig");
+const Session = @import("Session.zig");
+const Browser = @import("Browser.zig");
+const Factory = @import("Factory.zig");
+const HttpClient = @import("HttpClient.zig");
+
+const IS_DEBUG = builtin.mode == .Debug;
+
+// Drives a session's current page: runs macrotasks, dispatches load events
+// and polls the http client, either one step at a time (tick / tickCDP) or
+// repeatedly until a wait condition or deadline is hit (wait / waitCDP).
+const Runner = @This();
+
+page: *Page,
+session: *Session,
+http_client: *HttpClient,
+
+pub const Opts = struct {};
+
+/// Creates a Runner bound to the session's current page.
+/// Fails with error.NoPage when the session has no page.
+pub fn init(session: *Session, _: Opts) !Runner {
+    const page = &(session.page orelse return error.NoPage);
+
+    return .{
+        .page = page,
+        .session = session,
+        .http_client = session.browser.http_client,
+    };
+}
+
+/// `ms` is the total time budget, `until` the page state that ends the wait.
+pub const WaitOpts = struct {
+    ms: u32,
+    until: lp.Config.WaitUntil = .done,
+};
+/// Ticks the page until `opts.until` is reached, there's nothing left to
+/// run, or `opts.ms` milliseconds have elapsed.
+pub fn wait(self: *Runner, opts: WaitOpts) !void {
+    _ = try self._wait(false, opts);
+}
+
+pub const CDPWaitResult = enum {
+    done,
+    cdp_socket,
+};
+/// CDP flavor of wait: also returns (.cdp_socket) as soon as the http
+/// client reports activity on the CDP socket.
+pub fn waitCDP(self: *Runner, opts: WaitOpts) !CDPWaitResult {
+    return self._wait(true, opts);
+}
+
+fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !CDPWaitResult {
+    var timer = try std.time.Timer.start();
+    var ms_remaining = opts.ms;
+
+    while (true) {
+        const tick_result = self._tick(is_cdp, .{
+            // never poll for longer than what's left of our budget
+            .ms = @min(200, ms_remaining),
+            .until = opts.until,
+        }) catch |err| {
+            switch (err) {
+                error.JsError => {}, // already logged (with hopefully more context)
+                else => log.err(.browser, "session wait", .{
+                    .err = err,
+                    .url = self.page.url,
+                }),
+            }
+            return err;
+        };
+
+        const next_ms = switch (tick_result) {
+            .ok => |ms| ms,
+            .done => return .done,
+            .cdp_socket => if (comptime is_cdp) return .cdp_socket else unreachable,
+        };
+
+        // Measure from the start of the wait. lap() restarts the timer, so
+        // the sub-millisecond remainder of every iteration was dropped and
+        // a run of fast ticks could overrun the budget.
+        const ms_elapsed = timer.read() / std.time.ns_per_ms;
+        if (ms_elapsed >= opts.ms) {
+            return .done;
+        }
+        ms_remaining = opts.ms - @as(u32, @intCast(ms_elapsed));
+        if (next_ms > 0) {
+            // widened to u64: the ns product doesn't have to fit in a u32
+            std.Thread.sleep(std.time.ns_per_ms * @as(u64, @min(next_ms, ms_remaining)));
+        }
+    }
+}
+
+/// `ms` bounds how long a single tick may block polling for network I/O.
+pub const TickOpts = struct {
+    ms: u32,
+    until: lp.Config.WaitUntil = .done,
+};
+
+/// `ok` carries how long (in ms) the caller may sleep before ticking again.
+pub const TickResult = union(enum) {
+    done,
+    ok: u32,
+};
+/// Advances the page by one step. `.done` means there's nothing left to run.
+pub fn tick(self: *Runner, opts: TickOpts) !TickResult {
+    return switch (try self._tick(false, opts)) {
+        .ok => |ms| .{ .ok = ms },
+        .done => .done,
+        .cdp_socket => unreachable,
+    };
+}
+
+pub const CDPTickResult = union(enum) {
+    done,
+    cdp_socket,
+    ok: u32,
+};
+/// Like tick, but may also report activity on the CDP socket.
+pub fn tickCDP(self: *Runner, opts: TickOpts) !CDPTickResult {
+    return self._tick(true, opts);
+}
+
+fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
+    const page = self.page;
+    const http_client = self.http_client;
+
+    switch (page._parse_state) {
+        .pre, .raw, .text, .image => {
+            // The main page hasn't started/finished navigating.
+            // There's no JS to run, and no reason to run the scheduler.
+            if (http_client.active == 0 and (comptime is_cdp) == false) {
+                // haven't started navigating, I guess.
+                return .done;
+            }
+
+            // Either we have active http connections, or we're in CDP
+            // mode with an extra socket. Either way, we're waiting
+            // for http traffic
+            const http_result = try http_client.tick(@intCast(opts.ms));
+            if ((comptime is_cdp) and http_result == .cdp_socket) {
+                return .cdp_socket;
+            }
+            return .{ .ok = 0 };
+        },
+        .html, .complete => {
+            const session = self.session;
+            if (session.queued_navigation.items.len != 0) {
+                try session.processQueuedNavigation();
+                self.page = &session.page.?; // might have changed
+                return .{ .ok = 0 };
+            }
+            const browser = session.browser;
+
+            // The HTML page was parsed. We now either have JS scripts to
+            // download, or scheduled tasks to execute, or both.
+
+            // scheduler.run could trigger new http transfers, so do not
+            // store http_client.active BEFORE this call and then use
+            // it AFTER.
+            try browser.runMacrotasks();
+
+            // Each call to this runs scheduled load events.
+            try page.dispatchLoad();
+
+            const http_active = http_client.active;
+            const total_network_activity = http_active + http_client.intercepted;
+            if (page._notified_network_almost_idle.check(total_network_activity <= 2)) {
+                page.notifyNetworkAlmostIdle();
+            }
+            if (page._notified_network_idle.check(total_network_activity == 0)) {
+                page.notifyNetworkIdle();
+            }
+
+            if (http_active == 0 and (comptime is_cdp == false)) {
+                // we don't need to consider http_client.intercepted here
+                // because is_cdp is false, and that can only be
+                // the case when interception isn't possible.
+                if (comptime IS_DEBUG) {
+                    std.debug.assert(http_client.intercepted == 0);
+                }
+
+                if (browser.hasBackgroundTasks()) {
+                    // _we_ have nothing to run, but v8 is working on
+                    // background tasks. We'll wait for them.
+                    browser.waitForBackgroundTasks();
+                }
+
+                switch (opts.until) {
+                    .done => {},
+                    .domcontentloaded => if (page._load_state == .load or page._load_state == .complete) {
+                        return .done;
+                    },
+                    .load => if (page._load_state == .complete) {
+                        return .done;
+                    },
+                    .networkidle => if (page._notified_network_idle == .done) {
+                        return .done;
+                    },
+                }
+
+                // We never advertise a wait time of more than 20, there can
+                // always be new background tasks to run.
+                if (browser.msToNextMacrotask()) |ms_to_next_task| {
+                    return .{ .ok = @min(ms_to_next_task, 20) };
+                }
+                return .done;
+            }
+
+            // We're here because we either have active HTTP
+            // connections, or is_cdp == true (aka, there's
+            // a cdp_socket registered with the http client).
+            // We should continue to run tasks, so we minimize how long
+            // we'll poll for network I/O.
+            var ms_to_wait = @min(opts.ms, browser.msToNextMacrotask() orelse 200);
+            if (ms_to_wait > 10 and browser.hasBackgroundTasks()) {
+                // if we have background tasks, we don't want to wait too
+                // long for a message from the client. We want to go back
+                // to the top of the loop and run macrotasks.
+                ms_to_wait = 10;
+            }
+            const http_result = try http_client.tick(@intCast(@min(opts.ms, ms_to_wait)));
+            if ((comptime is_cdp) and http_result == .cdp_socket) {
+                return .cdp_socket;
+            }
+            return .{ .ok = 0 };
+        },
+        .err => |err| {
+            page._parse_state = .{ .raw_done = @errorName(err) };
+            return err;
+        },
+        .raw_done => return .done,
+    }
+}
diff --git a/src/browser/Session.zig b/src/browser/Session.zig
index 35f39115..63a7a322 100644
--- a/src/browser/Session.zig
+++ b/src/browser/Session.zig
@@ -30,6 +30,7 @@ const Navigation = @import("webapi/navigation/Navigation.zig");
 const History = @import("webapi/History.zig");
 
 const Page = @import("Page.zig");
+pub const Runner = @import("Runner.zig");
 const Browser = @import("Browser.zig");
 const Factory = @import("Factory.zig");
 const Notification = @import("../Notification.zig");
@@ -258,12 +259,6 @@ pub fn currentPage(self: *Session) ?*Page {
     return &(self.page orelse return null);
 }
 
-pub const WaitResult = enum {
-    done,
-    no_page,
-    cdp_socket,
-};
-
 pub fn findPageByFrameId(self: *Session, frame_id: u32) ?*Page {
     const page = self.currentPage() orelse return null;
     return findPageBy(page, "_frame_id", frame_id);
@@ -284,204 +279,8 @@ fn findPageBy(page: *Page, comptime field: []const u8, id: u32) ?*Page {
     return null;
 }
 
-const WaitOpts = struct {
-    timeout_ms: u32 = 5000,
-    until: lp.Config.WaitUntil = .load,
-};
-
-pub fn wait(self: *Session, opts: WaitOpts) WaitResult {
-    var page = &(self.page orelse return .no_page);
-    while (true) {
-        const wait_result = self._wait(page, opts) catch |err| {
-            switch (err) {
-                error.JsError => {}, // already logged (with hopefully more context)
-                else => log.err(.browser, "session wait", .{
-                    .err = err,
-                    .url = page.url,
-                }),
-            }
-            return .done;
-        };
-
-        switch (wait_result) {
-            .done => {
-                if (self.queued_navigation.items.len == 0) {
-                    return .done;
-                }
-                self.processQueuedNavigation() catch return .done;
-                page = &self.page.?; // might have changed
-            },
-            else => |result| return result,
-        }
-    }
-}
-
-fn _wait(self: *Session, page: *Page, opts: WaitOpts) !WaitResult {
-    const wait_until = opts.until;
-
-    var timer = try std.time.Timer.start();
-    var ms_remaining = opts.timeout_ms;
-
-    const browser = self.browser;
-    var http_client = browser.http_client;
-
-    // I'd like the page to know NOTHING about cdp_socket / CDP, but the
-    // fact is that the behavior of wait changes depending on whether or
-    // not we're using CDP.
-    // If we aren't using CDP, as soon as we think there's nothing left
-    // to do, we can exit - we'de done.
-    // But if we are using CDP, we should wait for the whole `wait_ms`
-    // because the http_click.tick() also monitors the CDP socket. And while
-    // we could let CDP poll http (like it does for HTTP requests), the fact
-    // is that we know more about the timing of stuff (e.g. how long to
-    // poll/sleep) in the page.
-    const exit_when_done = http_client.cdp_client == null;
-
-    while (true) {
-        switch (page._parse_state) {
-            .pre, .raw, .text, .image => {
-                // The main page hasn't started/finished navigating.
-                // There's no JS to run, and no reason to run the scheduler.
-                if (http_client.active == 0 and exit_when_done) {
-                    // haven't started navigating, I guess.
-                    if (wait_until != .fixed) {
-                        return .done;
-                    }
-                }
-                // Either we have active http connections, or we're in CDP
-                // mode with an extra socket. Either way, we're waiting
-                // for http traffic
-                if (try http_client.tick(@intCast(ms_remaining)) == .cdp_socket) {
-                    // exit_when_done is explicitly set when there isn't
-                    // an extra socket, so it should not be possibl to
-                    // get an cdp_socket message when exit_when_done
-                    // is true.
-                    if (IS_DEBUG) {
-                        std.debug.assert(exit_when_done == false);
-                    }
-
-                    // data on a socket we aren't handling, return to caller
-                    return .cdp_socket;
-                }
-            },
-            .html, .complete => {
-                if (self.queued_navigation.items.len != 0) {
-                    return .done;
-                }
-
-                // The HTML page was parsed. We now either have JS scripts to
-                // download, or scheduled tasks to execute, or both.
-
-                // scheduler.run could trigger new http transfers, so do not
-                // store http_client.active BEFORE this call and then use
-                // it AFTER.
-                try browser.runMacrotasks();
-
-                // Each call to this runs scheduled load events.
-                try page.dispatchLoad();
-
-                const http_active = http_client.active;
-                const total_network_activity = http_active + http_client.intercepted;
-                if (page._notified_network_almost_idle.check(total_network_activity <= 2)) {
-                    page.notifyNetworkAlmostIdle();
-                }
-                if (page._notified_network_idle.check(total_network_activity == 0)) {
-                    page.notifyNetworkIdle();
-                }
-
-                if (http_active == 0 and exit_when_done) {
-                    // we don't need to consider http_client.intercepted here
-                    // because exit_when_done is true, and that can only be
-                    // the case when interception isn't possible.
-                    if (comptime IS_DEBUG) {
-                        std.debug.assert(http_client.intercepted == 0);
-                    }
-
-                    const is_event_done = switch (wait_until) {
-                        .fixed => false,
-                        .domcontentloaded => (page._load_state == .load or page._load_state == .complete),
-                        .load => (page._load_state == .complete),
-                        .networkidle => (page._notified_network_idle == .done),
-                    };
-
-                    var ms = blk: {
-                        if (browser.hasBackgroundTasks()) {
-                            // _we_ have nothing to run, but v8 is working on
-                            // background tasks. We'll wait for them.
-                            browser.waitForBackgroundTasks();
-                            break :blk 20;
-                        }
-
-                        const next_task = browser.msToNextMacrotask();
-                        if (next_task == null and is_event_done) {
-                            return .done;
-                        }
-                        break :blk next_task orelse 20;
-                    };
-
-                    if (ms > ms_remaining) {
-                        if (is_event_done) {
-                            return .done;
-                        }
-                        // Same as above, except we have a scheduled task,
-                        // it just happens to be too far into the future
-                        // compared to how long we were told to wait.
-                        if (browser.hasBackgroundTasks()) {
-                            // _we_ have nothing to run, but v8 is working on
-                            // background tasks. We'll wait for them.
-                            browser.waitForBackgroundTasks();
-                        }
-                        // We're still wait for our wait_until. Not sure for what
-                        // but let's keep waiting. Worst case, we'll timeout.
-                        ms = 20;
-                    }
-
-                    // We have a task to run in the not-so-distant future.
-                    // You might think we can just sleep until that task is
-                    // ready, but we should continue to run lowPriority tasks
-                    // in the meantime, and that could unblock things. So
-                    // we'll just sleep for a bit, and then restart our wait
-                    // loop to see if anything new can be processed.
-                    std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20))));
-                } else {
-                    // We're here because we either have active HTTP
-                    // connections, or exit_when_done == false (aka, there's
-                    // an cdp_socket registered with the http client).
-                    // We should continue to run tasks, so we minimize how long
-                    // we'll poll for network I/O.
-                    var ms_to_wait = @min(200, browser.msToNextMacrotask() orelse 200);
-                    if (ms_to_wait > 10 and browser.hasBackgroundTasks()) {
-                        // if we have background tasks, we don't want to wait too
-                        // long for a message from the client. We want to go back
-                        // to the top of the loop and run macrotasks.
-                        ms_to_wait = 10;
-                    }
-                    if (try http_client.tick(@min(ms_remaining, ms_to_wait)) == .cdp_socket) {
-                        // data on a socket we aren't handling, return to caller
-                        return .cdp_socket;
-                    }
-                }
-            },
-            .err => |err| {
-                page._parse_state = .{ .raw_done = @errorName(err) };
-                return err;
-            },
-            .raw_done => {
-                if (exit_when_done) {
-                    return .done;
-                }
-                // we _could_ http_client.tick(ms_to_wait), but this has
-                // the same result, and I feel is more correct.
-                return .no_page;
-            },
-        }
-
-        const ms_elapsed = timer.lap() / 1_000_000;
-        if (ms_elapsed >= ms_remaining) {
-            return .done;
-        }
-        ms_remaining -= @intCast(ms_elapsed);
-    }
+pub fn runner(self: *Session, opts: Runner.Opts) !Runner {
+    return Runner.init(self, opts);
 }
 
 pub fn scheduleNavigation(self: *Session, page: *Page) !void {
@@ -498,7 +297,7 @@ pub fn scheduleNavigation(self: *Session, page: *Page) !void {
     return list.append(self.arena, page);
 }
 
-fn processQueuedNavigation(self: *Session) !void {
+pub fn processQueuedNavigation(self: *Session) !void {
     const navigations = &self.queued_navigation;
 
     if (self.page.?._queued_navigation != null) {
diff --git a/src/browser/actions.zig b/src/browser/actions.zig
index f62cfdbc..be9fe2df 100644
--- a/src/browser/actions.zig
+++ b/src/browser/actions.zig
@@ -23,6 +23,7 @@ const Element = @import("webapi/Element.zig");
 const Event = @import("webapi/Event.zig");
 const MouseEvent = @import("webapi/event/MouseEvent.zig");
 const Page = @import("Page.zig");
+const Session = @import("Session.zig");
 const Selector = @import("webapi/selector/Selector.zig");
 
 pub fn click(node: *DOMNode, page: *Page) !void {
@@ -104,10 +105,17 @@ pub fn scroll(node: ?*DOMNode, x: ?i32, y: ?i32, page: *Page) !void {
     }
 }
 
-pub fn waitForSelector(selector: [:0]const u8, timeout_ms: u32, page: *Page) !*DOMNode {
+pub fn waitForSelector(selector: [:0]const u8, timeout_ms: u32, session: *Session) !*DOMNode {
     var timer = try std.time.Timer.start();
+    var runner = try session.runner(.{});
+    try runner.wait(.{ .ms = timeout_ms, .until = .load });
+
+    // Set once the runner reports there's nothing left to run. We still query
+    // one last time, since that final tick may have inserted the node.
+    var runner_done = false;
 
     while (true) {
+        const page = runner.page;
         const element = Selector.querySelector(page.document.asNode(), selector, page) catch {
             return error.InvalidSelector;
         };
@@ -120,7 +128,15 @@ pub fn waitForSelector(selector: [:0]const u8, timeout_ms: u32, page: *Page) !*D
-        if (elapsed >= timeout_ms) {
+        if (runner_done or elapsed >= timeout_ms) {
             return error.Timeout;
         }
-
-        _ = page._session.wait(.{ .timeout_ms = @min(100, timeout_ms - elapsed) });
+        switch (try runner.tick(.{ .ms = timeout_ms - elapsed })) {
+            // nothing left to run: loop once more for a final query
+            .done => runner_done = true,
+            .ok => |recommended_sleep_ms| {
+                if (recommended_sleep_ms > 0) {
+                    // guaranteed to be <= 20ms
+                    std.Thread.sleep(std.time.ns_per_ms * @as(u64, recommended_sleep_ms));
+                }
+            },
+        }
     }
 }
diff --git a/src/browser/tests/animation/animation.html b/src/browser/tests/animation/animation.html
index 97bfe077..886ac0b8 100644
--- a/src/browser/tests/animation/animation.html
+++ b/src/browser/tests/animation/animation.html
@@ -18,7 +18,7 @@
   testing.eventually(() => testing.expectEqual(['idle', 'running', 'finished', true], cb));
-
+