diff --git a/src/Config.zig b/src/Config.zig index d153c48b..797e9a41 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -532,6 +532,7 @@ pub const DumpFormat = enum { pub const WaitUntil = enum { load, domcontentloaded, + networkalmostidle, networkidle, done, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 874ed98b..cd978f2f 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -111,10 +111,15 @@ network: *Network, arena_pool: *ArenaPool, -// The current proxy. CDP can change it, changeProxy(null) restores -// from config. +// The current proxy. Callers can change it, changeProxy(null) restores +// from config. May point either at `http_proxy_owned` (a caller-supplied +// dupe) or at the config string (which we must not free). http_proxy: ?[:0]const u8 = null, +// When a caller (e.g. CDP) supplies a proxy, we have to dupe it to take ownership +// which we'll be responsible for freeing. +http_proxy_owned: ?[:0]const u8 = null, + // track if the client use a proxy for connections. // We can't use http_proxy because we want also to track proxy configured via // CDP. @@ -260,6 +265,9 @@ pub fn deinit(self: *Client) void { self.handles.deinit(); self.clearUserAgentOverride(); + if (self.http_proxy_owned) |owned| { + self.allocator.free(owned); + } self.robots_layer.deinit(self.allocator); self.deferring_layer.deinit(); @@ -337,7 +345,22 @@ pub fn setTlsVerify(self: *Client, verify: bool) !void { // can be changed at any point in the easy's lifecycle. pub fn changeProxy(self: *Client, proxy: ?[:0]const u8) !void { try self.ensureNoActiveConnection(); - self.http_proxy = proxy orelse self.network.config.httpProxy(); + + // Free any previously-duped proxy before we overwrite http_proxy. + if (self.http_proxy_owned) |owned| { + self.allocator.free(owned); + self.http_proxy_owned = null; + } + + // Reset to the config default; if dupeZ below fails, http_proxy is + // left pointing at this rather than at the freed dup. + self.http_proxy = self.network.config.httpProxy(); + + if (proxy) |p| { + const owned = try self.allocator.dupeZ(u8, p); + self.http_proxy_owned = owned; + self.http_proxy = owned; + } self.use_proxy = self.http_proxy != null; } @@ -1564,10 +1587,28 @@ pub const Transfer = struct { // post-perform would need to be improved), pub fn unpark(self: *Transfer) void { lp.assert(self.state == .parked, "Transfer.unpark", .{ .state = self.state }); + self.leaveIntercept(); self.state = .created; } + // Decrement the interception counter iff this transfer is currently + // parked for CDP interception. + fn leaveIntercept(self: *Transfer) void { + if (self.state != .parked) { + return; + } + switch (self.state.parked) { + .robots => {}, + .intercept_request, .intercept_auth => { + const intercept_layer = &self.client.interception_layer; + lp.assert(intercept_layer.intercepted > 0, "Transfer.leaveIntercept", .{ .value = intercept_layer.intercepted }); + intercept_layer.intercepted -= 1; + }, + } + } + pub fn deinit(self: *Transfer) void { + self.leaveIntercept(); if (self._conn) |c| { self.client.removeConn(c); self._conn = null; @@ -1790,6 +1831,7 @@ pub const Transfer = struct { return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); } + // `url` must have transfer-arena lifetime: it's stored as-is, not duped. pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { self.req.url = url; } @@ -1829,7 +1871,9 @@ pub const Transfer = struct { } const base_url = try conn.getEffectiveUrl(); - const resolved = try URL.resolve(arena, std.mem.span(base_url), location.value, .{}); + // base_url and location.value are owned by curl. The returned value + // will be stored in transfer.req.url, hence the always_dupe. + const resolved = try URL.resolve(arena, std.mem.span(base_url), location.value, .{ .always_dupe = true }); // RFC 7231 §7.1.2: if the Location value has no fragment, the redirect // inherits the fragment from the URI used to generate the request. @@ -1904,9 +1948,9 @@ pub const Transfer = struct { log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.interception_layer.intercepted }); } - self.client.interception_layer.intercepted -= 1; + // The transfer is still .parked(.intercept_auth) + // abort -> deinit -> leaveIntercept decrements the counter. self.abort(error.AbortAuthChallenge); - return; } // headerDoneCallback is called once the headers have been read. @@ -2067,13 +2111,16 @@ pub const Transfer = struct { pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { if (comptime IS_DEBUG) { - lp.assert(self.interception_layer.intercepted > 0, "HttpClient.continueTransfer", .{ .value = self.interception_layer.intercepted }); log.debug(.http, "continue transfer", .{ .intercepted = self.interception_layer.intercepted }); } - self.interception_layer.intercepted -= 1; transfer.unpark(); - return self.process(transfer); + self.process(transfer) catch |err| { + if (transfer.state == .created) { + transfer.abort(err); + } + return err; + }; } const Noop = struct { diff --git a/src/browser/Page.zig b/src/browser/Page.zig index f9d63947..a6fa6749 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -244,7 +244,10 @@ pub fn scheduleNavigation(self: *Page, frame: *Frame) !void { } pub fn findFrameByFrameId(self: *Page, frame_id: u32) ?*Frame { - return findFrameBy(&self.frame, "_frame_id", frame_id); + if (findFrameBy(&self.frame, "_frame_id", frame_id)) |found| { + return found; + } + return self.findPopupBy("_frame_id", frame_id); } // Returns the popup Frame registered under `name`, or null. @@ -258,11 +261,16 @@ pub fn findPopupByName(self: *Page, name: []const u8) ?*Frame { } pub fn findFrameByLoaderId(self: *Page, loader_id: u32) ?*Frame { - return findFrameBy(&self.frame, "_loader_id", loader_id); + if (findFrameBy(&self.frame, "_loader_id", loader_id)) |found| { + return found; + } + return self.findPopupBy("_loader_id", loader_id); } fn findFrameBy(frame: *Frame, comptime field: []const u8, id: u32) ?*Frame { - if (@field(frame, field) == id) return frame; + if (@field(frame, field) == id) { + return frame; + } for (frame.child_frames.items) |f| { if (findFrameBy(f, field, id)) |found| { return found; @@ -270,3 +278,12 @@ fn findFrameBy(frame: *Frame, comptime field: []const u8, id: u32) ?*Frame { } return null; } + +fn findPopupBy(self: *Page, comptime field: []const u8, id: u32) ?*Frame { + for (self.popups.items) |frame| { + if (findFrameBy(frame, field, id)) |found| { + return found; + } + } + return null; +} diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 03f2528a..2cea66d8 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -104,6 +104,7 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !WaitResult { const tick_result = self._tick(is_cdp, tick_opts) catch |err| { switch (err) { error.JsError => {}, // already logged (with hopefully more context) + error.ClientDisconnected => {}, // CDP layer already logged this else => log.err(.browser, "session wait", .{ .err = err, .url = self.frame.url, @@ -215,6 +216,9 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !TickResult { .networkidle => if (frame._notified_network_idle == .done) { return .done; }, + .networkalmostidle => if (frame._notified_network_almost_idle == .done) { + return .done; + }, } if (http_active == 0 and http_next_tick == 0 and http_client.ws_active == 0 and http_client.queue.first == null and http_client.ready_queue.first == null and (comptime is_cdp) == false) { diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 69858758..b1fcffe8 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -631,7 +631,7 @@ test "Env: Worker context " { const frame = try session.createPage(); defer session.removePage(); - const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", frame); + const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", null, frame); var ls: js.Local.Scope = undefined; worker._worker_scope.js.localScope(&ls); diff --git a/src/browser/tests/element/html/embed.html b/src/browser/tests/element/html/embed.html new file mode 100644 index 00000000..b4e689db --- /dev/null +++ b/src/browser/tests/element/html/embed.html @@ -0,0 +1,56 @@ + + + + + + + + + + + diff --git a/src/browser/tests/worker/module-worker.js b/src/browser/tests/worker/module-worker.js new file mode 100644 index 00000000..1b1aa7b6 --- /dev/null +++ b/src/browser/tests/worker/module-worker.js @@ -0,0 +1,25 @@ +// A module worker (`new Worker(url, { type: "module" })`). Unlike a classic +// worker, the entry script may use top-level static `import`/`export`, and +// `importScripts()` is not supported (it throws a TypeError). +import { baseValue } from './modules/base.js'; +import { importedValue, localValue } from './modules/importer.js'; + +export const exported = 'top-level-export-ok'; + +let importScriptsError = null; +try { + importScripts('./import-script1.js'); +} catch (e) { + importScriptsError = e.constructor.name; +} + +onmessage = function (event) { + postMessage({ + echo: event.data, + baseValue: baseValue, + importedValue: importedValue, + localValue: localValue, + importScriptsError: importScriptsError, + from: 'module-worker', + }); +}; diff --git a/src/browser/tests/worker/worker.html b/src/browser/tests/worker/worker.html index cabe5d14..881c6b6c 100644 --- a/src/browser/tests/worker/worker.html +++ b/src/browser/tests/worker/worker.html @@ -380,6 +380,29 @@ } + +