diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index f78b307c..cd3999e8 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -13,7 +13,7 @@ inputs: zig-v8: description: 'zig v8 version to install' required: false - default: 'v0.3.9' + default: 'v0.4.0' v8: description: 'v8 version to install' required: false diff --git a/Dockerfile b/Dockerfile index 715441a0..fd872c45 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM debian:stable-slim ARG MINISIG=0.12 ARG ZIG_MINISIG=RWSGOq2NVecA2UPNdBUZykf1CCb147pkmdtYxgb3Ti+JO/wCYvhbAb/U ARG V8=14.0.365.4 -ARG ZIG_V8=v0.3.9 +ARG ZIG_V8=v0.4.0 ARG TARGETPLATFORM RUN apt-get update -yq && \ diff --git a/README.md b/README.md index 8f117bbd..a5acdd13 100644 --- a/README.md +++ b/README.md @@ -62,8 +62,12 @@ chmod a+x ./lightpanda *For Windows + WSL2* -The Lightpanda browser is compatible to run on windows inside WSL. Follow the Linux instruction for installation from a WSL terminal. -It is recommended to install clients like Puppeteer on the Windows host. +Lightpanda has no native Windows binary. Install it inside WSL following the Linux steps above. + +WSL not installed? Run `wsl --install` from an administrator shell, restart, then open `wsl`. +See [Microsoft's WSL install guide](https://learn.microsoft.com/en-us/windows/wsl/install) for details. + +Your automation client (Puppeteer, Playwright, etc.) can run either inside WSL or on the Windows host. WSL forwards `localhost:9222` automatically. **Install from Docker** diff --git a/build.zig.zon b/build.zig.zon index 154a76ab..f1185a2a 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -5,8 +5,8 @@ .minimum_zig_version = "0.15.2", .dependencies = .{ .v8 = .{ - .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.3.9.tar.gz", - .hash = "v8-0.0.0-xddH64iHBACfPm7oAqZerjmLLO6ftP4Yg5V7dtEGcD0i", + .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.4.0.tar.gz", + .hash = "v8-0.0.0-xddH61yIBAD04dV4CHW0qIFiqbOGvkN_-amGdmgbQ3dU", }, // .v8 = .{ .path = "../zig-v8-fork" }, .brotli = .{ diff --git a/src/Config.zig b/src/Config.zig index eb44a9ba..2e349aeb 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -114,7 +114,7 @@ pub fn httpMaxResponseSize(self: *const Config) ?usize { } pub fn wsMaxConcurrent(self: *const Config) u8 { - return self.commonOpts().ws_max_concurrent orelse 8; + return self.commonOpts().ws_max_concurrent orelse 64; } pub fn logLevel(self: *const Config) ?log.Level { @@ -1200,20 +1200,14 @@ fn parseCommonArg( return error.InvalidArgument; }; - for (str) |c| { - if (!std.ascii.isPrint(c)) { - log.fatal(.app, "not printable character", .{ .arg = opt }); - return error.InvalidArgument; - } - } - - if (std.ascii.indexOfIgnoreCase(str, "mozilla") != null) { + validateUserAgent(str) catch |err| { log.fatal(.app, "invalid value", .{ - .detail = "user-agent can't contain Mozilla", + .detail = "invalid user agent", .arg = opt, + .err = err, }); return error.InvalidArgument; - } + }; common.user_agent = try allocator.dupe(u8, str); return true; @@ -1295,3 +1289,61 @@ fn parseCommonArg( return false; } + +pub fn validateUserAgent(ua: []const u8) !void { + for (ua) |c| { + if (!std.ascii.isPrint(c)) { + return error.NonPrintable; + } + } + + if (std.ascii.indexOfIgnoreCase(ua, "mozilla") != null) { + return error.Reserved; + } +} + +const testing = @import("testing.zig"); +test "Config: HttpHeaders - default user agent" { + var config = try Config.init(testing.allocator, "", .{ .serve = .{} }); + defer config.deinit(testing.allocator); + + try testing.expectEqual("Lightpanda/1.0", config.http_headers.user_agent); + try testing.expectEqual("User-Agent: Lightpanda/1.0", config.http_headers.user_agent_header); + try testing.expect(config.http_headers.proxy_bearer_header == null); +} + +test "Config: HttpHeaders - custom user agent override" { + var config = try Config.init(testing.allocator, "", .{ .serve = .{ .common = .{ .user_agent = "MyBot/2.0" } } }); + defer config.deinit(testing.allocator); + + try testing.expectEqual("MyBot/2.0", config.http_headers.user_agent); + try testing.expectEqual("User-Agent: MyBot/2.0", config.http_headers.user_agent_header); +} + +test "Config: HttpHeaders - user agent suffix" { + var config = try Config.init(testing.allocator, "", .{ .serve = .{ .common = .{ .user_agent_suffix = "CustomSuffix/3.0" } } }); + defer config.deinit(testing.allocator); + + try testing.expectEqual("Lightpanda/1.0 CustomSuffix/3.0", config.http_headers.user_agent); + try testing.expectEqual("User-Agent: Lightpanda/1.0 CustomSuffix/3.0", config.http_headers.user_agent_header); +} + +test "Config: HttpHeaders - fetch mode default user agent" { + var config = try Config.init(testing.allocator, "", .{ .fetch = .{ .url = "https://example.com" } }); + defer config.deinit(testing.allocator); + try testing.expectEqual("Lightpanda/1.0", config.http_headers.user_agent); +} + +test "Config: HttpHeaders - fetch mode custom user agent" { + var config = try Config.init(testing.allocator, "", .{ .fetch = .{ .url = "https://example.com", .common = .{ .user_agent = "FetchBot/1.0" } } }); + defer config.deinit(testing.allocator); + try testing.expectEqual("FetchBot/1.0", config.http_headers.user_agent); + try testing.expectEqual("User-Agent: FetchBot/1.0", config.http_headers.user_agent_header); +} + +test "Config: HttpHeaders - proxy bearer header" { + var config = try Config.init(testing.allocator, "", .{ .serve = .{ .common = .{ .proxy_bearer_token = "secret-token" } } }); + defer config.deinit(testing.allocator); + try testing.expectEqual("Lightpanda/1.0", config.http_headers.user_agent); + try testing.expectEqual("Proxy-Authorization: Bearer secret-token", config.http_headers.proxy_bearer_header.?); +} diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 36c2352b..f60605c9 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -115,6 +115,12 @@ tls_verify: bool = true, obey_robots: bool, +// User agent override set via CDP Emulation.setUserAgentOverride. +// When set, takes precedence over the config's http_headers values. +// Both fields are allocated from self.allocator when set, null otherwise. +user_agent_override: ?[:0]const u8 = null, +user_agent_header_override: ?[:0]const u8 = null, + cdp_client: ?CDPClient = null, max_response_size: usize, @@ -177,9 +183,33 @@ pub fn deinit(self: *Client) void { } self.pending_robots_queue.deinit(self.allocator); + self.clearUserAgentOverride(); self.allocator.destroy(self); } +// Set a user agent override. Both the raw UA string and the pre-formatted +// "User-Agent: " header string are allocated from self.allocator. +pub fn setUserAgentOverride(self: *Client, ua: []const u8) !void { + self.clearUserAgentOverride(); + const override = try self.allocator.dupeZ(u8, ua); + errdefer self.allocator.free(override); + const header = try std.fmt.allocPrintSentinel(self.allocator, "User-Agent: {s}", .{ua}, 0); + self.user_agent_override = override; + self.user_agent_header_override = header; +} + +// Clear any user agent override, restoring the default from config. +pub fn clearUserAgentOverride(self: *Client) void { + if (self.user_agent_override) |ua| { + self.allocator.free(ua); + self.user_agent_override = null; + } + if (self.user_agent_header_override) |uah| { + self.allocator.free(uah); + self.user_agent_header_override = null; + } +} + // Enable TLS verification on all connections. pub fn setTlsVerify(self: *Client, verify: bool) !void { // Remove inflight connections check on enable TLS b/c chromiumoxide calls @@ -209,7 +239,12 @@ pub fn changeProxy(self: *Client, proxy: ?[:0]const u8) !void { } pub fn newHeaders(self: *const Client) !http.Headers { - return http.Headers.init(self.network.config.http_headers.user_agent_header); + const ua_header = self.user_agent_header_override orelse self.network.config.http_headers.user_agent_header; + return http.Headers.init(ua_header); +} + +pub fn getUserAgent(self: *const Client) [:0]const u8 { + return self.user_agent_override orelse self.network.config.http_headers.user_agent; } pub fn abort(self: *Client) void { @@ -512,7 +547,7 @@ fn robotsDoneCallback(ctx_ptr: *anyopaque) !void { 200 => { if (ctx.buffer.items.len > 0) { const robots: ?Robots = ctx.client.network.robot_store.robotsFromBytes( - ctx.client.network.config.http_headers.user_agent, + ctx.client.getUserAgent(), ctx.buffer.items, ) catch blk: { log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url }); diff --git a/src/browser/Page.zig b/src/browser/Page.zig index 0455bb54..1df05819 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -142,7 +142,11 @@ _blob_urls: std.StringHashMapUnmanaged(*Blob) = .{}, /// `load` events that'll be fired before window's `load` event. /// A call to `documentIsComplete` (which calls `_documentIsComplete`) resets it. -_to_load: std.ArrayList(*Element.Html) = .{}, +/// Double-buffered so that dispatching load events (which may trigger JS that +/// creates new elements) doesn't invalidate the list while iterating. +_to_load_1: std.ArrayList(*Element.Html) = .{}, +_to_load_2: std.ArrayList(*Element.Html) = .{}, +_to_load: *std.ArrayList(*Element.Html) = undefined, _style_manager: StyleManager, _script_manager: ScriptManager, @@ -281,6 +285,7 @@ pub fn init(self: *Page, frame_id: u32, session: *Session, parent: ?*Page) !void ._script_manager = undefined, ._event_manager = EventManager.init(session.page_arena, self), }; + self._to_load = &self._to_load_1; var screen: *Screen = undefined; var visual_viewport: *VisualViewport = undefined; @@ -321,7 +326,7 @@ pub fn init(self: *Page, frame_id: u32, session: *Session, parent: ?*Page) !void .identity_arena = session.page_arena, .call_arena = self.call_arena, }); - errdefer self.js.deinit(); + errdefer browser.env.destroyContext(self.js); document._page = self; @@ -386,7 +391,12 @@ pub fn deinit(self: *Page, abort_http: bool) void { observer.releaseRef(session); } - self.window._document._selection.releaseRef(session); + var document = self.window._document; + document._selection.releaseRef(session); + + if (document._fonts) |f| { + f.releaseRef(session); + } } session.browser.env.destroyContext(self.js); @@ -463,7 +473,7 @@ pub fn releaseArena(self: *Page, allocator: Allocator) void { return self._session.releaseArena(allocator); } -pub fn isSameOrigin(self: *const Page, url: [:0]const u8) !bool { +pub fn isSameOrigin(self: *const Page, url: [:0]const u8) bool { const current_origin = self.origin orelse return false; // fastpath @@ -1448,14 +1458,22 @@ pub fn checkIntersections(self: *Page) !void { pub fn dispatchLoad(self: *Page) !void { const has_dom_load_listener = self._event_manager.has_dom_load_listener; - for (self._to_load.items) |html_element| { + + // Swap buffers - new additions during dispatch go to the other buffer + const to_process = self._to_load; + self._to_load = if (self._to_load == &self._to_load_1) + &self._to_load_2 + else + &self._to_load_1; + + for (to_process.items) |html_element| { if (has_dom_load_listener or html_element.hasAttributeFunction(.onload, self)) { const event = try Event.initTrusted(comptime .wrap("load"), .{}, self); try self._event_manager.dispatch(html_element.asEventTarget(), event); } } - // We drained everything. - self._to_load.clearRetainingCapacity(); + + to_process.clearRetainingCapacity(); } pub fn scheduleMutationDelivery(self: *Page) !void { @@ -3699,6 +3717,8 @@ fn asUint(comptime string: anytype) std.meta.Int( const testing = @import("../testing.zig"); test "WebApi: Page" { + const filter: testing.LogFilter = .init(&.{.http}); + defer filter.deinit(); try testing.htmlRunner("page", .{}); } diff --git a/src/browser/Session.zig b/src/browser/Session.zig index 149518c6..776a6521 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -75,6 +75,10 @@ identity: js.Identity = .{}, // This ensures objects are only freed when ALL v8 wrappers are gone. finalizer_callbacks: std.AutoHashMapUnmanaged(usize, *FinalizerCallback) = .empty, +// Pool for FinalizerCallback.Identity structs. These must survive page resets +// so V8 weak callbacks can validate the FC before dereferencing it. +fc_identity_pool: std.heap.MemoryPool(FinalizerCallback.Identity), + // Tracked global v8 objects that need to be released on cleanup. // Lives at Session level so objects can outlive individual Identities. globals: std.ArrayList(v8.Global) = .empty, @@ -133,6 +137,7 @@ pub fn init(self: *Session, browser: *Browser, notification: *Notification) !voi .queued_queued_navigation = .{}, .notification = notification, .cookie_jar = storage.Cookie.Jar.init(allocator), + .fc_identity_pool = .init(allocator), }; self.queued_navigation = &self.queued_navigation_1; } @@ -142,6 +147,7 @@ pub fn deinit(self: *Session) void { self.removePage(); } self.cookie_jar.deinit(); + self.fc_identity_pool.deinit(); self.storage_shed.deinit(self.browser.app.allocator); self.arena_pool.release(self.page_arena); @@ -506,19 +512,33 @@ pub const FinalizerCallback = struct { finalizer_ptr_id: usize, release_ref: *const fn (ptr_id: usize, session: *Session) void, - // Track how many identities (JS worlds) reference this FC. - // Only cleanup when all identities have finalized. + // Linked list of Identities referencing this FC. + identities: ?*Identity = null, + // Count of active identities (for knowing when to clean up FC). identity_count: u8 = 0, // For every FinalizerCallback we'll have 1+ FinalizerCallback.Identity: one - // for every identity that gets the instance. In most cases, that'l be 1. + // for every identity that gets the instance. In most cases, that'll be 1. + // Allocated from Session.fc_identity_pool so it survives page resets and + // allows the weak callback to safely check the done flag. pub const Identity = struct { + session: *Session, identity: *js.Identity, - fc: *Session.FinalizerCallback, + finalizer_ptr_id: usize, + resolved_ptr_id: usize, + next: ?*Identity = null, + done: bool = false, }; - // Called during page reset to force cleanup regardless of identity_count. + // Called during page reset to force cleanup regardless of identities. fn deinit(self: *FinalizerCallback, session: *Session) void { + // Mark all identities as done so stale V8 weak callbacks + // won't find the wrong FC if resolved_ptr_id is reused. + var id = self.identities; + while (id) |identity| { + identity.done = true; + id = identity.next; + } self.release_ref(self.finalizer_ptr_id, session); session.releaseArena(self.arena); } diff --git a/src/browser/js/Caller.zig b/src/browser/js/Caller.zig index 01e88a74..05419752 100644 --- a/src/browser/js/Caller.zig +++ b/src/browser/js/Caller.zig @@ -660,6 +660,17 @@ pub const Function = struct { switch (cache) { .internal => |idx| { + // Defensive check: verify object has enough internal fields. + // This guards against edge cases where signature check passes but + // the receiver doesn't have expected internal fields (e.g., global + // proxy vs global object, cross-context scenarios). + if (v8.v8__Object__InternalFieldCount(js_this) <= idx) { + if (comptime IS_DEBUG) { + std.debug.assert(false); + } + return false; + } + if (v8.v8__Object__GetInternalField(js_this, idx)) |cached| { // means we can't cache undefined, since we can't tell the // difference between "it isn't in the cache" and "it's diff --git a/src/browser/js/Context.zig b/src/browser/js/Context.zig index 575e2f3a..abe10ee6 100644 --- a/src/browser/js/Context.zig +++ b/src/browser/js/Context.zig @@ -228,7 +228,13 @@ pub fn setOrigin(self: *Context, key: ?[]const u8) !void { const env = self.env; const isolate = env.isolate; - lp.assert(self.origin.rc == 1, "Ref opaque origin", .{ .rc = self.origin.rc }); + if (comptime IS_DEBUG) { + // A page starts off with an opaque origin. After navigation, setOrigin + // is called. This is the only time setOrigin should be called for that + // page. Therefore, when setOrigin is called, the previous origin should + // have been opaque and its rc should have been 1. + lp.assert(self.origin.rc == 1, "Ref opaque origin", .{ .rc = self.origin.rc }); + } const origin = try self.session.getOrCreateOrigin(key); diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 2158e51d..075cd1f1 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -561,7 +561,7 @@ test "Env: Worker context " { const page = try session.createPage(); defer session.removePage(); - const worker = try @import("../webapi/Worker.zig").init("about:blank", &page.js.execution); + const worker = try @import("../webapi/Worker.zig").init("http://localhost:9582/src/browser/tests/testing.js", &page.js.execution); var ls: js.Local.Scope = undefined; worker._worker_scope.js.localScope(&ls); diff --git a/src/browser/js/Inspector.zig b/src/browser/js/Inspector.zig index d956cc52..0bc81872 100644 --- a/src/browser/js/Inspector.zig +++ b/src/browser/js/Inspector.zig @@ -128,6 +128,10 @@ pub fn contextCreated( } } +pub fn getContextId(_: *const Inspector, local: *const js.Local) i32 { + return v8.v8__inspector__executionContextId(local.handle); +} + pub fn contextDestroyed(self: *Inspector, context: *const v8.Context) void { v8.v8_inspector__Inspector__ContextDestroyed(self.handle, context); diff --git a/src/browser/js/Local.zig b/src/browser/js/Local.zig index a4757221..e77c60ac 100644 --- a/src/browser/js/Local.zig +++ b/src/browser/js/Local.zig @@ -281,11 +281,15 @@ pub fn mapZigInstanceToJs(self: *const Local, js_obj_handle: ?*const v8.Object, finalizer_gop.value_ptr.* = try self.createFinalizerCallback(resolved_ptr_id, finalizer_ptr_id, finalizer.release_ref_from_zig); } const fc = finalizer_gop.value_ptr.*; - const identity_finalizer = try fc.arena.create(Session.FinalizerCallback.Identity); + const identity_finalizer = try session.fc_identity_pool.create(); identity_finalizer.* = .{ - .fc = fc, + .session = session, .identity = ctx.identity, + .finalizer_ptr_id = finalizer_ptr_id, + .resolved_ptr_id = resolved_ptr_id, + .next = fc.identities, }; + fc.identities = identity_finalizer; fc.identity_count += 1; v8.v8__Global__SetWeakFinalizer(gop.value_ptr, identity_finalizer, finalizer.release_ref, v8.kParameter); @@ -412,6 +416,9 @@ pub fn zigValueToJs(self: *const Local, value: anytype, comptime opts: CallOpts) js.Promise.Temp, js.PromiseResolver.Global, js.Module.Global => return .{ .local = self, .handle = @ptrCast(value.local(self).handle) }, + + js.Undefined => return .{.local = self, .handle = isolate.initUndefined() }, + else => {} } // zig fmt: on @@ -857,6 +864,18 @@ fn jsValueToTypedArray(comptime T: type, js_val: js.Value) !?[]T { return ptr[0..num_elements]; } }, + f32 => { + if (js_val.isFloat32Array()) { + const ptr = @as([*]f32, @ptrCast(@alignCast(base))); + return ptr[0..num_elements]; + } + }, + f64 => { + if (js_val.isFloat64Array()) { + const ptr = @as([*]f64, @ptrCast(@alignCast(base))); + return ptr[0..num_elements]; + } + }, else => {}, } return error.InvalidArgument; @@ -985,6 +1004,12 @@ fn probeJsValueToZig(self: *const Local, comptime T: type, js_val: js.Value) !Pr i64 => if (js_val.isBigInt64Array()) { return .{ .ok = {} }; }, + f32 => if (js_val.isFloat32Array()) { + return .{ .ok = {} }; + }, + f64 => if (js_val.isFloat64Array()) { + return .{ .ok = {} }; + }, else => {}, } return .{ .invalid = {} }; @@ -1200,26 +1225,31 @@ fn resolveT(comptime T: type, value: *T) Resolved { const ptr = v8.v8__WeakCallbackInfo__GetParameter(handle.?).?; const identity_finalizer: *Session.FinalizerCallback.Identity = @ptrCast(@alignCast(ptr)); - const fc = identity_finalizer.fc; - const session = fc.session; - const finalizer_ptr_id = fc.finalizer_ptr_id; + // Identity is allocated from pool, so it's valid even after page reset. + const session = identity_finalizer.session; + const resolved_ptr_id = identity_finalizer.resolved_ptr_id; + defer session.fc_identity_pool.destroy(identity_finalizer); - // Remove from this identity's map - if (identity_finalizer.identity.identity_map.fetchRemove(fc.resolved_ptr_id)) |kv| { + // Always clean up the identity map entry + if (identity_finalizer.identity.identity_map.fetchRemove(resolved_ptr_id)) |kv| { var global = kv.value; v8.v8__Global__Reset(&global); } + // If done, FC was already cleaned up during page reset. The + // finalizer_ptr_id may have been reused for a new object, so + // we must not look it up in the map. + if (identity_finalizer.done) return; + + const finalizer_ptr_id = identity_finalizer.finalizer_ptr_id; + const fc = session.finalizer_callbacks.get(finalizer_ptr_id) orelse return; + const identity_count = fc.identity_count; if (identity_count == 1) { - // All IsolatedWorlds that reference this object have - // released it. Release the instance ref, remove the - // FinalizerCallback and free it. + // Last identity - clean up the FC. + // Remove from map before releaseRef to prevent address reuse issues. + _ = session.finalizer_callbacks.remove(finalizer_ptr_id); FT.releaseRef(@ptrFromInt(finalizer_ptr_id), session); - const removed = session.finalizer_callbacks.remove(finalizer_ptr_id); - if (comptime IS_DEBUG) { - std.debug.assert(removed); - } session.releaseArena(fc.arena); } else { fc.identity_count = identity_count - 1; diff --git a/src/browser/js/Value.zig b/src/browser/js/Value.zig index f80727b0..bf1696e7 100644 --- a/src/browser/js/Value.zig +++ b/src/browser/js/Value.zig @@ -155,6 +155,14 @@ pub fn isBigInt64Array(self: Value) bool { return v8.v8__Value__IsBigInt64Array(self.handle); } +pub fn isFloat32Array(self: Value) bool { + return v8.v8__Value__IsFloat32Array(self.handle); +} + +pub fn isFloat64Array(self: Value) bool { + return v8.v8__Value__IsFloat64Array(self.handle); +} + pub fn isPromise(self: Value) bool { return v8.v8__Value__IsPromise(self.handle); } diff --git a/src/browser/js/js.zig b/src/browser/js/js.zig index 74ee0c7a..cda8f9d5 100644 --- a/src/browser/js/js.zig +++ b/src/browser/js/js.zig @@ -319,6 +319,9 @@ pub fn simpleZigValueToJs(isolate: Isolate, value: anytype, comptime fail: bool, return null; } +// marker interface +pub const Undefined = struct {}; + // These are here, and not in Inspector.zig, because Inspector.zig isn't always // included (e.g. in the wpt build). diff --git a/src/browser/tests/element/html/media.html b/src/browser/tests/element/html/media.html index e97a0d67..a0e23f43 100644 --- a/src/browser/tests/element/html/media.html +++ b/src/browser/tests/element/html/media.html @@ -42,11 +42,17 @@ const audio = document.getElementById('audio1'); testing.expectEqual(true, audio.paused); - audio.play(); + var resolved = false; + audio.play().then(() => { + resolved = true; + }); testing.expectEqual(false, audio.paused); audio.pause(); testing.expectEqual(true, audio.paused); + testing.onload(() => { + testing.expectEqual(true, resolved); + }) } diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 7c75e641..e4facdd9 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -454,6 +454,7 @@ pub fn getFonts(self: *Document, page: *Page) !*FontFaceSet { return fonts; } const fonts = try FontFaceSet.init(page); + fonts.acquireRef(); self._fonts = fonts; return fonts; } diff --git a/src/browser/webapi/History.zig b/src/browser/webapi/History.zig index 19325b7a..1ccc59f6 100644 --- a/src/browser/webapi/History.zig +++ b/src/browser/webapi/History.zig @@ -92,7 +92,7 @@ fn goInner(delta: i32, page: *Page) !void { const entry = page._session.navigation._entries.items[index]; if (entry._url) |url| { - if (try page.isSameOrigin(url)) { + if (page.isSameOrigin(url)) { const target = page.window.asEventTarget(); if (page._event_manager.hasDirectListeners(target, "popstate", page.window._on_popstate)) { const event = (try PopStateEvent.initTrusted(comptime .wrap("popstate"), .{ .state = entry._state.value }, page)).asEvent(); diff --git a/src/browser/webapi/Navigator.zig b/src/browser/webapi/Navigator.zig index 8de0d163..818c20ec 100644 --- a/src/browser/webapi/Navigator.zig +++ b/src/browser/webapi/Navigator.zig @@ -37,7 +37,7 @@ _storage: StorageManager = .{}, pub const init: Navigator = .{}; pub fn getUserAgent(_: *const Navigator, page: *Page) []const u8 { - return page._session.browser.app.config.http_headers.user_agent; + return page._session.browser.http_client.getUserAgent(); } pub fn getLanguages(_: *const Navigator) [2][]const u8 { @@ -139,7 +139,7 @@ fn validateProtocolHandlerURL(url: [:0]const u8, page: *const Page) !void { if (std.mem.indexOf(u8, url, "%s") == null) { return error.SyntaxError; } - if (try page.isSameOrigin(url) == false) { + if (page.isSameOrigin(url) == false) { return error.SyntaxError; } } diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index d4f7f54b..4ae306e8 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -244,8 +244,6 @@ pub fn terminate(self: *Worker) void { resp.abort(error.Abort); self._http_response = null; } - - self._page.removeWorker(self); } // Posts a message from the page to the worker. diff --git a/src/browser/webapi/element/html/Media.zig b/src/browser/webapi/element/html/Media.zig index 6d62013f..84a828be 100644 --- a/src/browser/webapi/element/html/Media.zig +++ b/src/browser/webapi/element/html/Media.zig @@ -137,7 +137,7 @@ fn isMaybeSupported(mime_type: []const u8) bool { return false; } -pub fn play(self: *Media, page: *Page) !void { +pub fn play(self: *Media, page: *Page) !js.Promise { const was_paused = self._paused; self._paused = false; self._ready_state = .HAVE_ENOUGH_DATA; @@ -146,6 +146,7 @@ pub fn play(self: *Media, page: *Page) !void { try self.dispatchEvent("play", page); try self.dispatchEvent("playing", page); } + return page.js.local.?.resolvePromise(js.Undefined{}); } pub fn pause(self: *Media, page: *Page) !void { diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 1d0776b3..07901487 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -86,6 +86,12 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise { log.debug(.http, "fetch", .{ .url = request._url }); } + const cookie_jar = switch (request._credentials) { + .omit => null, + .include => &page._session.cookie_jar, + .@"same-origin" => if (page.isSameOrigin(request._url)) &page._session.cookie_jar else null, + }; + try http_client.request(.{ .ctx = fetch, .url = request._url, @@ -95,7 +101,7 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise { .body = request._body, .headers = headers, .resource_type = .fetch, - .cookie_jar = &page._session.cookie_jar, + .cookie_jar = cookie_jar, .cookie_origin = page.url, .notification = page._session.notification, .start_callback = httpStartCallback, @@ -236,7 +242,6 @@ fn httpErrorCallback(ctx: *anyopaque, _: anyerror) void { defer if (self._owns_response) { response.deinit(self._page._session); - self._owns_response = false; }; var ls: js.Local.Scope = undefined; diff --git a/src/browser/webapi/net/WebSocket.zig b/src/browser/webapi/net/WebSocket.zig index 32be22fb..e0907bca 100644 --- a/src/browser/webapi/net/WebSocket.zig +++ b/src/browser/webapi/net/WebSocket.zig @@ -38,6 +38,7 @@ const Allocator = std.mem.Allocator; const IS_DEBUG = @import("builtin").mode == .Debug; const WebSocket = @This(); + _rc: lp.RC(u8) = .{}, _page: *Page, _proto: *EventTarget, @@ -88,20 +89,6 @@ pub const BinaryType = enum { arraybuffer, }; -fn isValidProtocol(protocol: []const u8) bool { - if (protocol.len == 0) return false; - for (protocol) |c| { - // Control characters - if (c <= 31 or c == 127) return false; - // Separators per RFC 2616 - switch (c) { - '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t' => return false, - else => {}, - } - } - return true; -} - pub fn init(url: []const u8, protocols: [][]const u8, page: *Page) !*WebSocket { { if (url.len < 6) { @@ -196,6 +183,18 @@ pub fn deinit(self: *WebSocket, session: *Session) void { session.releaseArena(self._arena); } +pub fn releaseRef(self: *WebSocket, session: *Session) void { + self._rc.release(self, session); +} + +pub fn acquireRef(self: *WebSocket) void { + self._rc.acquire(); +} + +fn asEventTarget(self: *WebSocket) *EventTarget { + return self._proto; +} + // we're being aborted internally (e.g. page shutting down) pub fn kill(self: *WebSocket) void { self.cleanup(); @@ -211,13 +210,15 @@ pub fn disconnected(self: *WebSocket, err_: ?anyerror) void { log.info(.websocket, "disconnected", .{ .url = self._url, .reason = "closed" }); } - self.cleanup(); + defer self.cleanup(); // Use 1006 (abnormal closure) if connection wasn't cleanly closed const code = if (was_clean) self._close_code else 1006; const reason = if (was_clean) self._close_reason else ""; - // Spec requires error event before close on abnormal closure + // Spec requires error event before close on abnormal closure. + // Dispatch events before cleanup since cleanup releases the ref count + // which may free our event handler references. if (!was_clean) { self.dispatchErrorEvent() catch |err| { log.err(.websocket, "error event dispatch failed", .{ .err = err }); @@ -239,18 +240,6 @@ fn cleanup(self: *WebSocket) void { } } -pub fn releaseRef(self: *WebSocket, session: *Session) void { - self._rc.release(self, session); -} - -pub fn acquireRef(self: *WebSocket) void { - self._rc.acquire(); -} - -fn asEventTarget(self: *WebSocket) *EventTarget { - return self._proto; -} - fn queueMessage(self: *WebSocket, msg: Message) !void { const was_empty = self._send_queue.items.len == 0; try self._send_queue.append(self._arena, msg); @@ -263,6 +252,20 @@ fn queueMessage(self: *WebSocket, msg: Message) !void { } } +fn isValidProtocol(protocol: []const u8) bool { + if (protocol.len == 0) return false; + for (protocol) |c| { + // Control characters and non-ASCII + if (c <= 31 or c >= 127) return false; + // Separators per RFC 2616 + switch (c) { + '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t' => return false, + else => {}, + } + } + return true; +} + /// WebSocket send() accepts string, Blob, ArrayBuffer, or TypedArray const SendData = union(enum) { blob: *Blob, @@ -279,17 +282,16 @@ const BinaryData = union(enum) { uint32: []u32, int64: []i64, uint64: []u64, + float32: []f32, + float64: []f64, fn asBuffer(self: BinaryData) []u8 { return switch (self) { .int8 => |b| @as([*]u8, @ptrCast(b.ptr))[0..b.len], .uint8 => |b| b, - .int16 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 2], - .uint16 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 2], - .int32 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 4], - .uint32 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 4], - .int64 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 8], - .uint64 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 8], + inline .int16, .uint16 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 2], + inline .int32, .uint32, .float32 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 4], + inline .int64, .uint64, .float64 => |b| @as([*]u8, @ptrCast(b.ptr))[0 .. b.len * 8], }; } }; @@ -754,7 +756,7 @@ pub const JsApi = struct { pub const onclose = bridge.accessor(WebSocket.getOnClose, WebSocket.setOnClose, .{}); pub const send = bridge.function(WebSocket.send, .{ .dom_exception = true }); - pub const close = bridge.function(WebSocket.close, .{}); + pub const close = bridge.function(WebSocket.close, .{ .dom_exception = true }); }; const testing = @import("../../../testing.zig"); diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 2c130ac3..c19393e0 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -245,7 +245,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void { var headers = try http_client.newHeaders(); // Only add cookies for same-origin or when withCredentials is true - const cookie_support = self._with_credentials or try page.isSameOrigin(self._url); + const cookie_support = self._with_credentials or page.isSameOrigin(self._url); try self._request_headers.populateHttpHeader(page.call_arena, &headers); if (cookie_support) { diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 222098c8..cc4919c6 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -368,6 +368,7 @@ pub const BrowserContext = struct { next_script_id: u32 = 1, http_proxy_changed: bool = false, + user_agent_changed: bool = false, // Extra headers to add to all requests. extra_headers: std.ArrayList([*c]const u8) = .empty, @@ -477,6 +478,9 @@ pub const BrowserContext = struct { log.warn(.http, "changeProxy", .{ .err = err }); }; } + if (self.user_agent_changed) { + browser.http_client.clearUserAgentOverride(); + } self.intercept_state.deinit(); } @@ -583,7 +587,7 @@ pub const BrowserContext = struct { pub fn onPageRemove(ctx: *anyopaque, _: Notification.PageRemove) !void { const self: *BrowserContext = @ptrCast(@alignCast(ctx)); - try @import("domains/page.zig").pageRemove(self); + @import("domains/page.zig").pageRemove(self); } pub fn onPageCreated(ctx: *anyopaque, page: *Page) !void { @@ -804,16 +808,18 @@ const IsolatedWorld = struct { identity: js.Identity = .{}, pub fn deinit(self: *IsolatedWorld) void { - self.removeContext() catch {}; - self.identity.deinit(); + self.removeContext(); self.browser.arena_pool.release(self.call_arena); self.browser.arena_pool.release(self.arena); } - pub fn removeContext(self: *IsolatedWorld) !void { - const ctx = self.context orelse return error.NoIsolatedContextToRemove; - self.browser.env.destroyContext(ctx); - self.context = null; + pub fn removeContext(self: *IsolatedWorld) void { + if (self.context) |ctx| { + self.browser.env.destroyContext(ctx); + self.context = null; + } + // I don't think it's possible to have any identity without a context, + // but there's no harm in being safe. self.identity.deinit(); self.identity = .{}; } diff --git a/src/cdp/domains/emulation.zig b/src/cdp/domains/emulation.zig index 4cfcd7be..fc7d821d 100644 --- a/src/cdp/domains/emulation.zig +++ b/src/cdp/domains/emulation.zig @@ -19,6 +19,7 @@ const std = @import("std"); const CDP = @import("../CDP.zig"); const log = @import("../../log.zig"); +const Config = @import("../../Config.zig"); pub fn processMessage(cmd: *CDP.Command) !void { const action = std.meta.stringToEnum(enum { @@ -69,7 +70,146 @@ fn setTouchEmulationEnabled(cmd: *CDP.Command) !void { return cmd.sendResult(null, .{}); } -fn setUserAgentOverride(cmd: *CDP.Command) !void { - log.info(.app, "setUserAgentOverride ignored", .{}); +// Emulation.setUserAgentOverride is also called by Network.setUserAgentOverride +pub fn setUserAgentOverride(cmd: *CDP.Command) !void { + const params = (try cmd.params(struct { + userAgent: []const u8, + acceptLanguage: ?[]const u8 = null, + platform: ?[]const u8 = null, + })) orelse return error.InvalidParams; + + if (params.acceptLanguage) |v| { + log.warn(.not_implemented, "Emulation.setUserAgentOverride", .{ .param = "acceptLanguage", .value = v }); + } + if (params.platform) |v| { + log.warn(.not_implemented, "Emulation.setUserAgentOverride", .{ .param = "platform", .value = v }); + } + + const ua = params.userAgent; + Config.validateUserAgent(ua) catch |err| switch (err) { + error.NonPrintable => return cmd.sendError(-32602, "User agent contains non-printable characters", .{}), + error.Reserved => { + log.warn(.not_implemented, "Emulation.setUserAgentOverride", .{ .param = "userAgent", .value = ua, .info = "User agent must not contain Mozilla" }); + return cmd.sendResult(null, .{}); + }, + }; + + const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; + const http_client = cmd.cdp.browser.http_client; + try http_client.setUserAgentOverride(ua); + bc.user_agent_changed = true; + return cmd.sendResult(null, .{}); } + +const testing = @import("../testing.zig"); + +test "cdp.Emulation: setUserAgentOverride with valid user agent" { + var ctx = try testing.context(); + defer ctx.deinit(); + _ = try ctx.loadBrowserContext(.{ .id = "BID-UA1" }); + + try ctx.processMessage(.{ + .id = 1, + .method = "Emulation.setUserAgentOverride", + .params = .{ .userAgent = "CustomBot/1.0" }, + }); + + try ctx.expectSentResult(null, .{ .id = 1 }); +} + +test "cdp.Emulation: setUserAgentOverride ignores mozilla" { + const filter: testing.LogFilter = .init(&.{.not_implemented}); + defer filter.deinit(); + + var ctx = try testing.context(); + defer ctx.deinit(); + _ = try ctx.loadBrowserContext(.{ .id = "BID-UA2" }); + + try ctx.processMessage(.{ + .id = 2, + .method = "Emulation.setUserAgentOverride", + .params = .{ .userAgent = "Mozilla/5.0 (Windows NT 10.0)" }, + }); + + try ctx.expectSentResult(null, .{}); + try testing.expectEqual(false, ctx.cdp().browser_context.?.user_agent_changed); +} + +test "cdp.Emulation: setUserAgentOverride ignores mozilla case insensitive" { + const filter: testing.LogFilter = .init(&.{.not_implemented}); + defer filter.deinit(); + + var ctx = try testing.context(); + defer ctx.deinit(); + _ = try ctx.loadBrowserContext(.{ .id = "BID-UA3" }); + + try ctx.processMessage(.{ + .id = 3, + .method = "Emulation.setUserAgentOverride", + .params = .{ .userAgent = "MOZILLA/5.0 test" }, + }); + + try ctx.expectSentResult(null, .{}); + try testing.expectEqual(false, ctx.cdp().browser_context.?.user_agent_changed); +} + +test "cdp.Emulation: setUserAgentOverride rejects non-printable characters" { + const filter: testing.LogFilter = .init(&.{.not_implemented}); + defer filter.deinit(); + + var ctx = try testing.context(); + defer ctx.deinit(); + _ = try ctx.loadBrowserContext(.{ .id = "BID-UA4" }); + + try ctx.processMessage(.{ + .id = 4, + .method = "Emulation.setUserAgentOverride", + .params = .{ .userAgent = "Bot/1.0\x01hidden" }, + }); + + try ctx.expectSentError(-32602, "User agent contains non-printable characters", .{ .id = 4 }); +} + +test "cdp.Emulation: setUserAgentOverride with optional params" { + const filter: testing.LogFilter = .init(&.{.not_implemented}); + defer filter.deinit(); + + var ctx = try testing.context(); + defer ctx.deinit(); + _ = try ctx.loadBrowserContext(.{ .id = "BID-UA5" }); + + try ctx.processMessage(.{ + .id = 5, + .method = "Emulation.setUserAgentOverride", + .params = .{ + .userAgent = "CustomBot/2.0", + .acceptLanguage = "en-US", + .platform = "Linux", + }, + }); + + try ctx.expectSentResult(null, .{ .id = 5 }); +} + +test "cdp.Emulation: setUserAgentOverride can be called multiple times" { + var ctx = try testing.context(); + defer ctx.deinit(); + _ = try ctx.loadBrowserContext(.{ .id = "BID-UA6" }); + + try ctx.processMessage(.{ + .id = 6, + .method = "Emulation.setUserAgentOverride", + .params = .{ .userAgent = "FirstBot/1.0" }, + }); + + try ctx.expectSentResult(null, .{ .id = 6 }); + + try ctx.processMessage(.{ + .id = 7, + .method = "Emulation.setUserAgentOverride", + .params = .{ .userAgent = "SecondBot/2.0" }, + }); + + try ctx.expectSentResult(null, .{ .id = 7 }); +} diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index ea6bc020..9e1a31e8 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -51,7 +51,7 @@ pub fn processMessage(cmd: *CDP.Command) !void { .enable => return enable(cmd), .disable => return disable(cmd), .setCacheDisabled => return cmd.sendResult(null, .{}), - .setUserAgentOverride => return cmd.sendResult(null, .{}), + .setUserAgentOverride => return @import("emulation.zig").setUserAgentOverride(cmd), .setExtraHTTPHeaders => return setExtraHTTPHeaders(cmd), .deleteCookies => return deleteCookies(cmd), .clearBrowserCookies => return clearBrowserCookies(cmd), diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 17455391..ab7c8397 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -257,7 +257,22 @@ fn createIsolatedWorld(cmd: *CDP.Command) !void { const page = bc.session.currentPage() orelse return error.PageNotLoaded; const js_context = try world.createContext(page); - return cmd.sendResult(.{ .executionContextId = js_context.id }, .{}); + const aux_data = try std.fmt.allocPrint(cmd.arena, "{{\"isDefault\":false,\"type\":\"isolated\",\"frameId\":\"{s}\"}}", .{params.frameId}); + + var ls: js.Local.Scope = undefined; + js_context.localScope(&ls); + defer ls.deinit(); + + bc.inspector_session.inspector.contextCreated( + &ls.local, + params.worldName, + page.origin orelse "", + aux_data, + false, + ); + + const context_id = bc.inspector_session.inspector.getContextId(&ls.local); + return cmd.sendResult(.{ .executionContextId = context_id }, .{}); } fn navigate(cmd: *CDP.Command) !void { @@ -283,6 +298,12 @@ fn navigate(cmd: *CDP.Command) !void { var page = session.currentPage() orelse return error.PageNotLoaded; if (page._load_state != .waiting) { + // Reset isolated world identities to disable V8 weak callbacks before + // resetPageResources releases refs. Prevents double-release crashes. + for (bc.isolated_worlds.items) |isolated_world| { + isolated_world.identity.deinit(); + isolated_world.identity = .{}; + } page = try session.replacePage(); } @@ -313,6 +334,12 @@ fn doReload(cmd: *CDP.Command) !void { const reload_url = try cmd.arena.dupeZ(u8, page.url); if (page._load_state != .waiting) { + // Reset isolated world identities to disable V8 weak callbacks before + // resetPageResources releases refs. Prevents double-release crashes. + for (bc.isolated_worlds.items) |isolated_world| { + isolated_world.identity.deinit(); + isolated_world.identity = .{}; + } page = try session.replacePage(); } @@ -376,14 +403,14 @@ pub fn pageNavigate(bc: *CDP.BrowserContext, event: *const Notification.PageNavi }, .{ .session_id = session_id }); } -pub fn pageRemove(bc: *CDP.BrowserContext) !void { +pub fn pageRemove(bc: *CDP.BrowserContext) void { // Clear all remote object mappings to prevent stale objectIds from being used // after the context is destroy bc.inspector_session.inspector.resetContextGroup(); // The main page is going to be removed, we need to remove contexts from other worlds first. for (bc.isolated_worlds.items) |isolated_world| { - try isolated_world.removeContext(); + isolated_world.removeContext(); } } diff --git a/src/cdp/testing.zig b/src/cdp/testing.zig index a7c7317b..7392b873 100644 --- a/src/cdp/testing.zig +++ b/src/cdp/testing.zig @@ -32,6 +32,7 @@ pub const expectError = base.expectError; pub const expectEqualSlices = base.expectEqualSlices; pub const pageTest = base.pageTest; pub const newString = base.newString; +pub const LogFilter = base.LogFilter; const TestContext = struct { read_at: usize = 0,