diff --git a/.github/workflows/package-archlinux.yml b/.github/workflows/package-archlinux.yml new file mode 100644 index 00000000..dba6d2d0 --- /dev/null +++ b/.github/workflows/package-archlinux.yml @@ -0,0 +1,79 @@ +name: package archlinux + +on: + workflow_call: + +permissions: + contents: write + +env: + RELEASE: ${{ github.ref_type == 'tag' && github.ref_name || 'nightly' }} + +jobs: + package: + strategy: + fail-fast: false + matrix: + arch: [x86_64, aarch64] + + env: + ARCH: ${{ matrix.arch }} + OS: linux + + runs-on: ubuntu-22.04 + container: archlinux:latest + timeout-minutes: 10 + + steps: + - uses: actions/checkout@v6 + + - name: Install packaging deps + run: pacman -Syu --noconfirm --needed base-devel sudo + + - name: Download linux binary + uses: actions/download-artifact@v4 + with: + name: lightpanda-${{ env.ARCH }}-${{ env.OS }} + path: . + + - name: Build Arch package + run: | + useradd -m builder + echo "builder ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + + RAW_VERSION="${{ env.RELEASE }}" + PKGVER="${RAW_VERSION#v}" + PKGREL="1" + echo "PKGVER=${PKGVER}" >> "$GITHUB_ENV" + echo "PKGREL=${PKGREL}" >> "$GITHUB_ENV" + + mkdir -p pkg + cp lightpanda-${{ env.ARCH }}-${{ env.OS }} pkg/ + cp LICENSE pkg/ + + cat > pkg/PKGBUILD <> "$GITHUB_ENV" + + ROOT="lightpanda_${PKGVER}_${DEB_ARCH}" + mkdir -p "$ROOT/DEBIAN" "$ROOT/usr/bin" "$ROOT/usr/share/doc/lightpanda" + + install -m755 "lightpanda-${ARCH}-${OS}" "$ROOT/usr/bin/lightpanda" + install -m644 LICENSE "$ROOT/usr/share/doc/lightpanda/copyright" + + cat > "$ROOT/DEBIAN/control" <= 2.35) + Maintainer: Lightpanda + Homepage: https://lightpanda.io + Description: Lightpanda, headless browser built for AI and automation + EOF + + dpkg-deb --build --root-owner-group "$ROOT" + + - name: Upload Debian package to release + uses: ncipollo/release-action@v1 + with: + allowUpdates: true + artifacts: lightpanda_${{ env.PKGVER }}_${{ env.DEB_ARCH }}.deb + tag: ${{ env.RELEASE }} + makeLatest: true diff --git 
a/.github/workflows/release.yml b/.github/workflows/release.yml index 62f75198..11e4b87e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -134,70 +134,10 @@ jobs: package-archlinux: if: github.ref_type == 'tag' - strategy: - fail-fast: false - matrix: - arch: [x86_64, aarch64] - - env: - ARCH: ${{ matrix.arch }} - OS: linux - needs: build-linux - runs-on: ubuntu-22.04 - container: archlinux:latest - timeout-minutes: 10 + uses: ./.github/workflows/package-archlinux.yml - steps: - - uses: actions/checkout@v6 - - - name: Install packaging deps - run: pacman -Syu --noconfirm --needed base-devel sudo - - - name: Download linux binary - uses: actions/download-artifact@v4 - with: - name: lightpanda-${{ env.ARCH }}-${{ env.OS }} - path: . - - - name: Build Arch package - run: | - useradd -m builder - echo "builder ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers - - RAW_VERSION="${{ env.RELEASE }}" - PKGVER="${RAW_VERSION#v}" - PKGREL="1" - echo "PKGVER=${PKGVER}" >> "$GITHUB_ENV" - echo "PKGREL=${PKGREL}" >> "$GITHUB_ENV" - - mkdir -p pkg - cp lightpanda-${{ env.ARCH }}-${{ env.OS }} pkg/ - cp LICENSE pkg/ - - cat > pkg/PKGBUILD < resolves. Without this, lightpanda_module only + // sees idn2.h transitively if a system libidn2 happens to be installed. 
+ mod.linkLibrary(libidn2); + switch (target.result.os.tag) { .macos => { // needed for proxying on mac @@ -498,6 +505,158 @@ fn buildNghttp2(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.O return lib; } +fn buildLibidn2( + b: *Build, + target: Build.ResolvedTarget, + optimize: std.builtin.OptimizeMode, + is_tsan: bool, +) *Build.Step.Compile { + const dep = b.dependency("libidn2", .{}); + + const os = target.result.os.tag; + const is_darwin = os.isDarwin(); + + const mod = b.createModule(.{ + .target = target, + .optimize = optimize, + .link_libc = true, + .sanitize_thread = is_tsan, + }); + + // libidn2's autoconf+gnulib stack expects a config.h with hundreds of + // HAVE_*/_GL_ATTRIBUTE_* defines — including ~800 lines of attribute- + // detection macros emitted from gnulib-common.m4 via AH_VERBATIM. We + // vendor a single autoconf-generated config.h rather than try to + // reproduce that machinery in the Zig build system. + mod.addIncludePath(b.path("vendor/libidn2")); + + // Substitute the gnulib-style .in.h templates. All @VAR@ in them are + // either DLL-visibility markers (empty for static POSIX) or + // HAVE_UNISTRING_WOE32DLL_H (0). + inline for (.{ "unitypes", "unistr", "uniconv", "unictype", "uninorm" }) |name| { + mod.addConfigHeader(renderUnistringHeader(b, dep, name)); + } + + mod.addIncludePath(dep.path("lib")); + mod.addIncludePath(dep.path("unistring")); + // gl/ holds gnulib helpers — only malloca and version-etc headers are + // referenced from the sources we compile; we don't need the full gl/ shim + // layer (system header replacements). + mod.addIncludePath(dep.path("gl")); + + const lib = b.addLibrary(.{ .name = "idn2", .root_module = mod }); + lib.installHeader(dep.path("lib/idn2.h"), "idn2.h"); + + if (is_darwin) { + // unistring's striconveh.c calls real iconv_*, which on macOS lives + // in libiconv (separate from libSystem). On glibc Linux iconv is in + // libc itself; on musl it would also need a separate -liconv. 
+ mod.linkSystemLibrary("iconv", .{}); + } + + lib.addCSourceFiles(.{ + .root = dep.path("lib"), + .flags = &.{ "-DHAVE_CONFIG_H", "-DIDN2_STATIC" }, + .files = &.{ + "bidi.c", "context.c", "data.c", "decode.c", + "error.c", "free.c", "idna.c", "lookup.c", + "punycode.c", "register.c", "tables.c", "tr46map.c", + "version.c", + }, + }); + lib.addCSourceFiles(.{ + .root = dep.path("gl"), + .flags = &.{"-DHAVE_CONFIG_H"}, + // malloca.c provides striconveha's stack-or-heap allocator; strverscmp + // is a glibc extension absent on macOS that lib/version.c needs. + .files = &.{ "malloca.c", "strverscmp.c" }, + }); + lib.addCSourceFiles(.{ + .root = dep.path("unistring"), + .flags = &.{"-DHAVE_CONFIG_H"}, + .files = &.{ + "c-ctype.c", "c-strcasecmp.c", "c-strncasecmp.c", + "free.c", "iconv.c", "iconv_close.c", + "iconv_open.c", "localcharset.c", "stdlib.c", + "striconveh.c", "striconveha.c", "unistd.c", + "uniconv/u8-conv-from-enc.c", "uniconv/u8-strconv-from-enc.c", "uniconv/u8-strconv-from-locale.c", + "uniconv/u8-strconv-to-enc.c", "uniconv/u8-strconv-to-locale.c", "unictype/bidi_of.c", + "unictype/categ_M.c", "unictype/categ_none.c", "unictype/categ_of.c", + "unictype/categ_test.c", "unictype/combiningclass.c", "unictype/joiningtype_of.c", + "unictype/scripts.c", "uninorm/canonical-decomposition.c", "uninorm/composition.c", + "uninorm/decompose-internal.c", "uninorm/decomposition-table.c", "uninorm/nfc.c", + "uninorm/nfd.c", "uninorm/u32-normalize.c", "unistr/u32-cmp.c", + "unistr/u32-cpy-alloc.c", "unistr/u32-cpy.c", "unistr/u32-mbtouc-unsafe.c", + "unistr/u32-strlen.c", "unistr/u32-to-u8.c", "unistr/u32-uctomb.c", + "unistr/u8-check.c", "unistr/u8-mblen.c", "unistr/u8-mbtouc.c", + "unistr/u8-mbtouc-aux.c", "unistr/u8-mbtouc-unsafe.c", "unistr/u8-mbtouc-unsafe-aux.c", + "unistr/u8-mbtoucr.c", "unistr/u8-prev.c", "unistr/u8-strlen.c", + "unistr/u8-to-u32.c", "unistr/u8-uctomb.c", "unistr/u8-uctomb-aux.c", + }, + }); + + return lib; +} + +/// Process one of 
unistring's `.in.h` template headers into a real `.h`. +/// All `@VAR@` substitutions in these headers are either DLL-visibility markers +/// (empty for static POSIX builds) or `HAVE_UNISTRING_WOE32DLL_H` (0). +fn renderUnistringHeader(b: *Build, dep: *Build.Dependency, name: []const u8) *Build.Step.ConfigHeader { + const in_rel = b.fmt("unistring/{s}.in.h", .{name}); + const out_name = b.fmt("{s}.h", .{name}); + const lazy = dep.path(in_rel); + const path = lazy.getPath3(b, null); + + const file = path.root_dir.handle.openFile(path.sub_path, .{}) catch |e| { + std.debug.panic("openFile {s}: {s}", .{ path.sub_path, @errorName(e) }); + }; + defer file.close(); + const contents = file.readToEndAlloc(b.allocator, 4 << 20) catch @panic("OOM"); + + const ch = b.addConfigHeader(.{ + .include_path = out_name, + .style = .{ .autoconf_at = lazy }, + }, .{}); + + var seen = std.StringHashMap(void).init(b.allocator); + var i: usize = 0; + while (std.mem.indexOfScalarPos(u8, contents, i, '@')) |s| { + const a = s + 1; + const e = std.mem.indexOfScalarPos(u8, contents, a, '@') orelse break; + const var_name = contents[a..e]; + if (!isAtConfigName(var_name)) { + // Stray '@' (e.g. an email address in a comment); advance past it + // alone so we don't mis-pair with a later '@'. + i = s + 1; + continue; + } + const owned = b.allocator.dupe(u8, var_name) catch @panic("OOM"); + const gop = seen.getOrPut(owned) catch @panic("OOM"); + if (!gop.found_existing) { + if (std.mem.eql(u8, var_name, "HAVE_UNISTRING_WOE32DLL_H")) { + ch.addValue(owned, c_int, 0); + } else { + ch.addValue(owned, []const u8, ""); + } + } + i = e + 1; + } + return ch; +} + +fn isAtConfigName(s: []const u8) bool { + if (s.len == 0) return false; + for (s, 0..) 
|c, idx| { + const ok = switch (c) { + 'A'...'Z', '_' => true, + '0'...'9' => idx > 0, + else => false, + }; + if (!ok) return false; + } + return true; +} + fn buildCurl( b: *Build, target: Build.ResolvedTarget, @@ -574,6 +733,11 @@ fn buildCurl( ._FILE_OFFSET_BITS = 64, .USE_IPV6 = true, + // Route IDN hostnames through libidn2 (vendored, see buildLibidn2). + // Without this, libcurl ships UTF-8 host bytes to SNI/cert validation + // and breaks for non-ASCII hostnames like räksmörgås.se. + .HAVE_LIBIDN2 = true, + .HAVE_IDN2_H = true, .CURL_OS = switch (os) { .linux => if (is_android) "\"android\"" else "\"linux\"", else => std.fmt.allocPrint(b.allocator, "\"{s}\"", .{@tagName(os)}) catch @panic("OOM"), diff --git a/build.zig.zon b/build.zig.zon index ef9bfc99..05299a40 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -42,6 +42,10 @@ .url = "git+https://github.com/lightpanda-io/zenai.git#3f61d6a21574a8edfc8ccd599e865db10bf80207", .hash = "zenai-0.0.0-iOY_VIhAAwBZsH_XAUZWE_BcxMUE3-Yf0lM-9DYd4Pyd", }, + .libidn2 = .{ + .url = "https://ftp.gnu.org/gnu/libidn/libidn2-2.3.8.tar.gz", + .hash = "N-V-__8AABGOuAC_dhAN07kfoP4dycCFi8Bka4O-tuhriNH8", + }, }, .paths = .{""}, } diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index c80739fc..9cd25f68 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -412,7 +412,7 @@ pub fn deinit(self: *Frame, abort_http: bool) void { const browser = page.session.browser; browser.env.destroyContext(self.js); - self._script_manager.shutdown = true; + self._script_manager.base.shutdown = true; if (self.parent == null) { browser.http_client.abort(); @@ -535,6 +535,8 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo self.origin = try URL.getOrigin(self.arena, request_url[5.. 
:0]); } else if (self.parent) |parent| { self.origin = parent.origin; + } else if (self.window._opener) |opener| { + self.origin = opener._frame.origin; } else { self.origin = null; } @@ -1339,6 +1341,69 @@ pub fn iframeAddedCallback(self: *Frame, iframe: *IFrame) !void { } } +const OpenPopupOpts = struct { + url: []const u8, + name: []const u8, + opener: ?*Window, +}; + +// Create a new top-level browsing context as a sibling of the root frame. +// The popup shares the Page's arena, factory, and identity map, but has no +// parent and is not attached to the frame tree — it lives in page.popups. +pub fn openPopup(self: *Frame, opts: OpenPopupOpts) !*Frame { + const page = self._page; + const session = self._session; + + const resolved_url: [:0]const u8 = blk: { + if (opts.url.len == 0) { + break :blk "about:blank"; + } + if (std.mem.eql(u8, opts.url, "about:blank")) { + break :blk "about:blank"; + } + const frame_base = base_blk: { + var frame = self; + while (true) { + const maybe_base = frame.base(); + if (!std.mem.eql(u8, maybe_base, "about:blank")) { + break :base_blk maybe_base; + } + frame = frame.parent orelse break :base_blk ""; + } + }; + break :blk try URL.resolve(self.call_arena, frame_base, opts.url, .{ .always_dupe = true, .encoding = self.charset }); + }; + + const popup = try page.frame_arena.create(Frame); + errdefer page.frame_arena.destroy(popup); + + const frame_id = session.nextFrameId(); + try Frame.init(popup, frame_id, page, null); + errdefer popup.deinit(true); + + popup.window._opener = opts.opener; + if (opts.name.len > 0 and + !std.ascii.eqlIgnoreCase(opts.name, "_blank") and + !std.ascii.eqlIgnoreCase(opts.name, "_self") and + !std.ascii.eqlIgnoreCase(opts.name, "_parent") and + !std.ascii.eqlIgnoreCase(opts.name, "_top")) + { + popup.window._name = try page.frame_arena.dupe(u8, opts.name); + } + + const popup_index = page.popups.items.len; + try page.popups.append(page.frame_arena, popup); + // not impossible that navigate adds popups, 
so remove by index + errdefer _ = page.popups.swapRemove(popup_index); + + popup.navigate(resolved_url, .{ .reason = .script }) catch |err| { + log.warn(.frame, "popup navigate failure", .{ .url = resolved_url, .err = err }); + return err; + }; + + return popup; +} + pub fn domChanged(self: *Frame) void { self.version += 1; @@ -3555,7 +3620,7 @@ pub const QueuedNavigation = struct { /// to the appropriateFrame to navigate. /// Returns null if the target is "_blank" (which would open a new window/tab). /// Note: Callers should handle empty target separately (for owner document resolution). -fn resolveTargetFrame(self: *Frame, target_name: []const u8) ?*Frame { +pub fn resolveTargetFrame(self: *Frame, target_name: []const u8) ?*Frame { if (std.ascii.eqlIgnoreCase(target_name, "_self")) { return self; } @@ -3691,6 +3756,14 @@ pub fn handleClick(self: *Frame, target: *Node) !void { } }, .select, .textarea => try element.focus(self), + .label => |label| { + // Per HTML §4.10.4 "The label element", a label's activation + // behavior is to run the synthetic click activation steps on the + // labeled control. Mirrors Chrome's HTMLLabelElement::DefaultEventHandler. 
+ const control = label.getControl(self) orelse return; + const control_html = control.is(Element.Html) orelse return; + try control_html.click(self); + }, else => {}, } } @@ -3769,9 +3842,14 @@ pub fn submitForm(self: *Frame, submitter_: ?*Element, form_: ?*Element.Html.For const form_element = form.asElement(); + const submit_button: ?*Element = blk: { + const s = submitter_ orelse break :blk null; + break :blk if (Element.Html.Form.isSubmitButton(s)) s else null; + }; + const target_name_: ?[]const u8 = blk: { - if (submitter_) |submitter| { - if (submitter.getAttributeSafe(comptime .wrap("formtarget"))) |ft| { + if (submit_button) |s| { + if (s.getAttributeSafe(comptime .wrap("formtarget"))) |ft| { break :blk ft; } } @@ -3824,12 +3902,19 @@ pub fn submitForm(self: *Frame, submitter_: ?*Element, form_: ?*Element.Html.For // button, its formaction/formmethod/formenctype attributes override the // form's corresponding attributes (matching how formtarget is honored above). // https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#concept-form-submit - const enctype = blk: { - if (submitter_) |s| { + const enctype_attr = blk: { + if (submit_button) |s| { if (s.getAttributeSafe(comptime .wrap("formenctype"))) |fe| break :blk fe; } break :blk form_element.getAttributeSafe(comptime .wrap("enctype")); }; + const method = blk: { + if (submit_button) |s| { + if (s.getAttributeSafe(comptime .wrap("formmethod"))) |fm| break :blk fm; + } + break :blk form_element.getAttributeSafe(comptime .wrap("method")) orelse ""; + }; + const is_post = std.ascii.eqlIgnoreCase(method, "post"); // Get charset from accept-charset attribute or fall back to document charset const charset: []const u8 = blk: { @@ -3843,17 +3928,28 @@ pub fn submitForm(self: *Frame, submitter_: ?*Element, form_: ?*Element.Html.For break :blk self.charset; }; - var buf = std.Io.Writer.Allocating.init(arena); - try form_data.write(.{ .enctype = enctype, .charset = charset, .allocator = arena }, 
&buf.writer); - - const method = blk: { - if (submitter_) |s| { - if (s.getAttributeSafe(comptime .wrap("formmethod"))) |fm| break :blk fm; + var boundary_buf: [36]u8 = undefined; + // GET ignores enctype per HTML spec; only resolve the union for POST. + const encoding: FormData.EncType = blk: { + if (is_post) { + if (enctype_attr) |attr| { + if (std.ascii.eqlIgnoreCase(attr, "multipart/form-data")) { + @import("../id.zig").uuidv4(&boundary_buf); + break :blk .{ .formdata = &boundary_buf }; + } + if (!std.ascii.eqlIgnoreCase(attr, "application/x-www-form-urlencoded")) { + log.warn(.not_implemented, "FormData.encoding", .{ .encoding = attr }); + } + } } - break :blk form_element.getAttributeSafe(comptime .wrap("method")) orelse ""; + break :blk .urlencode; }; + + var buf = std.Io.Writer.Allocating.init(arena); + try form_data.write(.{ .encoding = encoding, .charset = charset, .allocator = arena }, &buf.writer); + var action = blk: { - if (submitter_) |s| { + if (submit_button) |s| { if (s.getAttributeSafe(comptime .wrap("formaction"))) |fa| break :blk fa; } break :blk form_element.getAttributeSafe(comptime .wrap("action")) orelse self.url; @@ -3863,11 +3959,13 @@ pub fn submitForm(self: *Frame, submitter_: ?*Element, form_: ?*Element.Html.For .reason = .form, .kind = .{ .push = null }, }; - if (std.ascii.eqlIgnoreCase(method, "post")) { + if (is_post) { opts.method = .POST; opts.body = buf.written(); - // form_data.write currently only supports this encoding, so we know this has to be the content type - opts.header = "Content-Type: application/x-www-form-urlencoded"; + opts.header = switch (encoding) { + .urlencode => "Content-Type: application/x-www-form-urlencoded", + .formdata => |b| try std.fmt.allocPrintSentinel(arena, "Content-Type: multipart/form-data; boundary={s}", .{b}, 0), + }; } else { action = try URL.concatQueryString(arena, action, buf.written()); } diff --git a/src/browser/Page.zig b/src/browser/Page.zig index af58f34a..929cd07c 100644 --- 
a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -88,6 +88,14 @@ queued_queued_navigation: std.ArrayList(*Frame) = .empty, // The root Frame of this Page. Non-optional — a Page always has a root frame. frame: Frame, +// Popup Frames opened by window.open. They are top-level browsing contexts +// (parent == null, no iframe element) but share this Page's factory, arena, +// and identity map. +// Their lifetime is bound to the Page: on Page.deinit they +// are torn down. TODO: this is far from correct. A new window shouldn't be tied +// to the original page like this. +popups: std.ArrayList(*Frame) = .empty, + // Initialize a Page and its root Frame. pub fn init(self: *Page, session: *Session, frame_id: u32) !void { const frame_arena = try session.arena_pool.acquire(.large, "Page.frame_arena"); @@ -107,6 +115,11 @@ pub fn init(self: *Page, session: *Session, frame_id: u32) !void { // Tear down the Page and its root Frame. Equivalent to the old // Session.removePage + Session.resetFrameResources. pub fn deinit(self: *Page, abort_http: bool) void { + for (self.popups.items) |popup| { + popup.deinit(abort_http); + } + self.popups = .empty; + self.frame.deinit(abort_http); const session = self.session; @@ -217,6 +230,16 @@ pub fn findFrameByFrameId(self: *Page, frame_id: u32) ?*Frame { return findFrameBy(&self.frame, "_frame_id", frame_id); } +// Returns the popup Frame registered under `name`, or null. 
+pub fn findPopupByName(self: *Page, name: []const u8) ?*Frame { + for (self.popups.items) |popup| { + if (std.mem.eql(u8, popup.window._name, name)) { + return popup; + } + } + return null; +} + pub fn findFrameByLoaderId(self: *Page, loader_id: u32) ?*Frame { return findFrameBy(&self.frame, "_loader_id", loader_id); } diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 12cc6cb5..316b1b96 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -21,124 +21,69 @@ const lp = @import("lightpanda"); const builtin = @import("builtin"); const HttpClient = @import("HttpClient.zig"); -const http = @import("../network/http.zig"); const js = @import("js/js.zig"); const URL = @import("URL.zig"); const Frame = @import("Frame.zig"); +const ScriptManagerBase = @import("ScriptManagerBase.zig"); const Element = @import("webapi/Element.zig"); const log = lp.log; -const String = lp.String; const Allocator = std.mem.Allocator; const IS_DEBUG = builtin.mode == .Debug; const ScriptManager = @This(); +// Re-exports so Frame / Context callers don't need to import Base directly. +pub const Script = ScriptManagerBase.Script; +pub const ModuleSource = ScriptManagerBase.ModuleSource; + +base: ScriptManagerBase, frame: *Frame, -// used to prevent recursive evaluation -is_evaluating: bool, - -// Only once this is true can deferred scripts be run -static_scripts_done: bool, - -// List of async scripts. We don't care about the execution order of these, but -// on shutdown/abort, we need to cleanup any pending ones. -async_scripts: std.DoublyLinkedList, - -// List of deferred scripts. These must be executed in order, but only once -// dom_loaded == true, -defer_scripts: std.DoublyLinkedList, - -// When an async script is ready, it's queued here. We played with executing -// them as they complete, but it can cause timing issues with v8 module loading. 
-ready_scripts: std.DoublyLinkedList, - -shutdown: bool = false, - -client: *HttpClient, -allocator: Allocator, - -// We can download multiple sync modules in parallel, but we want to process -// them in order. We can't use an std.DoublyLinkedList, like the other script types, -// because the order we load them might not be the order we want to process -// them in (I'm not sure this is true, but as far as I can tell, v8 doesn't -// make any guarantees about the list of sub-module dependencies it gives us -// So this is more like a cache. When an imported module is completed, its -// source is placed here (keyed by the full url) for some point in the future -// when v8 asks for it. -// The type is confusing (too confusing? move to a union). Starts of as `null` -// then transitions to either an error (from errorCallback) or the completed -// buffer from doneCallback -imported_modules: std.StringHashMapUnmanaged(ImportedModule), - -// Mapping between module specifier and resolution. -// see https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/script/type/importmap -// importmap contains resolved urls. -importmap: std.StringHashMapUnmanaged([:0]const u8), - -// have we notified the frame that all scripts are loaded (used to fire the "load" -// event). +// have we notified the frame that all scripts are loaded (used to fire the +// "load" event). 
frame_notified_of_completion: bool, pub fn init(allocator: Allocator, http_client: *HttpClient, frame: *Frame) ScriptManager { + var base = ScriptManagerBase.init(allocator, http_client, .{ .frame = frame }); + base.tail_hook = tailHook; return .{ .frame = frame, - .async_scripts = .{}, - .defer_scripts = .{}, - .ready_scripts = .{}, - .importmap = .empty, - .is_evaluating = false, - .allocator = allocator, - .imported_modules = .empty, - .client = http_client, - .static_scripts_done = false, + .base = base, .frame_notified_of_completion = false, }; } pub fn deinit(self: *ScriptManager) void { - // necessary to free any arenas scripts may be referencing - self.reset(); - - self.imported_modules.deinit(self.allocator); - // we don't deinit self.importmap b/c we use the frame's arena for its - // allocations. + self.base.deinit(); } pub fn reset(self: *ScriptManager) void { - var it = self.imported_modules.valueIterator(); - while (it.next()) |value_ptr| { - switch (value_ptr.state) { - .done => |script| script.deinit(), - else => {}, - } - } - self.imported_modules.clearRetainingCapacity(); - - // Our allocator is the frame arena, it's been reset. We cannot use - // clearAndRetainCapacity, since that space is no longer ours - self.importmap = .empty; - - clearList(&self.defer_scripts); - clearList(&self.async_scripts); - clearList(&self.ready_scripts); - self.static_scripts_done = false; + self.base.reset(); + self.frame_notified_of_completion = false; } -fn clearList(list: *std.DoublyLinkedList) void { - while (list.popFirst()) |n| { - const script: *Script = @fieldParentPtr("node", n); - script.deinit(); +// Frame wrapper uses this to fire documentIsLoaded and scriptsCompletedLoading +// once Base has finished processing its ready / defer queues. 
+pub fn tailHook(base: *ScriptManagerBase) void { + const self: *ScriptManager = @fieldParentPtr("base", base); + const frame = self.frame; + + // When all scripts (normal and deferred) are done loading, the document + // state changes (this ultimately triggers the DOMContentLoaded event). + // Page makes this safe to call multiple times. + frame.documentIsLoaded(); + + if (base.async_scripts.first == null and self.frame_notified_of_completion == false) { + self.frame_notified_of_completion = true; + frame.scriptsCompletedLoading(); } } -fn getHeaders(self: *ScriptManager) !http.Headers { - var headers = try self.client.newHeaders(); - try self.frame.headersForRequest(&headers); - return headers; +fn getHeaders(self: *ScriptManager) !HttpClient.Headers { + return self.base.getHeaders(); } pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_element: *Element.Html.Script, comptime ctx: []const u8) !void { @@ -226,7 +171,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e .kind = kind, .node = .{}, .arena = arena, - .manager = self, + .manager = &self.base, .source = source, .script_element = script_element, .complete = is_inline, @@ -261,7 +206,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e const is_blocking = script.mode == .normal; if (is_blocking == false) { - self.scriptList(script).append(&script.node); + self.base.scriptList(script).append(&script.node); } if (remote_url) |url| { @@ -278,15 +223,15 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e }); } - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; + const was_evaluating = self.base.is_evaluating; + self.base.is_evaluating = true; + defer self.base.is_evaluating = was_evaluating; const headers = try self.getHeaders(); errdefer headers.deinit(); if (is_blocking) { - const response = try 
self.client.syncRequest(arena, .{ + const response = try self.base.client.syncRequest(arena, .{ .url = url, .method = .GET, .frame_id = frame._frame_id, @@ -303,11 +248,11 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e script.complete = true; } else { errdefer { - self.scriptList(script).remove(&script.node); + self.base.scriptList(script).remove(&script.node); // Let the outer errdefer handle releasing the arena if client.request fails } - try self.client.request(.{ + try self.base.client.request(.{ .ctx = script, .params = .{ .url = url, @@ -342,292 +287,17 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e } // could have already been evaluating if this is dynamically added - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; + const was_evaluating = self.base.is_evaluating; + self.base.is_evaluating = true; defer { - self.is_evaluating = was_evaluating; + self.base.is_evaluating = was_evaluating; script.deinit(); } script.eval(frame); } -fn scriptList(self: *ScriptManager, script: *const Script) *std.DoublyLinkedList { - return switch (script.mode) { - .normal => unreachable, // not added to a list, executed immediately - .@"defer" => &self.defer_scripts, - .async, .import_async, .import => &self.async_scripts, - }; -} - -// Resolve a module specifier to an valid URL. -pub fn resolveSpecifier(self: *ScriptManager, arena: Allocator, base: [:0]const u8, specifier: [:0]const u8) ![:0]const u8 { - // If the specifier is mapped in the importmap, return the pre-resolved value. 
- if (self.importmap.get(specifier)) |s| { - return s; - } - - return URL.resolve(arena, base, specifier, .{ .always_dupe = true }); -} - -pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const u8) !void { - const gop = try self.imported_modules.getOrPut(self.allocator, url); - if (gop.found_existing) { - gop.value_ptr.waiters += 1; - return; - } - errdefer _ = self.imported_modules.remove(url); - - const frame = self.frame; - const arena = try frame.getArena(.large, "SM.preloadImport"); - errdefer frame.releaseArena(arena); - - const script = try arena.create(Script); - script.* = .{ - .kind = .module, - .arena = arena, - .url = url, - .node = .{}, - .manager = self, - .complete = false, - .script_element = null, - .source = .{ .remote = .{} }, - .mode = .import, - }; - - gop.value_ptr.* = ImportedModule{}; - - if (comptime IS_DEBUG) { - var ls: js.Local.Scope = undefined; - frame.js.localScope(&ls); - defer ls.deinit(); - - log.debug(.http, "script queue", .{ - .url = url, - .ctx = "module", - .referrer = referrer, - .stack = ls.local.stackTrace() catch "???", - }); - } - - // This seems wrong since we're not dealing with an async import (unlike - // getAsyncModule below), but all we're trying to do here is pre-load the - // script for execution at some point in the future (when waitForImport is - // called). 
- self.async_scripts.append(&script.node); - - self.client.request(.{ - .ctx = script, - .params = .{ - .url = url, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .resource_type = .script, - .notification = frame._session.notification, - }, - .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, - .header_callback = Script.headerCallback, - .data_callback = Script.dataCallback, - .done_callback = Script.doneCallback, - .error_callback = Script.errorCallback, - }) catch |err| { - self.async_scripts.remove(&script.node); - return err; - }; -} - -pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource { - const entry = self.imported_modules.getEntry(url) orelse { - // It shouldn't be possible for v8 to ask for a module that we didn't - // `preloadImport` above. - return error.UnknownModule; - }; - - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; - - var client = self.client; - while (true) { - switch (entry.value_ptr.state) { - .loading => { - _ = try client.tick(200); - continue; - }, - .done => |script| { - var shared = false; - const buffer = entry.value_ptr.buffer; - const waiters = entry.value_ptr.waiters; - - if (waiters == 1) { - self.imported_modules.removeByPtr(entry.key_ptr); - } else { - shared = true; - entry.value_ptr.waiters = waiters - 1; - } - return .{ - .buffer = buffer, - .shared = shared, - .script = script, - }; - }, - .err => return error.Failed, - } - } -} - -pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void { - const frame = self.frame; - const arena = try frame.getArena(.large, "SM.getAsyncImport"); - errdefer frame.releaseArena(arena); - - const script = try arena.create(Script); - script.* = .{ 
- .kind = .module, - .arena = arena, - .url = url, - .node = .{}, - .manager = self, - .complete = false, - .script_element = null, - .source = .{ .remote = .{} }, - .mode = .{ .import_async = .{ - .callback = cb, - .data = cb_data, - } }, - }; - - if (comptime IS_DEBUG) { - var ls: js.Local.Scope = undefined; - frame.js.localScope(&ls); - defer ls.deinit(); - - log.debug(.http, "script queue", .{ - .url = url, - .ctx = "dynamic module", - .referrer = referrer, - .stack = ls.local.stackTrace() catch "???", - }); - } - - // It's possible, but unlikely, for client.request to immediately finish - // a request, thus calling our callback. We generally don't want a call - // from v8 (which is why we're here), to result in a new script evaluation. - // So we block even the slightest change that `client.request` immediately - // executes a callback. - const was_evaluating = self.is_evaluating; - self.is_evaluating = true; - defer self.is_evaluating = was_evaluating; - - self.async_scripts.append(&script.node); - self.client.request(.{ - .ctx = script, - .params = .{ - .url = url, - .method = .GET, - .frame_id = frame._frame_id, - .loader_id = frame._loader_id, - .headers = try self.getHeaders(), - .resource_type = .script, - .cookie_jar = &frame._session.cookie_jar, - .cookie_origin = frame.url, - .notification = frame._session.notification, - }, - .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, - .header_callback = Script.headerCallback, - .data_callback = Script.dataCallback, - .done_callback = Script.doneCallback, - .error_callback = Script.errorCallback, - }) catch |err| { - self.async_scripts.remove(&script.node); - return err; - }; -} - -// Called from the Page to let us know it's done parsing the HTML. Necessary that -// we know this so that we know that we can start evaluating deferred scripts. 
-pub fn staticScriptsDone(self: *ScriptManager) void { - lp.assert(self.static_scripts_done == false, "ScriptManager.staticScriptsDone", .{}); - self.static_scripts_done = true; - self.evaluate(); -} - -fn evaluate(self: *ScriptManager) void { - if (self.is_evaluating) { - // It's possible for a script.eval to cause evaluate to be called again. - return; - } - - const frame = self.frame; - self.is_evaluating = true; - defer self.is_evaluating = false; - - while (self.ready_scripts.popFirst()) |n| { - var script: *Script = @fieldParentPtr("node", n); - switch (script.mode) { - .async => { - defer script.deinit(); - script.eval(frame); - }, - .import_async => |ia| { - if (script.status < 200 or script.status > 299) { - script.deinit(); - ia.callback(ia.data, error.FailedToLoad); - } else { - ia.callback(ia.data, .{ - .shared = false, - .script = script, - .buffer = script.source.remote, - }); - } - }, - else => unreachable, // no other script is put in this list - } - } - - if (self.static_scripts_done == false) { - // We can only execute deferred scripts if - // 1 - all the normal scripts are done - // 2 - we've finished parsing the HTML and at least queued all the scripts - // The last one isn't obvious, but it's possible for self.scripts to - // be empty not because we're done executing all the normal scripts - // but because we're done executing some (or maybe none), but we're still - // parsing the HTML. - return; - } - - while (self.defer_scripts.first) |n| { - var script: *Script = @fieldParentPtr("node", n); - if (script.complete == false) { - return; - } - defer { - _ = self.defer_scripts.popFirst(); - script.deinit(); - } - script.eval(frame); - } - - // At this point all normal scripts and deferred scripts are done, PLUS - // the frame has signaled that it's done parsing HTML (static_scripts_done == true). 
- // - - // When all scripts (normal and deferred) are done loading, the document - // state changes (this ultimately triggers the DOMContentLoaded event). - // Page makes this safe to call multiple times. - frame.documentIsLoaded(); - - if (self.async_scripts.first == null and self.frame_notified_of_completion == false) { - self.frame_notified_of_completion = true; - frame.scriptsCompletedLoading(); - } -} - -fn parseImportmap(self: *ScriptManager, script: *const Script) !void { +pub fn parseImportmap(self: *ScriptManager, script: *const Script) !void { const content = script.source.content(); const Imports = struct { @@ -653,364 +323,13 @@ fn parseImportmap(self: *ScriptManager, script: *const Script) !void { .{}, ); - try self.importmap.put(self.frame.arena, entry.key_ptr.*, resolved_url); + try self.base.importmap.put(self.frame.arena, entry.key_ptr.*, resolved_url); } } -pub const Script = struct { - kind: Kind, - complete: bool, - status: u16 = 0, - source: Source, - url: []const u8, - arena: Allocator, - mode: ExecutionMode, - node: std.DoublyLinkedList.Node, - script_element: ?*Element.Html.Script, - manager: *ScriptManager, - - // for debugging a rare production issue - header_callback_called: bool = false, - - // for debugging a rare production issue - debug_transfer_id: u32 = 0, - debug_transfer_tries: u8 = 0, - debug_transfer_aborted: bool = false, - debug_transfer_bytes_received: usize = 0, - debug_transfer_notified_fail: bool = false, - debug_transfer_auth_challenge: bool = false, - debug_transfer_easy_id: usize = 0, - - const Kind = enum { - module, - javascript, - importmap, - }; - - const Callback = union(enum) { - string: []const u8, - function: js.Function, - }; - - const Source = union(enum) { - @"inline": []const u8, - remote: std.ArrayList(u8), - - fn content(self: Source) []const u8 { - return switch (self) { - .remote => |buf| buf.items, - .@"inline" => |c| c, - }; - } - }; - - const ExecutionMode = union(enum) { - normal, - @"defer", - 
async, - import, - import_async: ImportAsync, - }; - - fn deinit(self: *Script) void { - self.manager.frame.releaseArena(self.arena); - } - - fn startCallback(response: HttpClient.Response) !void { - log.debug(.http, "script fetch start", .{ .req = response }); - } - - fn headerCallback(response: HttpClient.Response) !bool { - const self: *Script = @ptrCast(@alignCast(response.ctx)); - - self.status = response.status().?; - if (response.status() != 200) { - log.info(.http, "script header", .{ - .req = response, - .status = response.status(), - .content_type = response.contentType(), - }); - return false; - } - - if (comptime IS_DEBUG) { - log.debug(.http, "script header", .{ - .req = response, - .status = response.status(), - .content_type = response.contentType(), - }); - } - - switch (response.inner) { - .transfer => |transfer| { - // temp debug, trying to figure out why the next assert sometimes - // fails. Is the buffer just corrupt or is headerCallback really - // being called twice? 
- lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ - .m = @tagName(std.meta.activeTag(self.mode)), - .a1 = self.debug_transfer_id, - .a2 = self.debug_transfer_tries, - .a3 = self.debug_transfer_aborted, - .a4 = self.debug_transfer_bytes_received, - .a5 = self.debug_transfer_notified_fail, - .a8 = self.debug_transfer_auth_challenge, - .a9 = self.debug_transfer_easy_id, - .b1 = transfer.id, - .b2 = transfer._tries, - .b3 = transfer.aborted, - .b4 = transfer.bytes_received, - .b5 = transfer._notified_fail, - .b8 = transfer._auth_challenge != null, - .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, - }); - self.header_callback_called = true; - self.debug_transfer_id = transfer.id; - self.debug_transfer_tries = transfer._tries; - self.debug_transfer_aborted = transfer.aborted; - self.debug_transfer_bytes_received = transfer.bytes_received; - self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_auth_challenge = transfer._auth_challenge != null; - self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; - }, - else => {}, - } - - lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity }); - var buffer: std.ArrayList(u8) = .empty; - if (response.contentLength()) |cl| { - try buffer.ensureTotalCapacity(self.arena, cl); - } - self.source = .{ .remote = buffer }; - return true; - } - - fn dataCallback(response: HttpClient.Response, data: []const u8) !void { - const self: *Script = @ptrCast(@alignCast(response.ctx)); - self._dataCallback(response, data) catch |err| { - log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len }); - return err; - }; - } - - fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void { - try self.source.remote.appendSlice(self.arena, data); - } - - fn doneCallback(ctx: *anyopaque) !void { - const self: *Script = @ptrCast(@alignCast(ctx)); 
- self.complete = true; - if (comptime IS_DEBUG) { - log.debug(.http, "script fetch complete", .{ .req = self.url }); - } - - const manager = self.manager; - if (self.mode == .async or self.mode == .import_async) { - manager.async_scripts.remove(&self.node); - manager.ready_scripts.append(&self.node); - } else if (self.mode == .import) { - manager.async_scripts.remove(&self.node); - const entry = manager.imported_modules.getPtr(self.url).?; - entry.state = .{ .done = self }; - entry.buffer = self.source.remote; - } - manager.evaluate(); - } - - fn errorCallback(ctx: *anyopaque, err: anyerror) void { - const self: *Script = @ptrCast(@alignCast(ctx)); - log.warn(.http, "script fetch error", .{ - .err = err, - .req = self.url, - .mode = std.meta.activeTag(self.mode), - .kind = self.kind, - .status = self.status, - }); - - if (self.mode == .normal) { - // This is blocked in a loop at the end of addFromElement, setting - // it to complete with a status of 0 will signal the error. - self.status = 0; - self.complete = true; - return; - } - - const manager = self.manager; - manager.scriptList(self).remove(&self.node); - if (manager.shutdown) { - self.deinit(); - return; - } - - switch (self.mode) { - .import_async => |ia| ia.callback(ia.data, error.FailedToLoad), - .import => { - const entry = manager.imported_modules.getPtr(self.url).?; - entry.state = .err; - }, - else => {}, - } - self.deinit(); - manager.evaluate(); - } - - fn eval(self: *Script, frame: *Frame) void { - // never evaluated, source is passed back to v8, via callbacks. - if (comptime IS_DEBUG) { - std.debug.assert(self.mode != .import_async); - - // never evaluated, source is passed back to v8 when asked for it. - std.debug.assert(self.mode != .import); - } - - if (frame.isGoingAway()) { - // don't evaluate scripts for a dying frame. 
- return; - } - - const script_element = self.script_element.?; - - const previous_script = frame.document._current_script; - frame.document._current_script = script_element; - defer frame.document._current_script = previous_script; - - // Clear the document.write insertion point for this script - const previous_write_insertion_point = frame.document._write_insertion_point; - frame.document._write_insertion_point = null; - defer frame.document._write_insertion_point = previous_write_insertion_point; - - // inline scripts aren't cached. remote ones are. - const cacheable = self.source == .remote; - - const url = self.url; - - log.info(.browser, "executing script", .{ - .src = url, - .kind = self.kind, - .cacheable = cacheable, - }); - - var ls: js.Local.Scope = undefined; - frame.js.localScope(&ls); - defer ls.deinit(); - - const local = &ls.local; - - // Handle importmap special case here: the content is a JSON containing - // imports. - if (self.kind == .importmap) { - frame._script_manager.parseImportmap(self) catch |err| { - log.err(.browser, "parse importmap script", .{ - .err = err, - .src = url, - .kind = self.kind, - .cacheable = cacheable, - }); - self.executeCallback(comptime .wrap("error"), frame); - return; - }; - self.executeCallback(comptime .wrap("load"), frame); - return; - } - - defer frame._event_manager.clearIgnoreList(); - - var try_catch: js.TryCatch = undefined; - try_catch.init(local); - defer try_catch.deinit(); - - const success = blk: { - const content = self.source.content(); - switch (self.kind) { - .javascript => _ = local.eval(content, url) catch break :blk false, - .module => { - // We don't care about waiting for the evaluation here. - frame.js.module(false, local, content, url, cacheable) catch break :blk false; - }, - .importmap => unreachable, // handled before the try/catch. 
- } - break :blk true; - }; - - if (comptime IS_DEBUG) { - log.debug(.browser, "executed script", .{ .src = url, .success = success }); - } - - defer { - local.runMacrotasks(); // also runs microtasks - _ = frame.js.scheduler.run() catch |err| { - log.err(.frame, "scheduler", .{ .err = err }); - }; - } - - if (success) { - self.executeCallback(comptime .wrap("load"), frame); - return; - } - - const caught = try_catch.caughtOrError(frame.call_arena, error.Unknown); - log.warn(.js, "eval script", .{ - .url = url, - .caught = caught, - .cacheable = cacheable, - }); - - self.executeCallback(comptime .wrap("error"), frame); - } - - fn executeCallback(self: *const Script, typ: String, frame: *Frame) void { - const Event = @import("webapi/Event.zig"); - const event = Event.initTrusted(typ, .{}, frame._page) catch |err| { - log.warn(.js, "script internal callback", .{ - .url = self.url, - .type = typ, - .err = err, - }); - return; - }; - frame._event_manager.dispatchOpts(self.script_element.?.asNode().asEventTarget(), event, .{ .apply_ignore = true }) catch |err| { - log.warn(.js, "script callback", .{ - .url = self.url, - .type = typ, - .err = err, - }); - }; - } -}; - -const ImportAsync = struct { - data: *anyopaque, - callback: ImportAsync.Callback, - - pub const Callback = *const fn (ptr: *anyopaque, result: anyerror!ModuleSource) void; -}; - -pub const ModuleSource = struct { - shared: bool, - script: *Script, - buffer: std.ArrayList(u8), - - pub fn deinit(self: *ModuleSource) void { - if (self.shared == false) { - self.script.deinit(); - } - } - - pub fn src(self: *const ModuleSource) []const u8 { - return self.buffer.items; - } -}; - -const ImportedModule = struct { - waiters: u16 = 1, - state: State = .loading, - buffer: std.ArrayList(u8) = .{}, - - const State = union(enum) { - err, - loading, - done: *Script, - }; -}; +pub fn staticScriptsDone(self: *ScriptManager) void { + self.base.staticScriptsDone(); +} // Parses data:[][;base64], fn parseDataURI(allocator: 
Allocator, src: []const u8) !?[]const u8 { diff --git a/src/browser/ScriptManagerBase.zig b/src/browser/ScriptManagerBase.zig new file mode 100644 index 00000000..ccffc5db --- /dev/null +++ b/src/browser/ScriptManagerBase.zig @@ -0,0 +1,810 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +const std = @import("std"); +const lp = @import("lightpanda"); +const builtin = @import("builtin"); + +const HttpClient = @import("HttpClient.zig"); +const http = @import("../network/http.zig"); + +const js = @import("js/js.zig"); +const URL = @import("URL.zig"); +const Session = @import("Session.zig"); +const Frame = @import("Frame.zig"); +const WorkerGlobalScope = @import("webapi/WorkerGlobalScope.zig"); + +const Element = @import("webapi/Element.zig"); + +const log = lp.log; +const String = lp.String; +const Allocator = std.mem.Allocator; +const IS_DEBUG = builtin.mode == .Debug; + +const ScriptManagerBase = @This(); + +// Either a *Frame (for page ScriptManagers) or *WorkerGlobalScope (for workers). +// Used from HTTP callbacks that only have a *Script in hand; the Script reaches +// the owner through its manager pointer. 
+pub const Owner = union(enum) { + frame: *Frame, + worker: *WorkerGlobalScope, + + pub fn url(self: Owner) [:0]const u8 { + return switch (self) { + .frame => |f| f.url, + .worker => |w| w.url, + }; + } + + pub fn frameId(self: Owner) u32 { + return switch (self) { + .frame => |f| f._frame_id, + .worker => |w| w._worker._frame_id, + }; + } + + pub fn loaderId(self: Owner) u32 { + return switch (self) { + .frame => |f| f._loader_id, + .worker => |w| w._worker._loader_id, + }; + } + + pub fn session(self: Owner) *Session { + return switch (self) { + .frame => |f| f._session, + .worker => |w| w._session, + }; + } + + pub fn jsContext(self: Owner) *js.Context { + return switch (self) { + .frame => |f| f.js, + .worker => |w| w.js, + }; + } + + pub fn addHeaders(self: Owner, headers: *HttpClient.Headers) !void { + switch (self) { + .frame => |f| try f.headersForRequest(headers), + .worker => {}, + } + } +}; + +owner: Owner, + +// used to prevent recursive evaluation +is_evaluating: bool, + +// Only once this is true can deferred scripts be run +static_scripts_done: bool, + +// List of async scripts. We don't care about the execution order of these, but +// on shutdown/abort, we need to cleanup any pending ones. Used for both +// frame-side .async scripts and .import / .import_async modules. +async_scripts: std.DoublyLinkedList, + +// List of deferred scripts. These must be executed in order, but only once +// dom_loaded == true. Workers never populate this list. +defer_scripts: std.DoublyLinkedList, + +// When an async script is ready, it's queued here. +ready_scripts: std.DoublyLinkedList, + +shutdown: bool = false, + +client: *HttpClient, +allocator: Allocator, + +// See ScriptManager.zig for the type's documentation. +imported_modules: std.StringHashMapUnmanaged(ImportedModule), + +// Mapping between module specifier and resolution. 
+// see https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/script/type/importmap +// For workers this stays empty (only Frame authors importmaps via +// ScriptManager.parseImportmap). +importmap: std.StringHashMapUnmanaged([:0]const u8), + +// Called at the end of evaluate() after all Base-owned work has run. Frame +// wrapper uses this to drain defer_scripts and fire documentIsLoaded / +// scriptsCompletedLoading. Null for workers. +tail_hook: ?*const fn (*ScriptManagerBase) void, + +pub fn init(allocator: Allocator, http_client: *HttpClient, owner: Owner) ScriptManagerBase { + return .{ + .owner = owner, + .async_scripts = .{}, + .defer_scripts = .{}, + .ready_scripts = .{}, + .importmap = .empty, + .is_evaluating = false, + .allocator = allocator, + .imported_modules = .empty, + .client = http_client, + .static_scripts_done = false, + .tail_hook = null, + }; +} + +pub fn deinit(self: *ScriptManagerBase) void { + // necessary to free any arenas scripts may be referencing + self.reset(); + + self.imported_modules.deinit(self.allocator); + // we don't deinit self.importmap b/c we use the owner's arena for its + // allocations. +} + +pub fn reset(self: *ScriptManagerBase) void { + var it = self.imported_modules.valueIterator(); + while (it.next()) |value_ptr| { + switch (value_ptr.state) { + .done => |script| script.deinit(), + else => {}, + } + } + self.imported_modules.clearRetainingCapacity(); + + // The importmap's keys/values were allocated from the owner's arena, which + // has been reset. Can't use clearAndRetainCapacity — that space is no + // longer ours. 
+ self.importmap = .empty; + + clearList(&self.defer_scripts); + clearList(&self.async_scripts); + clearList(&self.ready_scripts); + self.static_scripts_done = false; +} + +fn clearList(list: *std.DoublyLinkedList) void { + while (list.popFirst()) |n| { + const script: *Script = @fieldParentPtr("node", n); + script.deinit(); + } +} + +pub fn getHeaders(self: *ScriptManagerBase) !http.Headers { + var headers = try self.client.newHeaders(); + try self.owner.addHeaders(&headers); + return headers; +} + +fn acquireArena(self: *ScriptManagerBase, size_or_bucket: anytype, debug: []const u8) !Allocator { + return self.owner.session().getArena(size_or_bucket, debug); +} + +fn releaseArena(self: *ScriptManagerBase, arena: Allocator) void { + self.owner.session().releaseArena(arena); +} + +pub fn scriptList(self: *ScriptManagerBase, script: *const Script) *std.DoublyLinkedList { + return switch (script.mode) { + .normal => unreachable, // not added to a list, executed immediately + .@"defer" => &self.defer_scripts, + .async, .import_async, .import => &self.async_scripts, + }; +} + +// Resolve a module specifier to a valid URL. +pub fn resolveSpecifier(self: *ScriptManagerBase, arena: Allocator, base: [:0]const u8, specifier: [:0]const u8) ![:0]const u8 { + // If the specifier is mapped in the importmap, return the pre-resolved + // value. For workers this map is empty. 
+ if (self.importmap.get(specifier)) |s| { + return s; + } + + return URL.resolve(arena, base, specifier, .{ .always_dupe = true }); +} + +pub fn preloadImport(self: *ScriptManagerBase, url: [:0]const u8, referrer: []const u8) !void { + const gop = try self.imported_modules.getOrPut(self.allocator, url); + if (gop.found_existing) { + gop.value_ptr.waiters += 1; + return; + } + errdefer _ = self.imported_modules.remove(url); + + const arena = try self.acquireArena(.large, "SM.preloadImport"); + errdefer self.releaseArena(arena); + + const script = try arena.create(Script); + script.* = .{ + .kind = .module, + .arena = arena, + .url = url, + .node = .{}, + .manager = self, + .complete = false, + .script_element = null, + .source = .{ .remote = .{} }, + .mode = .import, + }; + + gop.value_ptr.* = ImportedModule{}; + + if (comptime IS_DEBUG) { + var ls: js.Local.Scope = undefined; + self.owner.jsContext().localScope(&ls); + defer ls.deinit(); + + log.debug(.http, "script queue", .{ + .url = url, + .ctx = "module", + .referrer = referrer, + .stack = ls.local.stackTrace() catch "???", + }); + } + + // This seems wrong since we're not dealing with an async import (unlike + // getAsyncModule below), but all we're trying to do here is pre-load the + // script for execution at some point in the future (when waitForImport is + // called). 
+ self.async_scripts.append(&script.node); + + const session = self.owner.session(); + self.client.request(.{ + .ctx = script, + .params = .{ + .url = url, + .method = .GET, + .frame_id = self.owner.frameId(), + .loader_id = self.owner.loaderId(), + .headers = try self.getHeaders(), + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.owner.url(), + .resource_type = .script, + .notification = session.notification, + }, + .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, + .header_callback = Script.headerCallback, + .data_callback = Script.dataCallback, + .done_callback = Script.doneCallback, + .error_callback = Script.errorCallback, + }) catch |err| { + self.async_scripts.remove(&script.node); + return err; + }; +} + +pub fn waitForImport(self: *ScriptManagerBase, url: [:0]const u8) !ModuleSource { + const entry = self.imported_modules.getEntry(url) orelse { + // It shouldn't be possible for v8 to ask for a module that we didn't + // `preloadImport` above. 
+ return error.UnknownModule; + }; + + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; + + var client = self.client; + while (true) { + switch (entry.value_ptr.state) { + .loading => { + _ = try client.tick(200); + continue; + }, + .done => |script| { + var shared = false; + const buffer = entry.value_ptr.buffer; + const waiters = entry.value_ptr.waiters; + + if (waiters == 1) { + self.imported_modules.removeByPtr(entry.key_ptr); + } else { + shared = true; + entry.value_ptr.waiters = waiters - 1; + } + return .{ + .buffer = buffer, + .shared = shared, + .script = script, + }; + }, + .err => return error.Failed, + } + } +} + +pub fn getAsyncImport(self: *ScriptManagerBase, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void { + const arena = try self.acquireArena(.large, "SM.getAsyncImport"); + errdefer self.releaseArena(arena); + + const script = try arena.create(Script); + script.* = .{ + .kind = .module, + .arena = arena, + .url = url, + .node = .{}, + .manager = self, + .complete = false, + .script_element = null, + .source = .{ .remote = .{} }, + .mode = .{ .import_async = .{ + .callback = cb, + .data = cb_data, + } }, + }; + + if (comptime IS_DEBUG) { + var ls: js.Local.Scope = undefined; + self.owner.jsContext().localScope(&ls); + defer ls.deinit(); + + log.debug(.http, "script queue", .{ + .url = url, + .ctx = "dynamic module", + .referrer = referrer, + .stack = ls.local.stackTrace() catch "???", + }); + } + + // It's possible, but unlikely, for client.request to immediately finish + // a request, thus calling our callback. We generally don't want a call + // from v8 (which is why we're here), to result in a new script evaluation. + // So we block even the slightest change that `client.request` immediately + // executes a callback. 
+ const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; + + const session = self.owner.session(); + self.async_scripts.append(&script.node); + self.client.request(.{ + .ctx = script, + .params = .{ + .url = url, + .method = .GET, + .frame_id = self.owner.frameId(), + .loader_id = self.owner.loaderId(), + .headers = try self.getHeaders(), + .resource_type = .script, + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.owner.url(), + .notification = session.notification, + }, + .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, + .header_callback = Script.headerCallback, + .data_callback = Script.dataCallback, + .done_callback = Script.doneCallback, + .error_callback = Script.errorCallback, + }) catch |err| { + self.async_scripts.remove(&script.node); + return err; + }; +} + +// Called from the Page / Frame to signal it's done parsing the HTML, so +// deferred scripts can start evaluating. Workers never call this. +pub fn staticScriptsDone(self: *ScriptManagerBase) void { + lp.assert(self.static_scripts_done == false, "ScriptManagerBase.staticScriptsDone", .{}); + self.static_scripts_done = true; + self.evaluate(); +} + +pub fn evaluate(self: *ScriptManagerBase) void { + if (self.is_evaluating) { + // It's possible for a script.eval to cause evaluate to be called again. + return; + } + + self.is_evaluating = true; + defer self.is_evaluating = false; + + while (self.ready_scripts.popFirst()) |n| { + var script: *Script = @fieldParentPtr("node", n); + switch (script.mode) { + .async => { + defer script.deinit(); + // Workers never create .async mode scripts. 
+ script.eval(self.owner.frame); + }, + .import_async => |ia| { + if (script.status < 200 or script.status > 299) { + script.deinit(); + ia.callback(ia.data, error.FailedToLoad); + } else { + ia.callback(ia.data, .{ + .shared = false, + .script = script, + .buffer = script.source.remote, + }); + } + }, + else => unreachable, // no other script is put in this list + } + } + + if (self.static_scripts_done == false) { + // We can only execute deferred scripts if + // 1 - all the normal scripts are done + // 2 - we've finished parsing the HTML and at least queued all the scripts + // The last one isn't obvious, but it's possible for self.scripts to + // be empty not because we're done executing all the normal scripts + // but because we're done executing some (or maybe none), but we're still + // parsing the HTML. + return; + } + + while (self.defer_scripts.first) |n| { + var script: *Script = @fieldParentPtr("node", n); + if (script.complete == false) return; + defer { + _ = self.defer_scripts.popFirst(); + script.deinit(); + } + // Only Frames populate defer_scripts. + script.eval(self.owner.frame); + } + + // Frame wrapper uses this to fire documentIsLoaded and + // scriptsCompletedLoading. Null for workers. 
+ if (self.tail_hook) |hook| hook(self); +} + +pub const Script = struct { + kind: Kind, + complete: bool, + status: u16 = 0, + source: Source, + url: []const u8, + arena: Allocator, + mode: ExecutionMode, + node: std.DoublyLinkedList.Node, + script_element: ?*Element.Html.Script, + manager: *ScriptManagerBase, + + // for debugging a rare production issue + header_callback_called: bool = false, + + // for debugging a rare production issue + debug_transfer_id: u32 = 0, + debug_transfer_tries: u8 = 0, + debug_transfer_aborted: bool = false, + debug_transfer_bytes_received: usize = 0, + debug_transfer_notified_fail: bool = false, + debug_transfer_auth_challenge: bool = false, + debug_transfer_easy_id: usize = 0, + + pub const Kind = enum { + module, + javascript, + importmap, + }; + + pub const Source = union(enum) { + @"inline": []const u8, + remote: std.ArrayList(u8), + + pub fn content(self: Source) []const u8 { + return switch (self) { + .remote => |buf| buf.items, + .@"inline" => |c| c, + }; + } + }; + + pub const ExecutionMode = union(enum) { + normal, + @"defer", + async, + import, + import_async: ImportAsync, + }; + + pub fn deinit(self: *Script) void { + self.manager.releaseArena(self.arena); + } + + pub fn startCallback(response: HttpClient.Response) !void { + log.debug(.http, "script fetch start", .{ .req = response }); + } + + pub fn headerCallback(response: HttpClient.Response) !bool { + const self: *Script = @ptrCast(@alignCast(response.ctx)); + + self.status = response.status().?; + if (response.status() != 200) { + log.info(.http, "script header", .{ + .req = response, + .status = response.status(), + .content_type = response.contentType(), + }); + return false; + } + + if (comptime IS_DEBUG) { + log.debug(.http, "script header", .{ + .req = response, + .status = response.status(), + .content_type = response.contentType(), + }); + } + + switch (response.inner) { + .transfer => |transfer| { + // temp debug, trying to figure out why the next assert 
sometimes + // fails. Is the buffer just corrupt or is headerCallback really + // being called twice? + lp.assert(self.header_callback_called == false, "ScriptManagerBase.Header recall", .{ + .m = @tagName(std.meta.activeTag(self.mode)), + .a1 = self.debug_transfer_id, + .a2 = self.debug_transfer_tries, + .a3 = self.debug_transfer_aborted, + .a4 = self.debug_transfer_bytes_received, + .a5 = self.debug_transfer_notified_fail, + .a8 = self.debug_transfer_auth_challenge, + .a9 = self.debug_transfer_easy_id, + .b1 = transfer.id, + .b2 = transfer._tries, + .b3 = transfer.aborted, + .b4 = transfer.bytes_received, + .b5 = transfer._notified_fail, + .b8 = transfer._auth_challenge != null, + .b9 = if (transfer._conn) |c| @intFromPtr(c._easy) else 0, + }); + self.header_callback_called = true; + self.debug_transfer_id = transfer.id; + self.debug_transfer_tries = transfer._tries; + self.debug_transfer_aborted = transfer.aborted; + self.debug_transfer_bytes_received = transfer.bytes_received; + self.debug_transfer_notified_fail = transfer._notified_fail; + self.debug_transfer_auth_challenge = transfer._auth_challenge != null; + self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0; + }, + else => {}, + } + + lp.assert(self.source.remote.capacity == 0, "ScriptManagerBase.Header buffer", .{ .capacity = self.source.remote.capacity }); + var buffer: std.ArrayList(u8) = .empty; + if (response.contentLength()) |cl| { + try buffer.ensureTotalCapacity(self.arena, cl); + } + self.source = .{ .remote = buffer }; + return true; + } + + pub fn dataCallback(response: HttpClient.Response, data: []const u8) !void { + const self: *Script = @ptrCast(@alignCast(response.ctx)); + self._dataCallback(response, data) catch |err| { + log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len }); + return err; + }; + } + + fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void { + try 
self.source.remote.appendSlice(self.arena, data); + } + + pub fn doneCallback(ctx: *anyopaque) !void { + const self: *Script = @ptrCast(@alignCast(ctx)); + self.complete = true; + if (comptime IS_DEBUG) { + log.debug(.http, "script fetch complete", .{ .req = self.url }); + } + + const manager = self.manager; + if (self.mode == .async or self.mode == .import_async) { + manager.async_scripts.remove(&self.node); + manager.ready_scripts.append(&self.node); + } else if (self.mode == .import) { + manager.async_scripts.remove(&self.node); + const entry = manager.imported_modules.getPtr(self.url).?; + entry.state = .{ .done = self }; + entry.buffer = self.source.remote; + } + manager.evaluate(); + } + + pub fn errorCallback(ctx: *anyopaque, err: anyerror) void { + const self: *Script = @ptrCast(@alignCast(ctx)); + log.warn(.http, "script fetch error", .{ + .err = err, + .req = self.url, + .mode = std.meta.activeTag(self.mode), + .kind = self.kind, + .status = self.status, + }); + + if (self.mode == .normal) { + // This is blocked in a loop at the end of addFromElement, setting + // it to complete with a status of 0 will signal the error. + self.status = 0; + self.complete = true; + return; + } + + const manager = self.manager; + manager.scriptList(self).remove(&self.node); + if (manager.shutdown) { + self.deinit(); + return; + } + + switch (self.mode) { + .import_async => |ia| ia.callback(ia.data, error.FailedToLoad), + .import => { + const entry = manager.imported_modules.getPtr(self.url).?; + entry.state = .err; + }, + else => {}, + } + self.deinit(); + manager.evaluate(); + } + + pub fn eval(self: *Script, frame: *Frame) void { + // never evaluated, source is passed back to v8, via callbacks. + if (comptime IS_DEBUG) { + std.debug.assert(self.mode != .import_async); + + // never evaluated, source is passed back to v8 when asked for it. + std.debug.assert(self.mode != .import); + } + + if (frame.isGoingAway()) { + // don't evaluate scripts for a dying frame. 
+ return; + } + + const script_element = self.script_element.?; + + const previous_script = frame.document._current_script; + frame.document._current_script = script_element; + defer frame.document._current_script = previous_script; + + // Clear the document.write insertion point for this script + const previous_write_insertion_point = frame.document._write_insertion_point; + frame.document._write_insertion_point = null; + defer frame.document._write_insertion_point = previous_write_insertion_point; + + // inline scripts aren't cached. remote ones are. + const cacheable = self.source == .remote; + + const url = self.url; + + log.info(.browser, "executing script", .{ + .src = url, + .kind = self.kind, + .cacheable = cacheable, + }); + + var ls: js.Local.Scope = undefined; + frame.js.localScope(&ls); + defer ls.deinit(); + + const local = &ls.local; + + // Handle importmap special case here: the content is a JSON containing + // imports. + if (self.kind == .importmap) { + frame._script_manager.parseImportmap(self) catch |err| { + log.err(.browser, "parse importmap script", .{ + .err = err, + .src = url, + .kind = self.kind, + .cacheable = cacheable, + }); + self.executeCallback(comptime .wrap("error"), frame); + return; + }; + self.executeCallback(comptime .wrap("load"), frame); + return; + } + + defer frame._event_manager.clearIgnoreList(); + + var try_catch: js.TryCatch = undefined; + try_catch.init(local); + defer try_catch.deinit(); + + const success = blk: { + const content = self.source.content(); + switch (self.kind) { + .javascript => _ = local.eval(content, url) catch break :blk false, + .module => { + // We don't care about waiting for the evaluation here. + frame.js.module(false, local, content, url, cacheable) catch break :blk false; + }, + .importmap => unreachable, // handled before the try/catch. 
+ } + break :blk true; + }; + + if (comptime IS_DEBUG) { + log.debug(.browser, "executed script", .{ .src = url, .success = success }); + } + + defer { + local.runMacrotasks(); // also runs microtasks + _ = frame.js.scheduler.run() catch |err| { + log.err(.frame, "scheduler", .{ .err = err }); + }; + } + + if (success) { + self.executeCallback(comptime .wrap("load"), frame); + return; + } + + const caught = try_catch.caughtOrError(frame.call_arena, error.Unknown); + log.warn(.js, "eval script", .{ + .url = url, + .caught = caught, + .cacheable = cacheable, + }); + + self.executeCallback(comptime .wrap("error"), frame); + } + + fn executeCallback(self: *const Script, typ: String, frame: *Frame) void { + const Event = @import("webapi/Event.zig"); + const event = Event.initTrusted(typ, .{}, frame._page) catch |err| { + log.warn(.js, "script internal callback", .{ + .url = self.url, + .type = typ, + .err = err, + }); + return; + }; + frame._event_manager.dispatchOpts(self.script_element.?.asNode().asEventTarget(), event, .{ .apply_ignore = true }) catch |err| { + log.warn(.js, "script callback", .{ + .url = self.url, + .type = typ, + .err = err, + }); + }; + } +}; + +pub const ImportAsync = struct { + data: *anyopaque, + callback: ImportAsync.Callback, + + pub const Callback = *const fn (ptr: *anyopaque, result: anyerror!ModuleSource) void; +}; + +pub const ModuleSource = struct { + shared: bool, + script: *Script, + buffer: std.ArrayList(u8), + + pub fn deinit(self: *ModuleSource) void { + if (self.shared == false) { + self.script.deinit(); + } + } + + pub fn src(self: *const ModuleSource) []const u8 { + return self.buffer.items; + } +}; + +pub const ImportedModule = struct { + waiters: u16 = 1, + state: State = .loading, + buffer: std.ArrayList(u8) = .{}, + + pub const State = union(enum) { + err, + loading, + done: *Script, + }; +}; diff --git a/src/browser/Session.zig b/src/browser/Session.zig index 67686ed5..daa388ef 100644 --- a/src/browser/Session.zig +++ 
b/src/browser/Session.zig @@ -133,9 +133,15 @@ pub fn createPage(self: *Session) !*Frame { } pub fn removePage(self: *Session) void { + lp.assert(self.page != null, "Session.removePage - page is null", .{}); + if (self.page.?.frame._script_manager.base.is_evaluating) { + // Reentrant teardown from a CDP message drained inside syncRequest; + // Session.deinit reclaims the page when the connection closes. + return; + } + // Inform CDP the frame is going to be removed, allowing other worlds to remove themselves before the main one self.notification.dispatch(.frame_remove, .{}); - lp.assert(self.page != null, "Session.removePage - page is null", .{}); self.page.?.deinit(false); self.page = null; @@ -287,6 +293,13 @@ pub fn processQueuedNavigation(self: *Session) !void { } fn processFrameNavigation(self: *Session, frame: *Frame, qn: *QueuedNavigation) !void { + // Popups live on the Page as top-level browsing contexts without a + // parent or iframe element. Their re-navigation path is simpler than + // iframes — no parent bookkeeping to patch. + if (frame.parent == null and frame.iframe == null) { + return self.processPopupNavigation(frame, qn); + } + lp.assert(frame.parent != null, "root queued navigation", .{}); const iframe = frame.iframe.?; @@ -337,6 +350,45 @@ fn processFrameNavigation(self: *Session, frame: *Frame, qn: *QueuedNavigation) }; } +// Re-navigates a popup Frame in place. The Frame pointer stays stable +// (scripts in the opener may hold a cached Window ref — though the Window +// object inside is replaced, matching how iframes behave on navigation). +fn processPopupNavigation(self: *Session, frame: *Frame, qn: *QueuedNavigation) !void { + frame._queued_navigation = null; + defer self.releaseArena(qn.arena); + + // Preserve popup identity fields. _name lives in the Page arena and + // survives Frame.deinit; _opener is just a pointer. 
+ const saved_name = frame.window._name; + const saved_opener = frame.window._opener; + const frame_id = frame._frame_id; + const page = self.currentPage().?; + + frame.deinit(true); + frame.* = undefined; + + errdefer { + // If re-init fails, drop from popups so we don't leave a corpse. + for (page.popups.items, 0..) |p, i| { + if (p == frame) { + _ = page.popups.swapRemove(i); + break; + } + } + } + + try Frame.init(frame, frame_id, page, null); + errdefer frame.deinit(true); + + frame.window._name = saved_name; + frame.window._opener = saved_opener; + + frame.navigate(qn.url, qn.opts) catch |err| { + log.err(.browser, "queued popup navigation error", .{ .err = err }); + return err; + }; +} + fn processRootQueuedNavigation(self: *Session) !void { const current_frame = &self.page.?.frame; const frame_id = current_frame._frame_id; diff --git a/src/browser/URL.zig b/src/browser/URL.zig index d55a23ac..c3bfbde7 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -17,6 +17,8 @@ // along with this program. If not, see . const std = @import("std"); +const idna = @import("../sys/idna.zig"); + const Allocator = std.mem.Allocator; pub const ResolveOpts = struct { @@ -190,11 +192,35 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, o } fn processResolved(allocator: Allocator, url: [:0]const u8, opts: ResolveOpts) ![:0]const u8 { - const encoding = opts.encoding orelse return url; + const encoding = opts.encoding orelse return ensureHostAscii(allocator, url); return ensureEncoded(allocator, url, encoding); } -pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8, encoding: []const u8) ![:0]const u8 { +/// IDNA-only pass: converts a non-ASCII host (`räksmörgås.se`) to its +/// punycode form (`xn--rksmrgs-5wao1o.se`) and leaves everything else alone. 
+fn ensureHostAscii(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { + const hostname = getHostname(url); + if (hostname.len == 0 or !idna.needsAscii(hostname)) { + return url; + } + + const ascii = try idna.toAscii(allocator, hostname); + + // hostname is a slice of url, so its start offset is just pointer arithmetic. + const start = @intFromPtr(hostname.ptr) - @intFromPtr(url.ptr); + const end = start + hostname.len; + var buf = try std.ArrayList(u8).initCapacity(allocator, url.len - hostname.len + ascii.len + 1); + buf.appendSliceAssumeCapacity(url[0..start]); + buf.appendSliceAssumeCapacity(ascii); + buf.appendSliceAssumeCapacity(url[end..]); + buf.appendAssumeCapacity(0); + return buf.items[0 .. buf.items.len - 1 :0]; +} + +pub fn ensureEncoded(allocator: Allocator, url_in: [:0]const u8, encoding: []const u8) ![:0]const u8 { + // Resolve any IDN host first; everything below operates on the ASCII form. + const url = try ensureHostAscii(allocator, url_in); + const scheme_end = std.mem.indexOf(u8, url, "://"); const authority_start = if (scheme_end) |end| end + 3 else 0; const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; diff --git a/src/browser/js/Context.zig b/src/browser/js/Context.zig index 9811eb3d..fc7c3347 100644 --- a/src/browser/js/Context.zig +++ b/src/browser/js/Context.zig @@ -28,7 +28,7 @@ const Execution = @import("Execution.zig"); const Frame = @import("../Frame.zig"); const Page = @import("../Page.zig"); const Session = @import("../Session.zig"); -const ScriptManager = @import("../ScriptManager.zig"); +const ScriptManagerBase = @import("../ScriptManagerBase.zig"); const WorkerGlobalScope = @import("../webapi/WorkerGlobalScope.zig"); const v8 = js.v8; @@ -138,8 +138,9 @@ module_cache: std.StringHashMapUnmanaged(ModuleEntry) = .empty, // necessary to lookup/store the dependent module in the module_cache. 
module_identifier: std.AutoHashMapUnmanaged(u32, [:0]const u8) = .empty, -// the frame's script manager -script_manager: ?*ScriptManager, +// Module-loading plumbing. Frame contexts point at the ScriptManager's +// embedded Base; worker contexts point at WorkerGlobalScope's Base directly. +script_manager: *ScriptManagerBase, // Our macrotasks scheduler: Scheduler, @@ -484,7 +485,7 @@ fn postCompileModule(self: *Context, mod: js.Module, url: [:0]const u8, local: * // dependent modules this module has and start downloading them asap. const requests = mod.getModuleRequests(); const request_len = requests.len(); - const script_manager = self.script_manager.?; + const script_manager = self.script_manager; for (0..request_len) |i| { const specifier = requests.get(i).specifier(local); const normalized_specifier = try script_manager.resolveSpecifier( @@ -590,7 +591,7 @@ pub fn dynamicModuleCallback( return @constCast(local.rejectPromise(.{ .generic_error = "Out of memory" }).handle); }; - const normalized_specifier = self.script_manager.?.resolveSpecifier( + const normalized_specifier = self.script_manager.resolveSpecifier( self.arena, // might need to survive until the module is loaded resource, specifier, @@ -643,7 +644,7 @@ fn _resolveModuleCallback(self: *Context, referrer: js.Module, specifier: [:0]co return error.UnknownModuleReferrer; }; - const normalized_specifier = try self.script_manager.?.resolveSpecifier( + const normalized_specifier = try self.script_manager.resolveSpecifier( self.arena, referrer_path, specifier, @@ -654,12 +655,12 @@ fn _resolveModuleCallback(self: *Context, referrer: js.Module, specifier: [:0]co return local.toLocal(m).handle; } - var source = self.script_manager.?.waitForImport(normalized_specifier) catch |err| switch (err) { + var source = self.script_manager.waitForImport(normalized_specifier) catch |err| switch (err) { error.UnknownModule => blk: { // Module is in cache but was consumed from imported_modules // (e.g., by a previous 
failed resolution). Re-preload and retry. - try self.script_manager.?.preloadImport(normalized_specifier, referrer_path); - break :blk try self.script_manager.?.waitForImport(normalized_specifier); + try self.script_manager.preloadImport(normalized_specifier, referrer_path); + break :blk try self.script_manager.waitForImport(normalized_specifier); }, else => return err, }; @@ -728,7 +729,7 @@ fn _dynamicModuleCallback(self: *Context, specifier: [:0]const u8, referrer: []c }; // Next, we need to actually load it. - self.script_manager.?.getAsyncImport(specifier, dynamicModuleSourceCallback, state, referrer) catch |err| { + self.script_manager.getAsyncImport(specifier, dynamicModuleSourceCallback, state, referrer) catch |err| { const error_msg = local.newString(@errorName(err)); _ = resolver.reject("dynamic module get async", error_msg); }; @@ -797,7 +798,7 @@ fn _dynamicModuleCallback(self: *Context, specifier: [:0]const u8, referrer: []c return promise; } -fn dynamicModuleSourceCallback(ctx: *anyopaque, module_source_: anyerror!ScriptManager.ModuleSource) void { +fn dynamicModuleSourceCallback(ctx: *anyopaque, module_source_: anyerror!ScriptManagerBase.ModuleSource) void { const state: *DynamicModuleResolveState = @ptrCast(@alignCast(ctx)); var self = state.context; diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index b2840e3c..76bd4391 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -296,7 +296,7 @@ fn _createContext(self: *Env, global: anytype, params: ContextParams) !*Context .templates = self.templates, .call_arena = params.call_arena, .microtask_queue = microtask_queue, - .script_manager = if (comptime is_frame) &global._script_manager else null, + .script_manager = if (comptime is_frame) &global._script_manager.base else &global._script_manager, .scheduler = .init(context_arena), .identity = params.identity, .identity_arena = params.identity_arena, diff --git a/src/browser/tests/crypto.html b/src/browser/tests/crypto.html 
index c584cd84..59d6522e 100644 --- a/src/browser/tests/crypto.html +++ b/src/browser/tests/crypto.html @@ -120,6 +120,135 @@ }); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + + diff --git a/src/browser/tests/url.html b/src/browser/tests/url.html index 3b1d2add..60f991ea 100644 --- a/src/browser/tests/url.html +++ b/src/browser/tests/url.html @@ -871,3 +871,52 @@ testing.expectEqual('', url.search); } + + diff --git a/src/browser/tests/window/open.html b/src/browser/tests/window/open.html new file mode 100644 index 00000000..4bc0b72d --- /dev/null +++ b/src/browser/tests/window/open.html @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/browser/tests/worker/import-module.js b/src/browser/tests/worker/import-module.js new file mode 100644 index 00000000..dc73ee18 --- /dev/null +++ b/src/browser/tests/worker/import-module.js @@ -0,0 +1,4 @@ +export const message = 'imported from module'; +export function multiply(a, b) { + return a * b; +} diff --git a/src/browser/tests/worker/import-worker.js b/src/browser/tests/worker/import-worker.js new file mode 100644 index 00000000..726874ce --- /dev/null +++ b/src/browser/tests/worker/import-worker.js @@ -0,0 +1,14 @@ +// Dynamic import() in a classic worker — before the ScriptManagerBase +// split, this path crashed on a null script_manager unwrap. +(async function() { + try { + const mod = await import('./import-module.js'); + postMessage({ + ok: true, + message: mod.message, + product: mod.multiply(6, 7), + }); + } catch (e) { + postMessage({ ok: false, err: String(e) }); + } +})(); diff --git a/src/browser/tests/worker/module-test-worker.js b/src/browser/tests/worker/module-test-worker.js new file mode 100644 index 00000000..86409d5d --- /dev/null +++ b/src/browser/tests/worker/module-test-worker.js @@ -0,0 +1,49 @@ +// Exercises module imports inside a worker. Classic workers can't use +// top-level `import`, so all imports go through dynamic import() — which +// is the path the ScriptManagerBase split was made to enable. 
+(async function () { + const results = {}; + try { + const m1 = await import('./modules/base.js'); + results.basic_baseValue = m1.baseValue; + + const m2 = await import('./modules/importer.js'); + results.transitive_importedValue = m2.importedValue; + results.transitive_localValue = m2.localValue; + + const m3 = await import('./modules/re-exporter.js'); + results.reexport_baseValue = m3.baseValue; + results.reexport_importedValue = m3.importedValue; + results.reexport_localValue = m3.localValue; + + const m4a = await import('./modules/shared.js'); + results.shared_first_inc = m4a.increment(); + results.shared_first_count = m4a.getCount(); + const m4b = await import('./modules/shared.js'); + results.shared_second_inc = m4b.increment(); + results.shared_second_count = m4b.getCount(); + results.shared_same_module = m4a === m4b; + + const ma = await import('./modules/circular-a.js'); + const mb = await import('./modules/circular-b.js'); + results.circular_aValue = ma.aValue; + results.circular_bValue = mb.bValue; + results.circular_getFromB = ma.getFromB(); + results.circular_getFromA = mb.getFromA(); + + const mm = await import('./modules/meta.js'); + results.meta_url_endsWith = mm.moduleUrl.endsWith('/tests/worker/modules/meta.js'); + + let import_404_threw = false; + try { + await import('./modules/nonexistent.js'); + } catch (e) { + import_404_threw = e.toString().includes('FailedToLoad'); + } + results.import_404_threw = import_404_threw; + + postMessage({ ok: true, results }); + } catch (e) { + postMessage({ ok: false, err: String(e), stack: e.stack }); + } +})(); diff --git a/src/browser/tests/worker/module.html b/src/browser/tests/worker/module.html new file mode 100644 index 00000000..0010de19 --- /dev/null +++ b/src/browser/tests/worker/module.html @@ -0,0 +1,51 @@ + + + + + diff --git a/src/browser/tests/worker/modules/base.js b/src/browser/tests/worker/modules/base.js new file mode 100644 index 00000000..aac2ce46 --- /dev/null +++ 
b/src/browser/tests/worker/modules/base.js @@ -0,0 +1 @@ +export const baseValue = 'from-base'; diff --git a/src/browser/tests/worker/modules/circular-a.js b/src/browser/tests/worker/modules/circular-a.js new file mode 100644 index 00000000..e6f79f6b --- /dev/null +++ b/src/browser/tests/worker/modules/circular-a.js @@ -0,0 +1,7 @@ +import { getBValue } from './circular-b.js'; + +export const aValue = 'a'; + +export function getFromB() { + return getBValue(); +} diff --git a/src/browser/tests/worker/modules/circular-b.js b/src/browser/tests/worker/modules/circular-b.js new file mode 100644 index 00000000..36902a6a --- /dev/null +++ b/src/browser/tests/worker/modules/circular-b.js @@ -0,0 +1,11 @@ +import { aValue } from './circular-a.js'; + +export const bValue = 'b'; + +export function getBValue() { + return bValue; +} + +export function getFromA() { + return aValue; +} diff --git a/src/browser/tests/worker/modules/importer.js b/src/browser/tests/worker/modules/importer.js new file mode 100644 index 00000000..c2351a8b --- /dev/null +++ b/src/browser/tests/worker/modules/importer.js @@ -0,0 +1,4 @@ +import { baseValue } from './base.js'; + +export const importedValue = baseValue; +export const localValue = 'local'; diff --git a/src/browser/tests/worker/modules/meta.js b/src/browser/tests/worker/modules/meta.js new file mode 100644 index 00000000..092ce75e --- /dev/null +++ b/src/browser/tests/worker/modules/meta.js @@ -0,0 +1 @@ +export const moduleUrl = import.meta.url; diff --git a/src/browser/tests/worker/modules/re-exporter.js b/src/browser/tests/worker/modules/re-exporter.js new file mode 100644 index 00000000..1d882b10 --- /dev/null +++ b/src/browser/tests/worker/modules/re-exporter.js @@ -0,0 +1,2 @@ +export { baseValue } from './base.js'; +export { importedValue, localValue } from './importer.js'; diff --git a/src/browser/tests/worker/modules/shared.js b/src/browser/tests/worker/modules/shared.js new file mode 100644 index 00000000..4603c3b6 --- /dev/null 
+++ b/src/browser/tests/worker/modules/shared.js @@ -0,0 +1,9 @@ +let counter = 0; + +export function increment() { + return ++counter; +} + +export function getCount() { + return counter; +} diff --git a/src/browser/tests/worker/worker.html b/src/browser/tests/worker/worker.html index 1d912060..b6d671b8 100644 --- a/src/browser/tests/worker/worker.html +++ b/src/browser/tests/worker/worker.html @@ -224,6 +224,23 @@ } + +