diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig
index 8aedf41e..e407ab12 100644
--- a/src/browser/Frame.zig
+++ b/src/browser/Frame.zig
@@ -622,6 +622,12 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
     if (opts.header) |hdr| {
         try headers.add(hdr);
     }
+    if (opts.referer) |ref| {
+        // RFC 9110 section 10.1.3: the fragment must never be sent in Referer.
+        const ref_end = std.mem.indexOfScalar(u8, ref, '#') orelse ref.len;
+        const ref_header = try std.mem.concatWithSentinel(self.arena, u8, &.{ "Referer: ", ref[0..ref_end] }, 0);
+        try headers.add(ref_header);
+    }
     // We dispatch frame_navigate event before sending the request.
     // It ensures the event frame_navigated is not dispatched before this one.
     session.notification.dispatch(.frame_navigate, &.{
@@ -754,9 +760,18 @@ fn scheduleNavigationWithArena(originator: *Frame, arena: Allocator, request_url
         session.browser.http_client.abortFrame(target._frame_id);
     }
 
+    // Capture the originating frame's URL as the Referer for this
+    // navigation. The originator's frame may be torn down before navigate()
+    // runs (processRootQueuedNavigation rebuilds the Page in-place), so dup
+    // into the QueuedNavigation arena which outlives that tear-down.
+    var nav_opts = opts;
+    if (nav_opts.referer == null and std.mem.startsWith(u8, originator.url, "http")) {
+        nav_opts.referer = try arena.dupe(u8, originator.url);
+    }
+
     const qn = try arena.create(QueuedNavigation);
     qn.* = .{
-        .opts = opts,
+        .opts = nav_opts,
         .arena = arena,
         .url = resolved_url,
         .is_about_blank = is_about_blank,
@@ -1279,7 +1294,12 @@ pub fn iframeAddedCallback(self: *Frame, iframe: *IFrame) !void {
         );
     };
 
-    new_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| {
+    new_frame.navigate(url, .{
+        .reason = .initialFrameNavigation,
+        // Iframe's initial src request carries the parent's URL as Referer.
+        // Parent frame outlives this navigate() call, so the slice is safe.
+        .referer = if (std.mem.startsWith(u8, self.url, "http")) self.url else null,
+    }) catch |err| {
         log.warn(.frame, "iframe navigate failure", .{ .url = url, .err = err });
         self._pending_loads -= 1;
         iframe._window = null;
@@ -3452,6 +3472,10 @@ pub const NavigateOpts = struct {
     method: HttpClient.Method = .GET,
     body: ?[]const u8 = null,
     header: ?[:0]const u8 = null,
+    // URL to send as the Referer header. Set by scheduleNavigationWithArena
+    // (anchor click / form submit / location.href) and by iframeAddedCallback
+    // (initial iframe load). null on CDP Page.navigate and Page.reload.
+    referer: ?[]const u8 = null,
     force: bool = false,
     kind: NavigationKind = .{ .push = null },
 };
diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig
index ee6f0990..c77319ae 100644
--- a/src/cdp/domains/page.zig
+++ b/src/cdp/domains/page.zig
@@ -1182,6 +1182,89 @@ test "cdp.frame: navigate inherits original fragment across redirect" {
     }
 }
 
+test "cdp.frame: anchor click sends Referer matching the originating page" {
+    // HTML Living Standard "navigate" algorithm + Fetch §4.5 "request's referrer":
+    // when a navigation is initiated by a hyperlink click (or form submit, or
+    // location.href assignment), the resulting request carries a Referer
+    // header equal to the originating document's URL.
+    var ctx = try testing.context();
+    defer ctx.deinit();
+
+    const cdp_inst = ctx.cdp();
+    _ = try cdp_inst.createBrowserContext();
+    var bc = &cdp_inst.browser_context.?;
+    bc.id = "BID-A18";
+    bc.session_id = "SID-A18";
+    bc.target_id = "TID-A18-000000".*;
+
+    // Initial navigation to the page hosting the anchor — driven directly via
+    // Frame.navigate(.address_bar), so this request itself has no Referer.
+    {
+        const f = try bc.session.createPage();
+        try f.navigate("http://127.0.0.1:9582/referer_link.html", .{});
+        var runner = try bc.session.runner(.{});
+        try runner.wait(.{ .ms = 2000 });
+    }
+
+    // Click the anchor via JS. The click goes through Frame.scheduleNavigation
+    // (.reason = .script), which must capture the originating frame's URL as
+    // the Referer for the queued navigation.
+    {
+        const f = bc.session.currentFrame() orelse unreachable;
+        var ls: js.Local.Scope = undefined;
+        f.js.localScope(&ls);
+        defer ls.deinit();
+        _ = try ls.local.exec("document.getElementById('link').click()", null);
+        var runner = try bc.session.runner(.{});
+        try runner.wait(.{ .ms = 2000 });
+    }
+
+    // After the click navigation completes, the loaded page is /echo_referer
+    // and its body echoes the Referer header the server actually saw.
+    {
+        const f = bc.session.currentFrame() orelse unreachable;
+        var ls: js.Local.Scope = undefined;
+        f.js.localScope(&ls);
+        defer ls.deinit();
+        const v = try ls.local.exec(
+            "document.body.innerText.includes('referer=http://127.0.0.1:9582/referer_link.html')",
+            null,
+        );
+        try testing.expect(v.toBool());
+    }
+}
+
+test "cdp.frame: address-bar Page.navigate sends no Referer" {
+    // Regression guard: navigations initiated by the user agent itself (CDP
+    // Page.navigate, address-bar typed URLs, Page.reload) must not leak the
+    // previous page's URL as Referer. Matches Chrome.
+    var ctx = try testing.context();
+    defer ctx.deinit();
+
+    const cdp_inst = ctx.cdp();
+    _ = try cdp_inst.createBrowserContext();
+    var bc = &cdp_inst.browser_context.?;
+    bc.id = "BID-A18B";
+    bc.session_id = "SID-A18B";
+    bc.target_id = "TID-A18B-00000".*;
+
+    {
+        const f = try bc.session.createPage();
+        try f.navigate("http://127.0.0.1:9582/echo_referer", .{});
+        var runner = try bc.session.runner(.{});
+        try runner.wait(.{ .ms = 2000 });
+    }
+
+    {
+        const f = bc.session.currentFrame() orelse unreachable;
+        var ls: js.Local.Scope = undefined;
+        f.js.localScope(&ls);
+        defer ls.deinit();
+        const v = try ls.local.exec("document.body.innerText.includes('referer=NONE')", null);
+        try testing.expect(v.toBool());
+    }
+}
+
 test "cdp.frame: addScriptToEvaluateOnNewDocument" {
     var ctx = try testing.context();
     defer ctx.deinit();
diff --git a/src/testing.zig b/src/testing.zig
index 2fb04668..1ea704b0 100644
--- a/src/testing.zig
+++ b/src/testing.zig
@@ -662,6 +662,39 @@ fn testHTTPHandler(req: *std.http.Server.Request) !void {
         });
     }
 
+    if (std.mem.eql(u8, path, "/echo_referer")) {
+        // Echo the request's Referer header back as HTML so tests can assert
+        // what Referer the navigation sent. Used by the cross-page Referer test.
+        var it = req.iterateHeaders();
+        var referer: []const u8 = "NONE";
+        while (it.next()) |h| {
+            if (std.ascii.eqlIgnoreCase(h.name, "Referer")) {
+                referer = h.value;
+                break;
+            }
+        }
+        var html_buf: [512]u8 = undefined;
+        const html = try std.fmt.bufPrint(&html_buf, "referer={s}", .{referer});
+        return req.respond(html, .{
+            .extra_headers = &.{
+                .{ .name = "Content-Type", .value = "text/html; charset=utf-8" },
+            },
+        });
+    }
+
+    if (std.mem.eql(u8, path, "/referer_link.html")) {
+        // Page with an anchor link to /echo_referer. The test clicks the link
+        // via JS and asserts the resulting page reports Referer = this page.
+        return req.respond(
+            "<a id=\"link\" href=\"/echo_referer\">go</a>",
+            .{
+                .extra_headers = &.{
+                    .{ .name = "Content-Type", .value = "text/html; charset=utf-8" },
+                },
+            },
+        );
+    }
+
     if (std.mem.eql(u8, path, "/echo_method")) {
         // Echo the request method back as HTML so tests can assert on what
         // method the navigation used. Used by the Page.reload-replays-POST test.