From 3e409d49e9d803dd6ae54aa9b598f4e96048f6c5 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Sun, 17 May 2026 16:19:55 +0200 Subject: [PATCH] Implement external stylesheet fetch + parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires up --enable-external-stylesheets / LP.configureLoading.externalStylesheets from the prior surface-only commit. When the flag is set, parser- and JS-created `<link rel="stylesheet">` elements now synchronously fetch and parse their href, register a CSSStyleSheet on document.styleSheets, and feed StyleManager so checkVisibility() reflects external rules. Flag stays default-off — scrapers that don't need accurate visibility pay nothing. Frame.loadExternalStylesheet mirrors ScriptManager.addFromElement: same HttpClient.syncRequest path, same arena ownership, same per-frame notification + cookie wiring. Body is routed through CSSStyleSheet.replaceSync, which already parses, populates cssRules, and calls sheetModified() — no StyleManager changes needed. A 2 MiB hard cap applies to a single sheet body; a non-2xx status and an oversize body both fire `error` on the link. Link.Build.created is added so static head `<link>` elements reach linkAddedCallback at all — void elements never trigger nodeComplete, which is why a static `<link rel="stylesheet">` had no observable effect before. Mirrors Image.Build.created. HttpClient.Request.ResourceType gains a `.stylesheet` variant so CDP Network events report the right type; the matching switches in src/cdp/domains/fetch.zig are updated.
Refs #2343 --- src/browser/Frame.zig | 89 +++++++++++++++++++ src/browser/HttpClient.zig | 2 + src/browser/Session.zig | 6 +- .../tests/css/external_stylesheet.html | 85 ++++++++++++++++++ src/browser/tests/element/html/link.html | 16 ++++ src/browser/webapi/element/html/Link.zig | 23 +++++ src/cdp/domains/fetch.zig | 2 + src/testing.zig | 40 +++++++++ 8 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 src/browser/tests/css/external_stylesheet.html diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index 1f081ded..b23a85aa 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -52,6 +52,7 @@ const AbstractRange = @import("webapi/AbstractRange.zig"); const MutationObserver = @import("webapi/MutationObserver.zig"); const IntersectionObserver = @import("webapi/IntersectionObserver.zig"); const Worker = @import("webapi/Worker.zig"); +const CSSStyleSheet = @import("webapi/css/CSSStyleSheet.zig"); const CustomElementDefinition = @import("webapi/CustomElementDefinition.zig"); const PageTransitionEvent = @import("webapi/event/PageTransitionEvent.zig"); const SubmitEvent = @import("webapi/event/SubmitEvent.zig"); @@ -1679,6 +1680,94 @@ pub fn queueLoad(self: *Frame, html: *Element.Html) !void { } } +// Hard cap on a single external stylesheet body. CSS rule storage is per- +// arena so a hostile sheet could otherwise inflate page memory; 2 MiB covers +// typical real-world sheets. Known outlier: Tailwind's `preflight + utilities` +// build is ~400 KiB gzipped but ~3 MiB raw, i.e. over this cap — at that size +// a site should be splitting by route anyway. +const MAX_STYLESHEET_BYTES: usize = 2 * 1024 * 1024; + +// Synchronously fetch and parse an external `<link rel="stylesheet">`. Opt-in +// behind `session.load_external_stylesheets` — scrapers/crawlers that don't +// need accurate visibility checks still get the cheap no-fetch path via +// `Link.linkAddedCallback`.
Mirrors `ScriptManager.addFromElement`'s use of +// `syncRequest`: stylesheets are render-blocking in real browsers, so a +// synchronous fetch from inside the parser callback matches expected +// document-load ordering without manual `_pending_loads` bookkeeping. +pub fn loadExternalStylesheet(self: *Frame, link: *Element.Html.Link) !void { + if (self.isGoingAway()) return; + + const element = link.asElement(); + const href = element.getAttributeSafe(comptime .wrap("href")) orelse return; + if (href.len == 0) return; + + const arena = try self.getArena(.medium, "Frame.loadExternalStylesheet"); + defer self._session.releaseArena(arena); + + const resolved = URL.resolve(arena, self.base(), href, .{ .encoding = self.charset }) catch |err| { + log.warn(.browser, "external stylesheet resolve", .{ .err = err, .href = href }); + try self.fireLinkEvent(link, comptime .wrap("error")); + return; + }; + + const session = self._session; + // HttpClient takes ownership of `headers` via the request struct (see + // HttpClient.zig:411 — must NOT pair with a local `defer deinit`). 
+ var headers = try session.browser.http_client.newHeaders(); + try headers.add("Accept: text/css,*/*;q=0.1"); + + var response = session.browser.http_client.syncRequest(arena, .{ + .url = resolved, + .method = .GET, + .frame_id = self._frame_id, + .loader_id = self._loader_id, + .headers = headers, + .cookie_jar = &session.cookie_jar, + .cookie_origin = self.url, + .resource_type = .stylesheet, + .notification = session.notification, + }) catch |err| { + log.warn(.browser, "external stylesheet fetch", .{ .err = err, .url = resolved }); + try self.fireLinkEvent(link, comptime .wrap("error")); + return; + }; + defer response.deinit(arena); + + if (response.status < 200 or response.status >= 300) { + log.info(.browser, "external stylesheet status", .{ .status = response.status, .url = resolved }); + try self.fireLinkEvent(link, comptime .wrap("error")); + return; + } + + if (response.body.items.len > MAX_STYLESHEET_BYTES) { + log.warn(.browser, "external stylesheet too large", .{ + .bytes = response.body.items.len, + .max = MAX_STYLESHEET_BYTES, + .url = resolved, + }); + try self.fireLinkEvent(link, comptime .wrap("error")); + return; + } + + const sheet = try CSSStyleSheet.initWithOwner(element, self); + sheet._href = try self.arena.dupe(u8, resolved); + sheet.replaceSync(response.body.items, self) catch |err| { + log.warn(.browser, "external stylesheet parse", .{ .err = err, .url = resolved }); + try self.fireLinkEvent(link, comptime .wrap("error")); + return; + }; + + const sheets = try self.document.getStyleSheets(self); + try sheets.add(sheet, self); + + try self.fireLinkEvent(link, comptime .wrap("load")); +} + +fn fireLinkEvent(self: *Frame, link: *Element.Html.Link, name: String) !void { + const event = try Event.initTrusted(name, .{}, self._page); + try self._event_manager.dispatch(link._proto.asEventTarget(), event); +} + fn dispatchLoad(self: *Frame) !void { const has_dom_load_listener = self._event_manager.has_dom_load_listener; diff --git 
a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 6f2d19b7..04b5661a 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -910,6 +910,7 @@ pub const Request = struct { xhr, script, fetch, + stylesheet, // Allowed Values: Document, Stylesheet, Image, Media, Font, Script, // TextTrack, XHR, Fetch, Prefetch, EventSource, WebSocket, Manifest, @@ -921,6 +922,7 @@ pub const Request = struct { .xhr => "XHR", .script => "Script", .fetch => "Fetch", + .stylesheet => "Stylesheet", }; } }; diff --git a/src/browser/Session.zig b/src/browser/Session.zig index d044d0ef..70ec5d3e 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -95,9 +95,9 @@ worker_loading_enabled: bool = true, // false to preserve the current rendering-free fast path: drivers that // don't need accurate visibility checks pay nothing. Set from the // `--enable-external-stylesheets` CLI flag at session init; the -// LP.configureLoading CDP method can flip it per-session. Currently -// unread — the fetch path lands in a follow-up that depends on the -// network refactor in #2303. +// LP.configureLoading CDP method can flip it per-session. When true, +// `Link.linkAddedCallback` routes to `Frame.loadExternalStylesheet` +// (synchronous fetch + parse + register on `document.styleSheets`). load_external_stylesheets: bool = false, pub fn init(self: *Session, browser: *Browser, notification: *Notification) !void { diff --git a/src/browser/tests/css/external_stylesheet.html b/src/browser/tests/css/external_stylesheet.html new file mode 100644 index 00000000..cbd6df0d --- /dev/null +++ b/src/browser/tests/css/external_stylesheet.html @@ -0,0 +1,85 @@ + + + + + + +
always visible
+
hidden by external rule
+ + + + + + + + + + + diff --git a/src/browser/tests/element/html/link.html b/src/browser/tests/element/html/link.html index 9f4dd6a8..d3f2faa0 100644 --- a/src/browser/tests/element/html/link.html +++ b/src/browser/tests/element/html/link.html @@ -137,3 +137,19 @@ }); } + + diff --git a/src/browser/webapi/element/html/Link.zig b/src/browser/webapi/element/html/Link.zig index 1b83dacf..2d5bfef6 100644 --- a/src/browser/webapi/element/html/Link.zig +++ b/src/browser/webapi/element/html/Link.zig @@ -114,6 +114,14 @@ pub fn linkAddedCallback(self: *Link, frame: *Frame) !void { return; } + // Opt-in fetch for `rel="stylesheet"` — drives `frame.loadExternalStylesheet`, + // which fires the load/error event itself. Other rels (preload, + // modulepreload) and the disabled case keep the rendering-free stub that + // fires a synthetic `load` event without touching the network. + if (std.mem.eql(u8, rel, "stylesheet") and frame._session.load_external_stylesheets) { + return frame.loadExternalStylesheet(self); + } + try frame.queueLoad(self._proto); } @@ -143,7 +151,22 @@ pub const JsApi = struct { } }; +// Parser-created `<link>` elements are void (no closing tag) so they never +// reach `Frame.nodeComplete`. Mirror `Image.Build.created` so static head +// links in HTML go through `linkAddedCallback` at element-create time, +// with attributes already populated by `populateElementAttributes`.
+pub const Build = struct { + pub fn created(node: *Node, frame: *Frame) !void { + const self = node.as(Link); + return self.linkAddedCallback(frame); + } +}; + const testing = @import("../../../../testing.zig"); test "WebApi: HTML.Link" { try testing.htmlRunner("element/html/link.html", .{}); } + +test "WebApi: HTML.Link external stylesheet" { + try testing.htmlRunner("css/external_stylesheet.html", .{ .load_external_stylesheets = true }); +} diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 64a1ec7e..b2831abe 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -209,6 +209,7 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification. .xhr => "XHR", .document => "Document", .fetch => "Fetch", + .stylesheet => "Stylesheet", }, .networkId = &id.toRequestId(transfer), // matches the Network REQ-ID }, .{ .session_id = session_id }); @@ -453,6 +454,7 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati .xhr => "XHR", .document => "Document", .fetch => "Fetch", + .stylesheet => "Stylesheet", }, .authChallenge = .{ .origin = "", // TODO get origin, could be the proxy address for example. 
diff --git a/src/testing.zig b/src/testing.zig index 3897a06a..a9ecff71 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -341,6 +341,7 @@ const WEB_API_TEST_ROOT = "src/browser/tests/"; const HtmlRunnerOpts = struct { timeout_ms: u32 = 2000, inject_script: ?[]const u8 = null, + load_external_stylesheets: bool = false, }; pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void { @@ -353,6 +354,9 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void { } defer test_session.inject_scripts = &.{}; + test_session.load_external_stylesheets = opts.load_external_stylesheets; + defer test_session.load_external_stylesheets = false; + const root = try std.fs.path.joinZ(arena_allocator, &.{ WEB_API_TEST_ROOT, path }); const stat = std.fs.cwd().statFile(root) catch |err| { std.debug.print("Failed to stat file: '{s}'", .{root}); @@ -678,6 +682,42 @@ fn testHTTPHandler(req: *std.http.Server.Request) !void { }); } + if (std.mem.eql(u8, path, "/styles/visibility.css")) { + // Used by css/external_stylesheet.html — drives the visibility + // cascade through StyleManager via Frame.loadExternalStylesheet + // so a `.ext-hide` element is observable to checkVisibility(). + return req.respond(".ext-hide { display: none; }", .{ + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/css" }, + }, + }); + } + + if (std.mem.eql(u8, path, "/styles/404.css")) { + return req.respond("/* unused */", .{ + .status = .not_found, + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/css" }, + }, + }); + } + + if (std.mem.eql(u8, path, "/styles/oversize.css")) { + // Body that exceeds Frame.MAX_STYLESHEET_BYTES (2 MiB) — written as a + // long sequence of valid declarations so the response itself parses + // fine and the error path is exercised by the size cap, not by a + // CSS parse failure. 
+ const chunk = ".pad { color: #abcdef; } "; // 25 bytes + const repeats = (2 * 1024 * 1024 / chunk.len) + 1024; + var body = try std.ArrayList(u8).initCapacity(arena_allocator, chunk.len * repeats); + for (0..repeats) |_| body.appendSliceAssumeCapacity(chunk); + return req.respond(body.items, .{ + .extra_headers = &.{ + .{ .name = "Content-Type", .value = "text/css" }, + }, + }); + } + if (std.mem.eql(u8, path, "/echo_referer")) { // Echo the request's Referer header back as HTML so tests can assert // what Referer the navigation sent. Used by the cross-page Referer test.