From 02df0dc2879ed0ca4750e1311591fb4746741595 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Tue, 26 May 2026 13:41:03 +0800 Subject: [PATCH] Unify Data URLs Previously, the logic for data/blob URLs was spread out across every http_client caller. fetch/XHR/ScriptManager all had their own "if this is a data url, ..." logic. This causes two issues. 1 - Duplication, particularly as we try to cover more edge cases that need to be handled in all places. 2 - Correctness, because data/blob URLs are still URLs and still need to be "fetched" (from memory). They should still fire with the same timing as any other URL. That means that for fetch/XHR, they should fire asynchronously (i.e. on the next tick). And for ScriptManager they should fire depending on the type of script (normal/defer/async). This PR relies on the infrastructure added in https://github.com/lightpanda-io/browser/pull/2506 in order to fulfill a synthetic response on the next tick. Frame.navigate is excluded from this refactor. For one, about:blank must be special-cased and run synchronously (one of the few places where this is strictly required) and even blob URLs are a bit different: the blob URL list is the parent frame's, not the frame's own, and there's more we need to do (set origin). Potentially there _is_ some improvement here, but it's both less significant and less simple. 
--- src/browser/Frame.zig | 6 +- src/browser/HttpClient.zig | 93 ++++++++ src/browser/ScriptManager.zig | 73 +------ src/browser/data_url.zig | 202 ++++++++++++++++++ src/browser/js/Execution.zig | 7 - .../tests/element/html/script/data_url.html | 29 +++ src/browser/tests/frames/data_url_iframe.html | 23 ++ src/browser/tests/worker/worker.html | 22 ++ src/browser/webapi/Worker.zig | 11 - src/browser/webapi/WorkerGlobalScope.zig | 6 +- src/browser/webapi/net/Fetch.zig | 25 --- src/browser/webapi/net/XMLHttpRequest.zig | 36 ---- 12 files changed, 377 insertions(+), 156 deletions(-) create mode 100644 src/browser/data_url.zig create mode 100644 src/browser/tests/element/html/script/data_url.html create mode 100644 src/browser/tests/frames/data_url_iframe.html diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index 49d82d07..500e6b8e 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -294,6 +294,7 @@ pub fn init(self: *Frame, frame_id: u32, page: *Page, parent: ?*Frame) !void { ._event_manager = EventManager.init(arena, self), }; self._to_load = &self._to_load_1; + self._http_owner.blob_urls = &self._blob_urls; var screen: *Screen = undefined; var visual_viewport: *VisualViewport = undefined; @@ -507,11 +508,6 @@ pub fn isSameOrigin(self: *const Frame, url: [:0]const u8) bool { return std.mem.eql(u8, URL.getHost(url), URL.getHost(current_origin)); } -/// Look up a blob URL in this frame's registry. 
-pub fn lookupBlobUrl(self: *Frame, url: []const u8) ?*Blob { - return self._blob_urls.get(url); -} - pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !void { lp.assert(self._load_state == .waiting, "frame.renavigate", .{}); const session = self._session; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index d73a37a4..ad5cd412 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -567,6 +567,21 @@ fn requestT(self: *Client, req: Request, owner: ?*Owner) !*Transfer { // via transfer.abort which fires error_callback and deinits. `.created` // means no commit happened — anything else is held by an owner that // will clean up. + + // Synthetic schemes never touch the network or the layer chain — they skip + // robots/cache/interception and deliver on the next tick + if (Synthetic.isSynthetic(req.url)) { + // The 2nd transfer is the callback context. We don't actually use it, + // we're just sticking transfer in there to have something. + self.runNextTick(transfer, transfer, .{ .run = Synthetic.run }) catch |err| { + if (transfer.state == .created) { + transfer.abort(err); + } + return err; + }; + return transfer; + } + self.entry_layer.request(transfer) catch |err| { if (transfer.state == .created) { transfer.abort(err); @@ -577,6 +592,80 @@ fn requestT(self: *Client, req: Request, owner: ?*Owner) !*Transfer { return transfer; } +// Non-network URL schemes whose response is synthesized in-process rather than +// fetched, think blob data URLs. 
+const Synthetic = struct { + const data_url = @import("data_url.zig"); + + fn isSynthetic(url: []const u8) bool { + return std.mem.startsWith(u8, url, "data:") or std.mem.startsWith(u8, url, "blob:"); + } + + fn run(transfer: *Transfer, _: *anyopaque) void { + defer transfer.deinit(); + + const fulfilled = build(transfer) catch |err| { + transfer.req.error_callback(transfer.req.ctx, err); + return; + }; + deliver(&transfer.req, &fulfilled) catch |err| { + transfer.req.error_callback(transfer.req.ctx, err); + }; + } + + fn build(transfer: *Transfer) !FulfilledResponse { + const arena = transfer.arena; + const url = transfer.req.url; + + var body: []const u8 = ""; + var content_type: []const u8 = ""; + + if (std.mem.startsWith(u8, url, "data:")) { + const parsed = try data_url.parse(arena, url); + content_type = parsed.content_type; + body = parsed.body; + } else { + // blob: — resolved against the owning frame's registry. + const owner = transfer.owner orelse return error.BlobNotFound; + const blob_urls = owner.blob_urls orelse return error.BlobNotFound; + const blob = blob_urls.get(url) orelse return error.BlobNotFound; + content_type = blob._mime; + body = blob._slice; + } + + // A blob with no type yields no Content-Type header. 
+ const headers = if (content_type.len > 0) blk: { + const h = try arena.alloc(http.Header, 1); + h[0] = .{ .name = "content-type", .value = content_type }; + break :blk h; + } else &[_]http.Header{}; + + return .{ + .url = url, + .body = body, + .status = 200, + .headers = headers, + }; + } + + fn deliver(req: *Request, fulfilled: *const FulfilledResponse) !void { + const response = Response.fromFulfilled(req.ctx, fulfilled); + if (req.start_callback) |cb| { + try cb(response); + } + const proceed = try req.header_callback(response); + if (!proceed) { + return error.Abort; + } + if (fulfilled.body) |b| { + if (b.len > 0) { + try req.data_callback(response, b); + } + } + try req.done_callback(req.ctx); + } +}; + const SyncContext = struct { allocator: Allocator, completion: union(enum) { @@ -1909,7 +1998,11 @@ pub const Owner = struct { transfers: std.DoublyLinkedList = .{}, websockets: std.DoublyLinkedList = .{}, + // The owning Frame's / WorkerGlobalScope's blob: registry, + blob_urls: ?*const std.StringHashMapUnmanaged(*Blob) = null, + const WebSocket = @import("webapi/net/WebSocket.zig"); + const Blob = @import("webapi/Blob.zig"); pub fn addTransfer(self: *Owner, t: *Transfer) void { self.transfers.append(&t.owner_node); diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 742a38e2..0c2af244 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -141,12 +141,11 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e var remote_url: ?[:0]const u8 = null; const base_url = frame.base(); if (element.getAttributeSafe(comptime .wrap("src"))) |src| { - if (try parseDataURI(arena, src)) |data_uri| { - source = .{ .@"inline" = data_uri }; - } else { - remote_url = try URL.resolve(arena, base_url, src, .{ .encoding = frame.charset }); - source = .{ .remote = .{} }; - } + // data: and blob: srcs flow through the normal request path; HttpClient + // synthesizes the response. 
Execution mode (blocking vs async/defer) is + // attribute-driven, the same as any other src. + remote_url = try URL.resolve(arena, base_url, src, .{ .encoding = frame.charset }); + source = .{ .remote = .{} }; } else { var buf = std.Io.Writer.Allocating.init(arena); try element.asNode().getChildTextContent(&buf.writer); @@ -333,65 +332,3 @@ pub fn parseImportmap(self: *ScriptManager, script: *const Script) !void { pub fn staticScriptsDone(self: *ScriptManager) void { self.base.staticScriptsDone(); } - -// Parses data:[][;base64], -fn parseDataURI(allocator: Allocator, src: []const u8) !?[]const u8 { - if (!std.mem.startsWith(u8, src, "data:")) { - return null; - } - - const uri = src[5..]; - const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse return null; - const data = uri[data_starts + 1 ..]; - - const unescaped = try URL.unescape(allocator, data); - - const metadata = uri[0..data_starts]; - if (std.mem.endsWith(u8, metadata, ";base64") == false) { - return unescaped; - } - - // Forgiving base64 decode per WHATWG spec: - // https://infra.spec.whatwg.org/#forgiving-base64-decode - // Step 1: Remove all ASCII whitespace - var stripped = try std.ArrayList(u8).initCapacity(allocator, unescaped.len); - for (unescaped) |c| { - if (!std.ascii.isWhitespace(c)) { - stripped.appendAssumeCapacity(c); - } - } - const trimmed = std.mem.trimRight(u8, stripped.items, "="); - - // Length % 4 == 1 is invalid - if (trimmed.len % 4 == 1) { - return error.InvalidCharacterError; - } - - const decoded_size = std.base64.standard_no_pad.Decoder.calcSizeForSlice(trimmed) catch return error.InvalidCharacterError; - const buffer = try allocator.alloc(u8, decoded_size); - std.base64.standard_no_pad.Decoder.decode(buffer, trimmed) catch return error.InvalidCharacterError; - return buffer; -} - -const testing = @import("../testing.zig"); -test "DataURI: parse valid" { - try assertValidDataURI("data:text/javascript; charset=utf-8;base64,Zm9v", "foo"); - try 
assertValidDataURI("data:text/javascript; charset=utf-8;,foo", "foo"); - try assertValidDataURI("data:,foo", "foo"); -} - -test "DataURI: parse invalid" { - try assertInvalidDataURI("atad:,foo"); - try assertInvalidDataURI("data:foo"); - try assertInvalidDataURI("data:"); -} - -fn assertValidDataURI(uri: []const u8, expected: []const u8) !void { - defer testing.reset(); - const data_uri = try parseDataURI(testing.arena_allocator, uri) orelse return error.TestFailed; - try testing.expectEqual(expected, data_uri); -} - -fn assertInvalidDataURI(uri: []const u8) !void { - try testing.expectEqual(null, parseDataURI(undefined, uri)); -} diff --git a/src/browser/data_url.zig b/src/browser/data_url.zig new file mode 100644 index 00000000..4c39d1b1 --- /dev/null +++ b/src/browser/data_url.zig @@ -0,0 +1,202 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +// "data: URL processor" — https://fetch.spec.whatwg.org/#data-url-processor. 
+// The single home for data: parsing: the HttpClient synthetic-scheme path and +// ScriptManager ( + + + + + + + + + + + + + diff --git a/src/browser/tests/frames/data_url_iframe.html b/src/browser/tests/frames/data_url_iframe.html new file mode 100644 index 00000000..17cbe8bf --- /dev/null +++ b/src/browser/tests/frames/data_url_iframe.html @@ -0,0 +1,23 @@ + + + + diff --git a/src/browser/tests/worker/worker.html b/src/browser/tests/worker/worker.html index 8b15346a..2560de8f 100644 --- a/src/browser/tests/worker/worker.html +++ b/src/browser/tests/worker/worker.html @@ -360,3 +360,25 @@ }); } + + diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 4c96a0e4..be698018 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -25,7 +25,6 @@ const URL = @import("../URL.zig"); const Frame = @import("../Frame.zig"); const HttpClient = @import("../HttpClient.zig"); -const Blob = @import("Blob.zig"); const EventTarget = @import("EventTarget.zig"); const MessageEvent = @import("event/MessageEvent.zig"); const ErrorEvent = @import("event/ErrorEvent.zig"); @@ -89,16 +88,6 @@ pub fn init(url: []const u8, frame: *Frame) !*Worker { return self; } - if (std.mem.startsWith(u8, url, "blob:")) { - errdefer frame.removeWorker(self); - const blob: *Blob = frame.lookupBlobUrl(url) orelse { - log.warn(.js, "invalid blob", .{ .target = "worker" }); - return error.BlobNotFound; - }; - try self.loadInitialScript(blob._slice); - return self; - } - const headers = try session.browser.http_client.newHeaders(); frame.makeRequest(.{ .ctx = self, diff --git a/src/browser/webapi/WorkerGlobalScope.zig b/src/browser/webapi/WorkerGlobalScope.zig index 7744eca0..ee36864d 100644 --- a/src/browser/webapi/WorkerGlobalScope.zig +++ b/src/browser/webapi/WorkerGlobalScope.zig @@ -144,6 +144,8 @@ pub fn init(worker: *Worker, url: [:0]const u8) !*WorkerGlobalScope { }); errdefer factory.destroy(self); + self._http_owner.blob_urls = &self._blob_urls; + 
self._script_manager = ScriptManagerBase.init( arena, &session.browser.http_client, @@ -231,10 +233,6 @@ pub fn isSameOrigin(self: *const WorkerGlobalScope, url: [:0]const u8) bool { return std.mem.eql(u8, URL.getHost(url), URL.getHost(current_origin)); } -pub fn lookupBlobUrl(self: *WorkerGlobalScope, url: []const u8) ?*Blob { - return self._blob_urls.get(url); -} - pub fn makeRequest(self: *WorkerGlobalScope, req: HttpClient.Request) !void { return self._session.browser.http_client.request(req, &self._http_owner); } diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index f5fd4562..a8f12ff5 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -24,7 +24,6 @@ const js = @import("../../js/js.zig"); const Page = @import("../../Page.zig"); const URL = @import("../../URL.zig"); -const Blob = @import("../Blob.zig"); const Request = @import("Request.zig"); const Response = @import("Response.zig"); const AbortSignal = @import("../AbortSignal.zig"); @@ -58,10 +57,6 @@ pub fn init(input: Input, options: ?InitOpts, exec: *const Execution) !js.Promis } } - if (std.mem.startsWith(u8, request._url, "blob:")) { - return handleBlobUrl(request._url, resolver, exec); - } - const response = try Response.init(null, .{ .status = 0 }, exec); errdefer response.deinit(exec.context.page); @@ -121,26 +116,6 @@ pub fn init(input: Input, options: ?InitOpts, exec: *const Execution) !js.Promis return resolver.promise(); } -fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, exec: *const Execution) !js.Promise { - const blob: *Blob = exec.lookupBlobUrl(url) orelse { - resolver.rejectError("fetch blob error", .{ .type_error = "BlobNotFound" }); - return resolver.promise(); - }; - - const response = try Response.init(null, .{ .status = 200 }, exec); - response._body = .{ .bytes = try response._arena.dupe(u8, blob._slice) }; - response._url = try response._arena.dupeZ(u8, url); - response._type = .basic; - - if 
(blob._mime.len > 0) { - try response._headers.append("Content-Type", blob._mime, exec); - } - - const js_val = try exec.context.local.?.zigValueToJs(response, .{}); - resolver.resolve("fetch blob done", js_val); - return resolver.promise(); -} - fn httpStartCallback(response: HttpClient.Response) !void { const self: *Fetch = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 98027202..de6b001d 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -249,10 +249,6 @@ pub fn send(self: *XMLHttpRequest, body_: ?BodyInit, exec_: *const Execution) !v const exec = self._exec; - if (std.mem.startsWith(u8, self._url, "blob:")) { - return self.handleBlobUrl(exec); - } - const session = exec.context.page.session; const http_client = &session.browser.http_client; var headers = try http_client.newHeaders(); @@ -293,38 +289,6 @@ pub fn send(self: *XMLHttpRequest, body_: ?BodyInit, exec_: *const Execution) !v }; } -fn handleBlobUrl(self: *XMLHttpRequest, exec: *const Execution) !void { - const blob = exec.lookupBlobUrl(self._url) orelse { - self.handleError(error.BlobNotFound); - return; - }; - - self._response_status = 200; - self._response_url = self._url; - - try self._response_data.appendSlice(self._arena, blob._slice); - self._response_len = blob._slice.len; - - try self.stateChanged(.headers_received, exec); - try self._proto.dispatch(.load_start, .{ .loaded = 0, .total = self._response_len orelse 0 }, exec); - try self.stateChanged(.loading, exec); - try self._proto.dispatch(.progress, .{ - .total = self._response_len orelse 0, - .loaded = self._response_data.items.len, - }, exec); - try self.stateChanged(.done, exec); - - const loaded = self._response_data.items.len; - try self._proto.dispatch(.load, .{ - .total = loaded, - .loaded = loaded, - }, exec); - try self._proto.dispatch(.load_end, .{ - .total = 
loaded, - .loaded = loaded, - }, exec); -} - pub fn getReadyState(self: *const XMLHttpRequest) u32 { return @intFromEnum(self._ready_state); }