Merge branch 'main' into agent

This commit is contained in:
Adrià Arrufat
2026-04-13 17:00:23 +02:00
19 changed files with 337 additions and 117 deletions

View File

@@ -111,6 +111,7 @@ pub const PageRemove = struct {};
pub const PageNavigate = struct {
req_id: u32,
page_id: u32,
frame_id: u32,
timestamp: u64,
url: [:0]const u8,
@@ -119,6 +120,7 @@ pub const PageNavigate = struct {
pub const PageNavigated = struct {
req_id: u32,
page_id: u32,
frame_id: u32,
timestamp: u64,
url: [:0]const u8,
@@ -127,17 +129,20 @@ pub const PageNavigated = struct {
/// Payload dispatched with the `.page_network_idle` notification.
pub const PageNetworkIdle = struct {
/// Request id; the page passes its `_req_id` here.
req_id: u32,
/// Id of the page that went idle (the page's `id`).
page_id: u32,
/// Id of the frame the page belongs to (the page's `_frame_id`).
frame_id: u32,
/// Time of the event; the page passes `timestamp(.monotonic)`.
timestamp: u64,
};
/// Payload dispatched with the `.page_network_almost_idle` notification.
pub const PageNetworkAlmostIdle = struct {
/// Request id; the page passes its `_req_id` here.
req_id: u32,
/// Id of the page reporting the state (the page's `id`).
page_id: u32,
/// Id of the frame the page belongs to (the page's `_frame_id`).
frame_id: u32,
/// Time of the event; the page passes `timestamp(.monotonic)`.
timestamp: u64,
};
pub const PageFrameCreated = struct {
page_id: u32,
frame_id: u32,
parent_id: u32,
timestamp: u64,
@@ -145,12 +150,14 @@ pub const PageFrameCreated = struct {
/// Payload dispatched with the `.page_dom_content_loaded` notification.
pub const PageDOMContentLoaded = struct {
/// Request id; the page passes its `_req_id` here.
req_id: u32,
/// Id of the page whose document finished loading (the page's `id`).
page_id: u32,
/// Id of the frame the page belongs to (the page's `_frame_id`).
frame_id: u32,
/// Time of the event; the page passes `timestamp(.monotonic)`.
timestamp: u64,
};
/// Payload dispatched with the `.page_loaded` notification.
pub const PageLoaded = struct {
/// Request id; the page passes its `_req_id` here.
req_id: u32,
/// Id of the page that completed loading (the page's `id`).
page_id: u32,
/// Id of the frame the page belongs to (the page's `_frame_id`).
frame_id: u32,
/// Time of the event; the page passes `timestamp(.monotonic)`.
timestamp: u64,
};
@@ -343,6 +350,7 @@ test "Notification" {
// noop
notifier.dispatch(.page_navigate, &.{
.page_id = 39,
.frame_id = 0,
.req_id = 1,
.timestamp = 4,
@@ -354,6 +362,7 @@ test "Notification" {
try notifier.register(.page_navigate, &tc, TestClient.pageNavigate);
notifier.dispatch(.page_navigate, &.{
.page_id = 39,
.frame_id = 0,
.req_id = 1,
.timestamp = 4,
@@ -364,6 +373,7 @@ test "Notification" {
notifier.unregisterAll(&tc);
notifier.dispatch(.page_navigate, &.{
.page_id = 39,
.frame_id = 0,
.req_id = 1,
.timestamp = 10,
@@ -375,25 +385,27 @@ test "Notification" {
try notifier.register(.page_navigate, &tc, TestClient.pageNavigate);
try notifier.register(.page_navigated, &tc, TestClient.pageNavigated);
notifier.dispatch(.page_navigate, &.{
.page_id = 39,
.frame_id = 0,
.req_id = 1,
.timestamp = 10,
.url = undefined,
.opts = .{},
});
notifier.dispatch(.page_navigated, &.{ .frame_id = 0, .req_id = 1, .timestamp = 6, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 6, .url = undefined, .opts = .{} });
try testing.expectEqual(14, tc.page_navigate);
try testing.expectEqual(6, tc.page_navigated);
notifier.unregisterAll(&tc);
notifier.dispatch(.page_navigate, &.{
.page_id = 39,
.frame_id = 0,
.req_id = 1,
.timestamp = 100,
.url = undefined,
.opts = .{},
});
notifier.dispatch(.page_navigated, &.{ .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
try testing.expectEqual(14, tc.page_navigate);
try testing.expectEqual(6, tc.page_navigated);
@@ -401,27 +413,27 @@ test "Notification" {
// unregister
try notifier.register(.page_navigate, &tc, TestClient.pageNavigate);
try notifier.register(.page_navigated, &tc, TestClient.pageNavigated);
notifier.dispatch(.page_navigate, &.{ .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigate, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
try testing.expectEqual(114, tc.page_navigate);
try testing.expectEqual(1006, tc.page_navigated);
notifier.unregister(.page_navigate, &tc);
notifier.dispatch(.page_navigate, &.{ .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigate, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
try testing.expectEqual(114, tc.page_navigate);
try testing.expectEqual(2006, tc.page_navigated);
notifier.unregister(.page_navigated, &tc);
notifier.dispatch(.page_navigate, &.{ .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigate, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
try testing.expectEqual(114, tc.page_navigate);
try testing.expectEqual(2006, tc.page_navigated);
// already unregistered, try anyways
notifier.unregister(.page_navigated, &tc);
notifier.dispatch(.page_navigate, &.{ .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigate, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 100, .url = undefined, .opts = .{} });
notifier.dispatch(.page_navigated, &.{ .page_id = 39, .frame_id = 0, .req_id = 1, .timestamp = 1000, .url = undefined, .opts = .{} });
try testing.expectEqual(114, tc.page_navigate);
try testing.expectEqual(2006, tc.page_navigated);
}

View File

@@ -460,6 +460,7 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R
.method = .GET,
.headers = headers,
.blocking = false,
.page_id = req.page_id,
.frame_id = req.frame_id,
.cookie_jar = req.cookie_jar,
.cookie_origin = req.cookie_origin,
@@ -917,17 +918,15 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
// Transfer is done (success or error). Caller (processMessages) owns deinit.
// Return true = done (caller will deinit), false = continues (redirect/auth).
// When the server sends "Connection: close" and closes the TLS
// connection without a close_notify alert, BoringSSL reports
// RecvError. If we already received valid HTTP headers, this is
// a normal end-of-body (the connection closure signals the end
// When the server closes the TLS connection without a close_notify alert,
// BoringSSL reports RecvError. If we already received valid HTTP headers,
// this is a normal end-of-body (the connection closure signals the end
// of the response per HTTP/1.1 when there is no Content-Length).
// We must check this before endTransfer, which may reset the
// easy handle.
// We must check this before endTransfer, which may reset the easy handle.
const is_conn_close_recv = blk: {
const err = msg.err orelse break :blk false;
if (err != error.RecvError) break :blk false;
const hdr = msg.conn.getResponseHeader("connection", 0) orelse break :blk false;
const hdr = msg.conn.getResponseHeader("connection", 0) orelse break :blk true;
break :blk std.ascii.eqlIgnoreCase(hdr.value, "close");
};
@@ -1069,6 +1068,7 @@ fn ensureNoActiveConnection(self: *const Client) !void {
}
pub const Request = struct {
page_id: u32,
frame_id: u32,
method: Method,
url: [:0]const u8,

View File

@@ -366,9 +366,10 @@ pub fn deinit(self: *Page, abort_http: bool) void {
}
{
var it: ?*std.DoublyLinkedList.Node = self._mutation_observers.first;
while (it) |node| : (it = node.next) {
const observer: *MutationObserver = @fieldParentPtr("node", node);
var node: ?*std.DoublyLinkedList.Node = self._mutation_observers.first;
while (node) |n| {
node = n.next; // capture before we potentially delete observer
const observer: *MutationObserver = @fieldParentPtr("node", n);
observer.releaseRef(session);
}
}
@@ -526,9 +527,10 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
}
session.notification.dispatch(.page_navigate, &.{
.frame_id = self._frame_id,
.req_id = req_id,
.opts = opts,
.req_id = req_id,
.page_id = self.id,
.frame_id = self._frame_id,
.url = request_url,
.timestamp = timestamp(.monotonic),
});
@@ -542,8 +544,9 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
});
session.notification.dispatch(.page_navigated, &.{
.frame_id = self._frame_id,
.req_id = req_id,
.page_id = self.id,
.frame_id = self._frame_id,
.opts = .{
.cdp_id = opts.cdp_id,
.reason = opts.reason,
@@ -579,10 +582,11 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
// We dispatch page_navigate event before sending the request.
// It ensures the event page_navigated is not dispatched before this one.
session.notification.dispatch(.page_navigate, &.{
.frame_id = self._frame_id,
.req_id = req_id,
.opts = opts,
.url = self.url,
.req_id = req_id,
.page_id = self.id,
.frame_id = self._frame_id,
.timestamp = timestamp(.monotonic),
});
@@ -597,6 +601,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
http_client.request(.{
.ctx = self,
.url = self.url,
.page_id = self.id,
.frame_id = self._frame_id,
.method = opts.method,
.headers = headers,
@@ -778,8 +783,9 @@ pub fn _documentIsLoaded(self: *Page) !void {
);
self._session.notification.dispatch(.page_dom_content_loaded, &.{
.frame_id = self._frame_id,
.page_id = self.id,
.req_id = self._req_id,
.frame_id = self._frame_id,
.timestamp = timestamp(.monotonic),
});
}
@@ -859,8 +865,9 @@ fn _documentIsComplete(self: *Page) !void {
}
self._session.notification.dispatch(.page_loaded, &.{
.frame_id = self._frame_id,
.page_id = self.id,
.req_id = self._req_id,
.frame_id = self._frame_id,
.timestamp = timestamp(.monotonic),
});
@@ -919,10 +926,11 @@ fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
// "navigating" to about:blank, in which case this notification has
// already been sent
self._session.notification.dispatch(.page_navigated, &.{
.frame_id = self._frame_id,
.req_id = self._req_id,
.opts = no,
.url = self.url,
.page_id = self.id,
.req_id = self._req_id,
.frame_id = self._frame_id,
.timestamp = timestamp(.monotonic),
});
}
@@ -1184,6 +1192,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *IFrame) !void {
// on first load, dispatch frame_created event
self._session.notification.dispatch(.page_frame_created, &.{
.page_id = self.id,
.frame_id = frame_id,
.parent_id = self._frame_id,
.timestamp = timestamp(.monotonic),
@@ -1545,6 +1554,7 @@ pub fn deliverSlotchangeEvents(self: *Page) void {
pub fn notifyNetworkIdle(self: *Page) void {
lp.assert(self._notified_network_idle == .done, "Page.notifyNetworkIdle", .{});
self._session.notification.dispatch(.page_network_idle, &.{
.page_id = self.id,
.req_id = self._req_id,
.frame_id = self._frame_id,
.timestamp = timestamp(.monotonic),
@@ -1554,6 +1564,7 @@ pub fn notifyNetworkIdle(self: *Page) void {
pub fn notifyNetworkAlmostIdle(self: *Page) void {
lp.assert(self._notified_network_almost_idle == .done, "Page.notifyNetworkAlmostIdle", .{});
self._session.notification.dispatch(.page_network_almost_idle, &.{
.page_id = self.id,
.req_id = self._req_id,
.frame_id = self._frame_id,
.timestamp = timestamp(.monotonic),
@@ -3580,6 +3591,7 @@ pub fn submitForm(self: *Page, submitter_: ?*Element, form_: ?*Element.Html.Form
}
const FormData = @import("webapi/net/FormData.zig");
// The submitter can be an input box (if enter was entered on the box)
// I don't think this is technically correct, but FormData handles it ok
const form_data = try FormData.init(form, submitter_, self);
@@ -3587,10 +3599,22 @@ pub fn submitForm(self: *Page, submitter_: ?*Element, form_: ?*Element.Html.Form
const arena = try self._session.getArena(.medium, "submitForm");
errdefer self._session.releaseArena(arena);
const encoding = form_element.getAttributeSafe(comptime .wrap("enctype"));
const enctype = form_element.getAttributeSafe(comptime .wrap("enctype"));
// Get charset from accept-charset attribute or fall back to document charset
const charset: []const u8 = blk: {
if (form_element.getAttributeSafe(.wrap("accept-charset"))) |ac| {
// Normalize to canonical encoding name
const info = h5e.encoding_for_label(ac.ptr, ac.len);
if (info.isValid()) {
break :blk info.name();
}
}
break :blk self.charset;
};
var buf = std.Io.Writer.Allocating.init(arena);
try form_data.write(encoding, &buf.writer);
try form_data.write(.{ .enctype = enctype, .charset = charset, .allocator = arena }, &buf.writer);
const method = form_element.getAttributeSafe(comptime .wrap("method")) orelse "";
var action = form_element.getAttributeSafe(comptime .wrap("action")) orelse self.url;

View File

@@ -295,6 +295,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
.url = url,
.ctx = script,
.method = .GET,
.page_id = page.id,
.frame_id = page._frame_id,
.headers = try self.getHeaders(),
.blocking = is_blocking,
@@ -410,6 +411,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
.url = url,
.ctx = script,
.method = .GET,
.page_id = page.id,
.frame_id = page._frame_id,
.headers = try self.getHeaders(),
.cookie_jar = &page._session.cookie_jar,
@@ -514,6 +516,7 @@ pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.C
self.client.request(.{
.url = url,
.method = .GET,
.page_id = page.id,
.frame_id = page._frame_id,
.headers = try self.getHeaders(),
.ctx = script,

View File

@@ -360,6 +360,7 @@ fn handleError(comptime T: type, comptime F: type, local: *const Local, err: any
error.TryCatchRethrow => return,
error.InvalidArgument => isolate.createTypeError("invalid argument"),
error.TypeError => isolate.createTypeError(""),
error.RangeError => isolate.createRangeError(""),
error.OutOfMemory => isolate.createError("out of memory"),
error.IllegalConstructor => isolate.createError("Illegal Constructor"),
else => blk: {

View File

@@ -504,3 +504,14 @@
testing.expectEqual(form, capturedSubmitter);
}
</script>
<script id="acceptCharset_property">
{
  // The acceptCharset property starts empty and, once assigned, is visible
  // both through the property and through the accept-charset attribute.
  const formEl = document.createElement('form');
  testing.expectEqual('', formEl.acceptCharset);

  formEl.acceptCharset = 'big5';
  const viaProperty = formEl.acceptCharset;
  const viaAttribute = formEl.getAttribute('accept-charset');
  testing.expectEqual('big5', viaProperty);
  testing.expectEqual('big5', viaAttribute);
}
</script>

View File

@@ -1,4 +1,5 @@
<!DOCTYPE html>
<meta charset="UTF-8">
<script src="../testing.js"></script>
<script>
@@ -85,7 +86,8 @@
usp2.append('spaces', 'hello world');
usp2.append('special', '!@#$%^&*()');
usp2.append('utf8', 'café');
testing.expectEqual('spaces=hello+world&special=%21%40%23%24%25%5E%26*%28%29&utf8=caf%C3%83%C2%A9', usp2.toString());
// UTF-8 encoding of 'é' (U+00E9) is C3 A9, percent-encoded as %C3%A9
testing.expectEqual('spaces=hello+world&special=%21%40%23%24%25%5E%26*%28%29&utf8=caf%C3%A9', usp2.toString());
}
</script>
@@ -413,3 +415,48 @@
testing.expectEqual('3.14', usp.get('pi'));
}
</script>
<script id=utf8Encoding>
{
  // UTF-8 bytes must be percent-encoded one-to-one (never double-encoded).
  // Each row is [name, value, expected serialization].
  const cases = [
    // 2-byte UTF-8: é (U+00E9) -> C3 A9 -> %C3%A9
    ['cafe', 'café', 'cafe=caf%C3%A9'],
    // 3-byte UTF-8: 中 (U+4E2D) -> E4 B8 AD -> %E4%B8%AD
    ['chinese', '中文', 'chinese=%E4%B8%AD%E6%96%87'],
    // 3-byte UTF-8: 日 (U+65E5) -> E6 97 A5 -> %E6%97%A5
    ['japanese', '日本語', 'japanese=%E6%97%A5%E6%9C%AC%E8%AA%9E'],
    // Mixed ASCII and UTF-8
    ['mixed', 'hello世界', 'mixed=hello%E4%B8%96%E7%95%8C'],
    // 4-byte UTF-8: 😀 (U+1F600) -> F0 9F 98 80 -> %F0%9F%98%80
    ['emoji', '😀', 'emoji=%F0%9F%98%80'],
  ];

  for (const [name, value, expected] of cases) {
    const params = new URLSearchParams();
    params.set(name, value);
    testing.expectEqual(expected, params.toString());
  }
}
</script>
<script id=utf8EncodingRoundtrip>
{
  // Serializing and then re-parsing must give back the original text.
  const original = '日本語テスト café 中文 😀';

  const source = new URLSearchParams();
  source.set('test', original);

  // Feed the serialized form straight back into a fresh instance.
  const reparsed = new URLSearchParams(source.toString());
  testing.expectEqual(original, reparsed.get('test'));
}
</script>

View File

@@ -71,7 +71,7 @@ pub const ObserverInit = struct {
};
pub fn init(callback: js.Function.Temp, options: ?ObserverInit, page: *Page) !*IntersectionObserver {
const arena = try page.getArena(.medium, "IntersectionObserver");
const arena = try page.getArena(.small, "IntersectionObserver");
errdefer page.releaseArena(arena);
const opts = options orelse ObserverInit{};

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
@@ -22,6 +22,7 @@ const String = @import("../../string.zig").String;
const js = @import("../js/js.zig");
const Page = @import("../Page.zig");
const h5e = @import("../parser/html5ever.zig");
const Allocator = std.mem.Allocator;
@@ -177,21 +178,24 @@ const URLEncodeMode = enum {
query,
};
pub fn urlEncode(self: *const KeyValueList, comptime mode: URLEncodeMode, writer: *std.Io.Writer) !void {
// URL-encode the key-value pairs.
// For UTF-8 charset, does standard percent encoding.
// For legacy charsets, converts to that encoding with NCR fallback for unmappable chars.
pub fn urlEncode(self: *const KeyValueList, comptime mode: URLEncodeMode, allocator_: ?Allocator, charset: []const u8, writer: *std.Io.Writer) !void {
const entries = self._entries.items;
if (entries.len == 0) {
return;
}
try urlEncodeEntry(entries[0], mode, writer);
try urlEncodeEntry(entries[0], mode, allocator_, charset, writer);
for (entries[1..]) |entry| {
try writer.writeByte('&');
try urlEncodeEntry(entry, mode, writer);
try urlEncodeEntry(entry, mode, allocator_, charset, writer);
}
}
fn urlEncodeEntry(entry: Entry, comptime mode: URLEncodeMode, writer: *std.Io.Writer) !void {
try urlEncodeValue(entry.name.str(), mode, writer);
fn urlEncodeEntry(entry: Entry, comptime mode: URLEncodeMode, allocator_: ?Allocator, charset: []const u8, writer: *std.Io.Writer) !void {
try urlEncodeValue(entry.name.str(), mode, allocator_, charset, writer);
// for a form, for an empty value, we'll do "spice="
// but for a query, we do "spice"
@@ -200,10 +204,58 @@ fn urlEncodeEntry(entry: Entry, comptime mode: URLEncodeMode, writer: *std.Io.Wr
}
try writer.writeByte('=');
try urlEncodeValue(entry.value.str(), mode, writer);
try urlEncodeValue(entry.value.str(), mode, allocator_, charset, writer);
}
fn urlEncodeValue(value: []const u8, comptime mode: URLEncodeMode, writer: *std.Io.Writer) !void {
/// Percent-encodes `value` into `writer`, first converting it to `charset`
/// when that is a legacy (non-UTF-8) encoding.
///
/// Falls back to plain UTF-8 percent-encoding (`urlEncodeValueUtf8`) when:
///   - `charset` is exactly "UTF-8" (the comparison is case-sensitive),
///   - no allocator was supplied (`allocator_` is null),
///   - `charset` is not a label `h5e.encoding_for_label` recognizes,
///   - the reported maximum buffer length is 0, or
///   - the conversion does not report success.
///
/// Returns an error if the scratch allocation or a write to `writer` fails.
fn urlEncodeValue(value: []const u8, comptime mode: URLEncodeMode, allocator_: ?Allocator, charset: []const u8, writer: *std.Io.Writer) !void {
// For UTF-8, do standard percent encoding
if (std.mem.eql(u8, charset, "UTF-8")) {
return urlEncodeValueUtf8(value, mode, writer);
}
// Transcoding needs a scratch buffer; without an allocator we cannot
// transcode, so emit UTF-8 instead.
const allocator = allocator_ orelse return urlEncodeValueUtf8(value, mode, writer);
const enc_info = h5e.encoding_for_label(charset.ptr, charset.len);
if (!enc_info.isValid()) {
// Unknown encoding, fall back to UTF-8
return urlEncodeValueUtf8(value, mode, writer);
}
// Calculate max buffer size for encoded output
// encoding_max_encode_buffer_length doesn't account for NCR expansion,
// so we need extra space. An NCR is "&#" + decimal code point + ";":
// at most 7 bytes for a 2-byte UTF-8 char, 8 for a 3-byte char and
// 10 for a 4-byte char.
const base_len = h5e.encoding_max_encode_buffer_length(enc_info.handle.?, value.len);
if (base_len == 0) {
// NOTE(review): 0 presumably signals that no size could be computed
// (or there is nothing to encode) — confirm against the h5e binding.
return urlEncodeValueUtf8(value, mode, writer);
}
// For NCR encoding, each character could expand significantly.
// The worst ratio above is 7/2 = 3.5x, so 4x the base buffer is enough
// (assumes base_len >= value.len — TODO confirm).
const max_encoded_len = base_len * 4;
const encode_buf = try allocator.alloc(u8, max_encoded_len);
defer allocator.free(encode_buf);
// Encode UTF-8 to legacy encoding with NCR fallback
const result = h5e.encoding_encode_with_ncr(
enc_info.handle.?,
value.ptr,
value.len,
encode_buf.ptr,
encode_buf.len,
);
if (!result.isSuccess()) {
// Encoding failed, fall back to UTF-8
return urlEncodeValueUtf8(value, mode, writer);
}
// Percent-encode the result, preserving NCRs (& and ; must be encoded)
const encoded_bytes = encode_buf[0..result.bytes_written];
return urlEncodeValueLegacy(encoded_bytes, mode, writer);
}
/// Percent-encode a UTF-8 value - bytes >= 0x80 are percent-encoded directly.
fn urlEncodeValueUtf8(value: []const u8, comptime mode: URLEncodeMode, writer: *std.Io.Writer) !void {
if (!urlEncodeShouldEscape(value, mode)) {
return writer.writeAll(value);
}
@@ -213,13 +265,22 @@ fn urlEncodeValue(value: []const u8, comptime mode: URLEncodeMode, writer: *std.
try writer.writeByte(b);
} else if (b == ' ') {
try writer.writeByte('+');
} else if (b >= 0x80) {
// Double-encode: treat byte as Latin-1 code point, encode to UTF-8, then percent-encode
// For bytes 0x80-0xFF (U+0080 to U+00FF), UTF-8 encoding is 2 bytes:
// [0xC0 | (b >> 6), 0x80 | (b & 0x3F)]
const byte1 = 0xC0 | (b >> 6);
const byte2 = 0x80 | (b & 0x3F);
try writer.print("%{X:0>2}%{X:0>2}", .{ byte1, byte2 });
} else {
try writer.print("%{X:0>2}", .{b});
}
}
}
/// Percent-encode a legacy-encoded value - must also encode & and ; to preserve NCRs.
fn urlEncodeValueLegacy(value: []const u8, comptime mode: URLEncodeMode, writer: *std.Io.Writer) !void {
for (value) |b| {
if (urlEncodeUnreserved(b, mode)) {
try writer.writeByte(b);
} else if (b == ' ') {
try writer.writeByte('+');
} else if (b == '&' or b == ';') {
// Must encode & and ; to preserve NCRs like &#12345;
try writer.print("%{X:0>2}", .{b});
} else {
try writer.print("%{X:0>2}", .{b});
}
@@ -281,3 +342,71 @@ const GenericIterator = @import("collections/iterator.zig").Entry;
pub const KeyIterator = GenericIterator(Iterator, "0");
pub const ValueIterator = GenericIterator(Iterator, "1");
pub const EntryIterator = GenericIterator(Iterator, null);
const testing = @import("../../testing.zig");
test "KeyValueList: urlEncode UTF-8" {
    // A 2-byte UTF-8 sequence must be percent-encoded byte for byte,
    // not re-interpreted as Latin-1 and encoded a second time.
    const arena = testing.arena_allocator;

    var params = KeyValueList.init();
    try params.append(arena, "cafe", "café"); // é is C3 A9 in UTF-8

    var out = std.Io.Writer.Allocating.init(arena);
    try params.urlEncode(.form, null, "UTF-8", &out.writer);

    // C3 A9 -> %C3%A9
    const expected = "cafe=caf%C3%A9";
    try testing.expectString(expected, out.written());
}
test "KeyValueList: urlEncode UTF-8 CJK" {
    // 3-byte UTF-8 sequences (Chinese/Japanese) are encoded byte for byte.
    const arena = testing.arena_allocator;

    var params = KeyValueList.init();
    // 中 is E4 B8 AD and 文 is E6 96 87 in UTF-8
    try params.append(arena, "text", "中文");

    var out = std.Io.Writer.Allocating.init(arena);
    try params.urlEncode(.form, null, "UTF-8", &out.writer);

    const expected = "text=%E4%B8%AD%E6%96%87";
    try testing.expectString(expected, out.written());
}
test "KeyValueList: urlEncode GBK with NCR fallback" {
    // A character with no GBK mapping is replaced by a numeric character
    // reference: U+3D34 (㴴) becomes &#15668;
    const arena = testing.arena_allocator;

    var params = KeyValueList.init();
    try params.append(arena, "q", "\u{3D34}");

    var out = std.Io.Writer.Allocating.init(arena);
    try params.urlEncode(.form, arena, "GBK", &out.writer);

    // The NCR itself is percent-encoded: & -> %26, # -> %23, ; -> %3B
    const expected = "q=%26%2315668%3B";
    try testing.expectString(expected, out.written());
}
test "KeyValueList: urlEncode GBK mappable character" {
    // Test legacy encoding with a character that IS in GBK
    // U+4E2D (中) IS in GBK, should encode to GBK bytes D6 D0
    const allocator = testing.arena_allocator;
    var list = KeyValueList.init();
    // The value must be the character under test. An empty value would
    // serialize as "q=" and never reach the GBK encoder.
    try list.append(allocator, "q", "\u{4E2D}");
    var buf = std.Io.Writer.Allocating.init(allocator);
    try list.urlEncode(.form, allocator, "GBK", &buf.writer);
    // GBK encoding of 中 is D6 D0, percent-encoded as %D6%D0
    try testing.expectString("q=%D6%D0", buf.written());
}
test "KeyValueList: urlEncode Big5 unmappable character" {
    // U+70A3 (炣) has no Big5 mapping, so it is emitted as the numeric
    // character reference &#28835;
    const arena = testing.arena_allocator;

    var params = KeyValueList.init();
    try params.append(arena, "q", "\u{70A3}");

    var out = std.Io.Writer.Allocating.init(arena);
    try params.urlEncode(.form, arena, "Big5", &out.writer);

    // The NCR itself is percent-encoded: & -> %26, # -> %23, ; -> %3B
    const expected = "q=%26%2328835%3B";
    try testing.expectString(expected, out.written());
}

View File

@@ -76,9 +76,8 @@ pub const ObserveOptions = struct {
};
pub fn init(callback: js.Function.Temp, page: *Page) !*MutationObserver {
const arena = try page.getArena(.medium, "MutationObserver");
const arena = try page.getArena(.small, "MutationObserver");
errdefer page.releaseArena(arena);
const self = try arena.create(MutationObserver);
self.* = .{
._arena = arena,

View File

@@ -112,6 +112,14 @@ pub fn setTarget(self: *Form, value: []const u8, page: *Page) !void {
try self.asElement().setAttributeSafe(comptime .wrap("target"), .wrap(value), page);
}
/// Returns the value of the form's `accept-charset` attribute, or an empty
/// string when the attribute is not set.
pub fn getAcceptCharset(self: *Form) []const u8 {
    const element = self.asElement();
    if (element.getAttributeSafe(.wrap("accept-charset"))) |charset| {
        return charset;
    }
    return "";
}
/// Sets the form's `accept-charset` attribute to `value`.
/// Any error from setting the attribute is returned to the caller.
pub fn setAcceptCharset(self: *Form, value: []const u8, page: *Page) !void {
    const element = self.asElement();
    return element.setAttributeSafe(.wrap("accept-charset"), .wrap(value), page);
}
pub fn getLength(self: *Form, page: *Page) !u32 {
const elements = try self.getElements(page);
return elements.length(page);
@@ -174,6 +182,7 @@ pub const JsApi = struct {
pub const method = bridge.accessor(Form.getMethod, Form.setMethod, .{});
pub const action = bridge.accessor(Form.getAction, Form.setAction, .{});
pub const target = bridge.accessor(Form.getTarget, Form.setTarget, .{});
pub const acceptCharset = bridge.accessor(Form.getAcceptCharset, Form.setAcceptCharset, .{});
pub const elements = bridge.accessor(Form.getElements, null, .{});
pub const length = bridge.accessor(Form.getLength, null, .{});
pub const submit = bridge.function(Form.submit, .{});

View File

@@ -90,6 +90,7 @@ pub fn init(input: Input, options: ?InitOpts, page: *Page) !js.Promise {
.ctx = fetch,
.url = request._url,
.method = request._method,
.page_id = page.id,
.frame_id = page._frame_id,
.body = request._body,
.headers = headers,

View File

@@ -99,17 +99,23 @@ pub fn forEach(self: *FormData, cb_: js.Function, js_this_: ?js.Object) !void {
}
}
pub fn write(self: *const FormData, encoding_: ?[]const u8, writer: *std.Io.Writer) !void {
const encoding = encoding_ orelse {
return self._list.urlEncode(.form, writer);
/// Options accepted by `FormData.write`.
pub const WriteOpts = struct {
/// The form's enctype. When null, or when it equals
/// "application/x-www-form-urlencoded" (case-insensitive), the data is
/// URL-encoded; any other value only logs a not-implemented warning.
enctype: ?[]const u8 = null,
/// Character encoding label forwarded to the URL encoder.
charset: []const u8 = "UTF-8",
/// Optional allocator forwarded to the URL encoder.
// NOTE(review): presumably only needed for non-UTF-8 charsets — confirm
// against KeyValueList.urlEncode.
allocator: ?std.mem.Allocator = null,
};
pub fn write(self: *const FormData, opts: WriteOpts, writer: *std.Io.Writer) !void {
const enctype = opts.enctype orelse {
return self._list.urlEncode(.form, opts.allocator, opts.charset, writer);
};
if (std.ascii.eqlIgnoreCase(encoding, "application/x-www-form-urlencoded")) {
return self._list.urlEncode(.form, writer);
if (std.ascii.eqlIgnoreCase(enctype, "application/x-www-form-urlencoded")) {
return self._list.urlEncode(.form, opts.allocator, opts.charset, writer);
}
log.warn(.not_implemented, "FormData.encoding", .{
.encoding = encoding,
.encoding = enctype,
});
}

View File

@@ -112,7 +112,8 @@ pub fn entries(self: *URLSearchParams, page: *Page) !*KeyValueList.EntryIterator
}
pub fn toString(self: *const URLSearchParams, writer: *std.Io.Writer) !void {
return self._params.urlEncode(.query, writer);
// URLSearchParams always uses UTF-8 per the URL Standard
return self._params.urlEncode(.query, null, "UTF-8", writer);
}
pub fn format(self: *const URLSearchParams, writer: *std.Io.Writer) !void {
@@ -280,34 +281,6 @@ inline fn decodeHex(char: u8) u8 {
return @as([*]const u8, @ptrFromInt((@intFromPtr(&HEX_DECODE_ARRAY) - @as(usize, '0'))))[char];
}
fn escape(input: []const u8, writer: *std.Io.Writer) !void {
for (input) |c| {
if (isUnreserved(c)) {
try writer.writeByte(c);
} else if (c == ' ') {
try writer.writeByte('+');
} else if (c == '*') {
try writer.writeByte('*');
} else if (c >= 0x80) {
// Double-encode: treat byte as Latin-1 code point, encode to UTF-8, then percent-encode
// For bytes 0x80-0xFF (U+0080 to U+00FF), UTF-8 encoding is 2 bytes:
// [0xC0 | (c >> 6), 0x80 | (c & 0x3F)]
const byte1 = 0xC0 | (c >> 6);
const byte2 = 0x80 | (c & 0x3F);
try writer.print("%{X:0>2}%{X:0>2}", .{ byte1, byte2 });
} else {
try writer.print("%{X:0>2}", .{c});
}
}
}
fn isUnreserved(c: u8) bool {
return switch (c) {
'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_' => true,
else => false,
};
}
pub const Iterator = struct {
index: u32 = 0,
list: *const URLSearchParams,

View File

@@ -260,6 +260,7 @@ pub fn send(self: *XMLHttpRequest, body_: ?[]const u8) !void {
.url = self._url,
.method = self._method,
.headers = headers,
.page_id = page.id,
.frame_id = page._frame_id,
.body = self._request_body,
.cookie_jar = if (cookie_support) &page._session.cookie_jar else null,

View File

@@ -204,7 +204,7 @@ pub fn requestIntercept(bc: *CDP.BrowserContext, intercept: *const Notification.
.document => "Document",
.fetch => "Fetch",
},
.networkId = &id.toRequestId(transfer.id), // matches the Network REQ-ID
.networkId = &id.toRequestId(transfer), // matches the Network REQ-ID
}, .{ .session_id = session_id });
log.debug(.cdp, "request intercept", .{
@@ -414,7 +414,7 @@ pub fn requestAuthRequired(bc: *CDP.BrowserContext, intercept: *const Notificati
.scheme = if (challenge.scheme) |s| (if (s == .digest) "digest" else "basic") else "",
.realm = challenge.realm orelse "",
},
.networkId = &id.toRequestId(transfer.id),
.networkId = &id.toRequestId(transfer),
}, .{ .session_id = session_id });
log.debug(.cdp, "request auth required", .{

View File

@@ -242,7 +242,7 @@ pub fn httpRequestFail(bc: *CDP.BrowserContext, msg: *const Notification.Request
// We're missing a bunch of fields, but, for now, this seems like enough
try bc.cdp.sendEvent("Network.loadingFailed", .{
.requestId = &id.toRequestId(msg.transfer.id),
.requestId = &id.toRequestId(msg.transfer),
// Seems to be what chrome answers with. I assume it depends on the type of error?
.type = "Ping",
.errorText = msg.err,
@@ -265,10 +265,10 @@ pub fn httpRequestStart(bc: *CDP.BrowserContext, msg: *const Notification.Reques
try req.headers.add(extra);
}
// We're missing a bunch of fields, but, for now, this seems like enough
// We're missing a bunch of fields, but, for now, this seems like enough
try bc.cdp.sendEvent("Network.requestWillBeSent", .{
.loaderId = &id.toLoaderId(transfer.id),
.requestId = &id.toRequestId(transfer.id),
.loaderId = &id.toLoaderId(req.page_id),
.requestId = &id.toRequestId(transfer),
.frameId = &id.toFrameId(frame_id),
.type = req.resource_type.string(),
.documentURL = page.url,
@@ -285,13 +285,14 @@ pub fn httpResponseHeaderDone(arena: Allocator, bc: *CDP.BrowserContext, msg: *c
const session_id = bc.session_id orelse return;
const transfer = msg.transfer;
const req = &transfer.req;
// We're missing a bunch of fields, but, for now, this seems like enough
try bc.cdp.sendEvent("Network.responseReceived", .{
.loaderId = &id.toLoaderId(transfer.id),
.requestId = &id.toRequestId(transfer.id),
.frameId = &id.toFrameId(transfer.req.frame_id),
.response = TransferAsResponseWriter.init(arena, msg.transfer),
.loaderId = &id.toLoaderId(req.page_id),
.requestId = &id.toRequestId(transfer),
.frameId = &id.toFrameId(req.frame_id),
.response = TransferAsResponseWriter.init(arena, transfer),
.hasExtraInfo = false, // TODO change after adding Network.responseReceivedExtraInfo
}, .{ .session_id = session_id });
}
@@ -302,7 +303,7 @@ pub fn httpRequestDone(bc: *CDP.BrowserContext, msg: *const Notification.Request
const session_id = bc.session_id orelse return;
const transfer = msg.transfer;
try bc.cdp.sendEvent("Network.loadingFinished", .{
.requestId = &id.toRequestId(transfer.id),
.requestId = &id.toRequestId(transfer),
.encodedDataLength = transfer.bytes_received,
}, .{ .session_id = session_id });
}

View File

@@ -134,7 +134,7 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void {
if (page._load_state == .complete) {
const frame_id = &id.toFrameId(page._frame_id);
const loader_id = &id.toLoaderId(page._req_id);
const loader_id = &id.toLoaderId(page.id);
const now = timestampF(.monotonic);
try sendPageLifecycle(bc, "DOMContentLoaded", now, frame_id, loader_id);
@@ -331,7 +331,7 @@ pub fn pageNavigate(bc: *CDP.BrowserContext, event: *const Notification.PageNavi
bc.reset();
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
const loader_id = &id.toLoaderId(event.page_id);
var cdp = bc.cdp;
const reason_: ?[]const u8 = switch (event.opts.reason) {
@@ -414,7 +414,7 @@ pub fn pageFrameCreated(bc: *CDP.BrowserContext, event: *const Notification.Page
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.name = "init",
.frameId = frame_id,
.loaderId = &id.toLoaderId(event.frame_id),
.loaderId = &id.toLoaderId(event.page_id),
.timestamp = event.timestamp,
}, .{ .session_id = session_id });
}
@@ -426,7 +426,7 @@ pub fn pageNavigated(arena: Allocator, bc: *CDP.BrowserContext, event: *const No
const session_id = bc.session_id orelse return;
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
const loader_id = &id.toLoaderId(event.page_id);
var cdp = bc.cdp;
@@ -585,7 +585,7 @@ pub fn pageDOMContentLoaded(bc: anytype, event: *const Notification.PageDOMConte
if (bc.page_life_cycle_events) {
const frame_id = &id.toFrameId(event.frame_id);
const loader_id = &id.toLoaderId(event.req_id);
const loader_id = &id.toLoaderId(event.page_id);
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.timestamp = timestamp,
.name = "DOMContentLoaded",
@@ -609,7 +609,7 @@ pub fn pageLoaded(bc: anytype, event: *const Notification.PageLoaded) !void {
);
if (bc.page_life_cycle_events) {
const loader_id = &id.toLoaderId(event.req_id);
const loader_id = &id.toLoaderId(event.page_id);
try cdp.sendEvent("Page.lifecycleEvent", LifecycleEvent{
.timestamp = timestamp,
.name = "load",
@@ -624,11 +624,11 @@ pub fn pageLoaded(bc: anytype, event: *const Notification.PageLoaded) !void {
}
/// Forwards a page "networkIdle" notification to `sendPageLifecycle`, passing
/// the event timestamp together with the frame id and loader id strings built
/// by `id.toFrameId` and `id.toLoaderId`. Any error from `sendPageLifecycle`
/// is returned to the caller.
pub fn pageNetworkIdle(bc: *CDP.BrowserContext, event: *const Notification.PageNetworkIdle) !void {
// NOTE(review): the two `return` lines below differ only in the loader id
// source (event.req_id vs event.page_id); they look like the before/after
// pair of a diff — confirm which one is live.
return sendPageLifecycle(bc, "networkIdle", event.timestamp, &id.toFrameId(event.frame_id), &id.toLoaderId(event.req_id));
return sendPageLifecycle(bc, "networkIdle", event.timestamp, &id.toFrameId(event.frame_id), &id.toLoaderId(event.page_id));
}
/// Forwards a page "networkAlmostIdle" notification to `sendPageLifecycle`,
/// passing the event timestamp together with the frame id and loader id
/// strings built by `id.toFrameId` and `id.toLoaderId`. Any error from
/// `sendPageLifecycle` is returned to the caller.
pub fn pageNetworkAlmostIdle(bc: *CDP.BrowserContext, event: *const Notification.PageNetworkAlmostIdle) !void {
// NOTE(review): the two `return` lines below differ only in the loader id
// source (event.req_id vs event.page_id); they look like the before/after
// pair of a diff — confirm which one is live.
return sendPageLifecycle(bc, "networkAlmostIdle", event.timestamp, &id.toFrameId(event.frame_id), &id.toLoaderId(event.req_id));
return sendPageLifecycle(bc, "networkAlmostIdle", event.timestamp, &id.toFrameId(event.frame_id), &id.toLoaderId(event.page_id));
}
fn sendPageLifecycle(bc: *CDP.BrowserContext, name: []const u8, timestamp: u64, frame_id: []const u8, loader_id: []const u8) !void {

View File

@@ -31,27 +31,35 @@ pub fn toPageId(comptime id_type: enum { frame_id, loader_id }, input: []const u
return std.fmt.parseInt(u32, input[4..], 10) catch err;
}
/// Formats a u32 as a 14-byte ASCII frame id: the prefix "FID-" followed by
/// the value in decimal, left-padded with zeros to 10 digits. The buffer is
/// returned by value.
pub fn toFrameId(page_id: u32) [14]u8 {
pub fn toFrameId(id: u32) [14]u8 {
var buf: [14]u8 = undefined;
// 4 prefix bytes + 10 digits fill `buf` exactly; a u32 never needs more than
// 10 decimal digits, which is why the bufPrint error is treated as unreachable.
_ = std.fmt.bufPrint(&buf, "FID-{d:0>10}", .{page_id}) catch unreachable;
_ = std.fmt.bufPrint(&buf, "FID-{d:0>10}", .{id}) catch unreachable;
return buf;
}
/// Formats a u32 as a 14-byte ASCII loader id: the prefix "LID-" followed by
/// the value in decimal, left-padded with zeros to 10 digits. The buffer is
/// returned by value.
pub fn toLoaderId(page_id: u32) [14]u8 {
pub fn toLoaderId(id: u32) [14]u8 {
var buf: [14]u8 = undefined;
// 4 prefix bytes + 10 digits fill `buf` exactly; a u32 never needs more than
// 10 decimal digits, which is why the bufPrint error is treated as unreachable.
_ = std.fmt.bufPrint(&buf, "LID-{d:0>10}", .{page_id}) catch unreachable;
_ = std.fmt.bufPrint(&buf, "LID-{d:0>10}", .{id}) catch unreachable;
return buf;
}
pub fn toRequestId(page_id: u32) [14]u8 {
// requestId has special requirements. If it's the main document navigation,
// then it should match the loader id.
// The returned value is always a 14-byte ASCII string: either the loader id
// ("LID-" + 10 digits) or "REQ-" followed by the transfer id zero-padded to
// 10 decimal digits.
const Transfer = @import("../browser/HttpClient.zig").Transfer;
pub fn toRequestId(transfer: *const Transfer) [14]u8 {
const req = transfer.req;
// Requests whose resource type is `.document` reuse the loader id string
// built from the request's page id.
if (req.resource_type == .document) {
return toLoaderId(req.page_id);
}
// Every other request gets its own "REQ-" id. 4 prefix bytes + 10 digits
// fill `buf` exactly, so the bufPrint error is treated as unreachable.
// NOTE(review): `transfer.id` is assumed to be a u32 (at most 10 decimal
// digits) — confirm against the Transfer declaration.
var buf: [14]u8 = undefined;
_ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{page_id}) catch unreachable;
_ = std.fmt.bufPrint(&buf, "REQ-{d:0>10}", .{transfer.id}) catch unreachable;
return buf;
}
/// Formats a u32 as a 14-byte ASCII intercept id: the prefix "INT-" followed
/// by the value in decimal, left-padded with zeros to 10 digits. The buffer is
/// returned by value.
pub fn toInterceptId(page_id: u32) [14]u8 {
pub fn toInterceptId(id: u32) [14]u8 {
var buf: [14]u8 = undefined;
// 4 prefix bytes + 10 digits fill `buf` exactly; a u32 never needs more than
// 10 decimal digits, which is why the bufPrint error is treated as unreachable.
_ = std.fmt.bufPrint(&buf, "INT-{d:0>10}", .{page_id}) catch unreachable;
_ = std.fmt.bufPrint(&buf, "INT-{d:0>10}", .{id}) catch unreachable;
return buf;
}
@@ -172,11 +180,6 @@ test "id: toLoaderId" {
try testing.expectEqual("LID-4294967295", toLoaderId(4294967295));
}
test "id: toRequestId" {
try testing.expectEqual("REQ-0000000000", toRequestId(0));
try testing.expectEqual("REQ-4294967295", toRequestId(4294967295));
}
test "id: toInterceptId" {
try testing.expectEqual("INT-0000000000", toInterceptId(0));
try testing.expectEqual("INT-4294967295", toInterceptId(4294967295));