From f5cfc4d31577273336e3bf3d251d96257f0a3e92 Mon Sep 17 00:00:00 2001 From: Lucien Coffe Date: Mon, 23 Mar 2026 14:17:50 +0100 Subject: [PATCH 01/21] feat: add --block_private_networks and --block_cidrs CLI flags Block outbound HTTP requests to specified IP ranges before TCP handshake using libcurl CURLOPT_OPENSOCKETFUNCTION callback. Fires after DNS resolution, reads resolved IP directly from sockaddr, does bitwise CIDR comparison. Fail-closed: unknown address families are blocked. --block_private_networks blocks RFC1918, localhost, link-local, ULA. --block_cidrs blocks additional comma-separated CIDRs. IPv4-mapped IPv6 (::ffff:x.x.x.x) is unwrapped to prevent bypass. --- src/Config.zig | 44 +++++ src/network/IpFilter.zig | 407 +++++++++++++++++++++++++++++++++++++++ src/network/Network.zig | 49 ++++- src/network/http.zig | 96 ++++++++- src/sys/libcurl.zig | 58 ++++++ 5 files changed, 649 insertions(+), 5 deletions(-) create mode 100644 src/network/IpFilter.zig diff --git a/src/Config.zig b/src/Config.zig index 25e033e3..10520b79 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -205,6 +205,20 @@ pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig { }; } +pub fn blockPrivateNetworks(self: *const Config) bool { + return switch (self.mode) { + inline .serve, .fetch, .mcp => |opts| opts.common.block_private_networks, + else => unreachable, + }; +} + +pub fn blockCidrs(self: *const Config) ?[]const u8 { + return switch (self.mode) { + inline .serve, .fetch, .mcp => |opts| opts.common.block_cidrs, + else => unreachable, + }; +} + pub fn maxConnections(self: *const Config) u16 { return switch (self.mode) { .serve => |opts| opts.cdp_max_connections, @@ -292,6 +306,9 @@ pub const Common = struct { web_bot_auth_key_file: ?[]const u8 = null, web_bot_auth_keyid: ?[]const u8 = null, web_bot_auth_domain: ?[]const u8 = null, + + block_private_networks: bool = false, + block_cidrs: ?[]const u8 = null, }; /// Pre-formatted HTTP headers for reuse across Http and Client. 
@@ -351,6 +368,19 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ we make requests towards. \\ Defaults to false. \\ + \\--block_private_networks + \\ Blocks HTTP requests to private/internal IP addresses + \\ after DNS resolution. Useful for sandboxing, multi-tenant + \\ deployments, and preventing access to internal infrastructure + \\ regardless of what triggers the request (JavaScript, HTML + \\ resources, redirects, etc.). + \\ Defaults to false. + \\ + \\--block_cidrs + \\ Additional CIDR ranges to block, comma-separated. + \\ e.g. --block_cidrs 169.254.169.254/32,fd00:ec2::254/128 + \\ Can be used standalone or combined with --block_private_networks. + \\ \\--http-proxy The HTTP proxy to use for all HTTP requests. \\ A username:password can be included for basic authentication. \\ Defaults to none. @@ -1094,5 +1124,19 @@ fn parseCommonArg( return true; } + if (std.mem.eql(u8, "--block_private_networks", opt)) { + common.block_private_networks = true; + return true; + } + + if (std.mem.eql(u8, "--block_cidrs", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = "--block_cidrs" }); + return error.InvalidArgument; + }; + common.block_cidrs = try allocator.dupe(u8, str); + return true; + } + return false; } diff --git a/src/network/IpFilter.zig b/src/network/IpFilter.zig new file mode 100644 index 00000000..f44b9b07 --- /dev/null +++ b/src/network/IpFilter.zig @@ -0,0 +1,407 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. 
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

const std = @import("std");
const posix = std.posix;
const libcurl = @import("../sys/libcurl.zig");

const IpFilter = @This();

/// Binary representation for bitwise CIDR comparison.
/// Bytes are stored in network order (most significant byte first).
pub const Ipv4Addr = [4]u8;
pub const Ipv6Addr = [16]u8;

/// An IPv4 network: the first `prefix_len` bits of `network` are significant.
pub const CidrV4 = struct {
    network: Ipv4Addr,
    prefix_len: u6, // 0-32
};

/// An IPv6 network: the first `prefix_len` bits of `network` are significant.
pub const CidrV6 = struct {
    network: Ipv6Addr,
    prefix_len: u8, // 0-128
};

// IpFilter fields
block_private: bool,
custom_v4: []const CidrV4,
custom_v6: []const CidrV6,

// ── Comptime helpers ─────────────────────────────────────────────────────────

/// Comptime helper: parse dotted-decimal IPv4 to [4]u8.
/// Rejects malformed literals with a compile error instead of silently
/// producing garbage: exactly four octets are required, each made of one or
/// more ASCII digits with a value of at most 255.
fn parseIpv4Comptime(comptime s: []const u8) Ipv4Addr {
    return comptime blk: {
        var result: Ipv4Addr = undefined;
        // u16 so that an out-of-range octet is reported by the explicit check
        // below rather than by an arithmetic overflow.
        var octet: u16 = 0;
        var digits: usize = 0;
        var octet_idx: usize = 0;
        for (s) |ch| {
            if (ch == '.') {
                if (digits == 0 or octet_idx >= 3) {
                    @compileError("invalid IPv4 literal: " ++ s);
                }
                result[octet_idx] = @intCast(octet);
                octet_idx += 1;
                octet = 0;
                digits = 0;
            } else {
                if (ch < '0' or ch > '9') {
                    @compileError("invalid IPv4 literal: " ++ s);
                }
                octet = octet * 10 + (ch - '0');
                if (octet > 255) {
                    @compileError("invalid IPv4 literal: " ++ s);
                }
                digits += 1;
            }
        }
        // Without this check a literal such as "10.0.0" would leave the last
        // byte of `result` undefined.
        if (digits == 0 or octet_idx != 3) {
            @compileError("invalid IPv4 literal: " ++ s);
        }
        result[octet_idx] = @intCast(octet);
        break :blk result;
    };
}

/// Comptime helper: build a CidrV4.
/// `prefix` must be in 0..32; larger values are a compile error.
fn makeCidrV4(comptime addr: []const u8, comptime prefix: u6) CidrV4 {
    if (prefix > 32) @compileError("IPv4 prefix length must be <= 32");
    return .{ .network = parseIpv4Comptime(addr), .prefix_len = prefix };
}

/// Comptime helper: build a CidrV6 from a 16-byte literal array.
fn makeCidrV6(comptime bytes: Ipv6Addr, comptime prefix: u8) CidrV6 {
    if (prefix > 128) @compileError("IPv6 prefix length must be <= 128");
    return .{ .network = bytes, .prefix_len = prefix };
}

// ── Comptime CIDR range tables ───────────────────────────────────────────────

/// IPv4 ranges blocked when `block_private` is set.
const PRIVATE_V4 = [_]CidrV4{
    makeCidrV4("127.0.0.0", 8), // localhost
    makeCidrV4("0.0.0.0", 8), // current network
    makeCidrV4("10.0.0.0", 8), // RFC1918
    makeCidrV4("172.16.0.0", 12), // RFC1918
    makeCidrV4("192.168.0.0", 16), // RFC1918
    makeCidrV4("169.254.0.0", 16), // link-local
};

/// IPv6 ranges blocked when `block_private` is set.
const PRIVATE_V6 = [_]CidrV6{
    // ::/128 — unspecified address; the IPv6 counterpart of the 0.0.0.0/8
    // entry above, which some network stacks route to the local host.
    makeCidrV6([_]u8{0} ** 16, 128),
    // ::1/128 — IPv6 localhost
    makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128),
    // fe80::/10 — link-local
    makeCidrV6(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10),
    // fc00::/7 — ULA
    makeCidrV6(.{ 0xfc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 7),
};

// ── Runtime IP parsing ───────────────────────────────────────────────────────

/// Parse dotted-decimal IPv4 string to 4-byte array. Returns null on parse failure.
/// Each of the four octets must consist of 1-3 ASCII digits with a value of at
/// most 255.
fn parseIpv4(str: []const u8) ?Ipv4Addr {
    var addr: Ipv4Addr = undefined;
    var it = std.mem.splitScalar(u8, str, '.');
    var i: usize = 0;
    while (it.next()) |part| : (i += 1) {
        if (i >= 4) return null;
        // std.fmt.parseInt tolerates a leading sign and '_' separators
        // ("-0", "+10", "1_0"); none of those belong in an IP address, so
        // insist on plain digits before handing the text over.
        if (part.len == 0 or part.len > 3) return null;
        for (part) |ch| {
            if (!std.ascii.isDigit(ch)) return null;
        }
        addr[i] = std.fmt.parseInt(u8, part, 10) catch return null;
    }
    if (i != 4) return null;
    return addr;
}

/// Parse IPv6 string to 16-byte array. Handles compressed notation.
/// Strips zone ID (e.g. "fe80::1%eth0" -> "fe80::1").
/// Returns null on parse failure.
fn parseIpv6(str: []const u8) ?Ipv6Addr {
    // Strip zone ID: everything from the first '%' onwards is dropped.
    const clean = if (std.mem.indexOfScalar(u8, str, '%')) |idx| str[0..idx] else str;
    const parsed = std.net.Address.parseIp6(clean, 0) catch return null;
    return parsed.in6.sa.addr;
}

// ── CIDR matching ────────────────────────────────────────────────────────────

/// Detect IPv4-mapped IPv6 address (::ffff:x.x.x.x).
/// Returns the embedded IPv4 address if detected, null otherwise.
fn isIpv4Mapped(addr: Ipv6Addr) ?Ipv4Addr {
    // IPv4-mapped prefix: 10 zero bytes + 2 0xFF bytes
    const prefix = [12]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
    if (!std.mem.eql(u8, addr[0..12], &prefix)) return null;
    return addr[12..16].*;
}

/// Shared prefix comparison for both address families.
/// Returns true when the first `prefix_len` bits of `addr` and `network` are
/// equal. `addr` and `network` must have the same length. A `prefix_len`
/// larger than the address width is clamped to the full width (exact match),
/// so an oversized prefix can never index past the end of the address.
fn prefixMatches(addr: []const u8, network: []const u8, prefix_len: usize) bool {
    std.debug.assert(addr.len == network.len);
    const bits = @min(prefix_len, addr.len * 8);
    const full_bytes = bits / 8;

    // Whole bytes covered by the prefix must be identical.
    if (!std.mem.eql(u8, addr[0..full_bytes], network[0..full_bytes])) return false;

    // Remaining 1-7 bits (if any) are compared under a high-bit mask.
    // When rem_bits != 0, bits is below the full width, so full_bytes is a
    // valid index.
    const rem_bits: u4 = @intCast(bits % 8);
    if (rem_bits == 0) return true;
    const shift: u3 = @intCast(8 - rem_bits);
    const mask: u8 = @as(u8, 0xFF) << shift;
    return (addr[full_bytes] & mask) == (network[full_bytes] & mask);
}

/// Check if IPv4 address falls within a CIDR range.
/// A prefix length of 0 matches every address.
fn matchesCidrV4(addr: Ipv4Addr, cidr: CidrV4) bool {
    return prefixMatches(&addr, &cidr.network, cidr.prefix_len);
}

/// Check if IPv6 address falls within a CIDR range.
/// A prefix length of 0 matches every address.
fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool {
    return prefixMatches(&addr, &cidr.network, cidr.prefix_len);
}

// ── Public API ───────────────────────────────────────────────────────────────

/// Result of `parseCidrList`: the parsed ranges split by address family.
pub const ParsedCidrs = struct { v4: []CidrV4, v6: []CidrV6 };

/// Parse a comma-separated list of CIDR strings (e.g. "10.0.0.0/8,2001:db8::/32")
/// into separate IPv4 and IPv6 slices. Caller owns the returned slices and must
/// free them with the same allocator. Returns error.InvalidCidr on any malformed entry.
pub fn parseCidrList(
    allocator: std.mem.Allocator,
    cidr_str: []const u8,
) !ParsedCidrs {
    var v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer v4_list.deinit(allocator);
    var v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer v6_list.deinit(allocator);

    var it = std.mem.splitScalar(u8, cidr_str, ',');
    while (it.next()) |entry| {
        // Surrounding blanks are ignored, and so are empty entries
        // (e.g. a trailing comma).
        const trimmed = std.mem.trim(u8, entry, " \t");
        if (trimmed.len == 0) continue;

        const slash = std.mem.indexOfScalar(u8, trimmed, '/') orelse return error.InvalidCidr;
        const addr_str = trimmed[0..slash];
        const prefix_str = trimmed[slash + 1 ..];

        if (parseIpv4(addr_str)) |v4| {
            const prefix = parsePrefixLen(prefix_str, 32) orelse return error.InvalidCidr;
            try v4_list.append(allocator, .{ .network = v4, .prefix_len = @intCast(prefix) });
        } else if (parseIpv6(addr_str)) |v6| {
            const prefix = parsePrefixLen(prefix_str, 128) orelse return error.InvalidCidr;
            try v6_list.append(allocator, .{ .network = v6, .prefix_len = prefix });
        } else {
            return error.InvalidCidr;
        }
    }

    // Hand both slices to the caller; if the second conversion fails the first
    // slice is no longer owned by its list and must be released here.
    const v4 = try v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(v4);
    const v6 = try v6_list.toOwnedSlice(allocator);
    return .{ .v4 = v4, .v6 = v6 };
}

/// Parse a CIDR prefix length: 1-3 ASCII digits with a value of at most `max`.
/// Returns null for anything else. std.fmt.parseInt on its own would accept a
/// sign or '_' separators, turning a typo such as "/-0" into a /0 that
/// matches every address.
fn parsePrefixLen(str: []const u8, max: u8) ?u8 {
    if (str.len == 0 or str.len > 3) return null;
    for (str) |ch| {
        if (!std.ascii.isDigit(ch)) return null;
    }
    const value = std.fmt.parseInt(u8, str, 10) catch return null;
    return if (value <= max) value else null;
}

/// Create an IpFilter. Set block_private to block outbound requests to
/// RFC1918, localhost, link-local, and ULA ranges — useful for sandboxing
/// and preventing access to internal infrastructure. custom_v4/custom_v6
/// are additional user-defined ranges (caller owns the slices).
pub fn init(block_private: bool, custom_v4: []const CidrV4, custom_v6: []const CidrV6) IpFilter {
    return .{
        .block_private = block_private,
        .custom_v4 = custom_v4,
        .custom_v6 = custom_v6,
    };
}

/// True when `addr` falls in a built-in private IPv4 range (only consulted if
/// `block_private` is set) or in any user-supplied IPv4 range.
fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool {
    if (self.block_private) {
        for (PRIVATE_V4) |cidr| {
            if (matchesCidrV4(addr, cidr)) return true;
        }
    }
    for (self.custom_v4) |cidr| {
        if (matchesCidrV4(addr, cidr)) return true;
    }
    return false;
}

/// True when `addr` falls in a built-in private IPv6 range (only consulted if
/// `block_private` is set) or in any user-supplied IPv6 range.
fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool {
    if (self.block_private) {
        for (PRIVATE_V6) |cidr| {
            if (matchesCidrV6(addr, cidr)) return true;
        }
    }
    for (self.custom_v6) |cidr| {
        if (matchesCidrV6(addr, cidr)) return true;
    }
    return false;
}

/// Check if an address from curl's opensocket callback should be blocked.
/// Extracts the IP directly from the sockaddr structure; no string parsing needed.
/// Fail-closed: an unknown address family, or an `addrlen` too short for the
/// declared family, -> true (blocked).
pub fn isBlockedSockaddr(self: *const IpFilter, sa: *const libcurl.CurlSockAddr) bool {
    switch (sa.family) {
        posix.AF.INET => {
            // Refuse to interpret bytes the caller did not declare as valid.
            if (sa.addrlen < @sizeOf(posix.sockaddr.in)) return true;
            const sin: *const posix.sockaddr.in = @ptrCast(&sa.addr);
            // sin.addr is stored in network byte order, so reinterpreting its
            // memory yields the octets in a.b.c.d order — the layout Ipv4Addr uses.
            const bytes: [4]u8 = @bitCast(sin.addr);
            return self.isBlockedV4(bytes);
        },
        posix.AF.INET6 => {
            if (sa.addrlen < @sizeOf(posix.sockaddr.in6)) return true;
            const sin6: *const posix.sockaddr.in6 = @ptrCast(&sa.addr);
            const addr: Ipv6Addr = sin6.addr;
            if (isIpv4Mapped(addr)) |v4| {
                // An IPv4-mapped address is blocked when either representation
                // matches: the embedded IPv4 address against the IPv4 rules
                // (prevents ::ffff:127.0.0.1 style bypasses), or the full
                // address against the IPv6 rules (so a user rule written in
                // ::ffff: form still applies).
                return self.isBlockedV4(v4) or self.isBlockedV6(addr);
            }
            return self.isBlockedV6(addr);
        },
        else => return true, // unknown family -> fail-closed
    }
}

// ── Unit tests ───────────────────────────────────────────────────────────────

/// Test-only convenience: parse an IP string and check against the filter.
/// Test inputs must be valid IPs; unreachable on parse failure.
+fn testBlocked(self: *const IpFilter, ip: []const u8) bool { + if (parseIpv4(ip)) |v4| return self.isBlockedV4(v4); + if (parseIpv6(ip)) |v6| { + if (isIpv4Mapped(v6)) |v4| return self.isBlockedV4(v4); + return self.isBlockedV6(v6); + } + unreachable; +} + +test "IPv4 CIDR matching: private group boundaries" { + const filter = IpFilter.init(true, &.{}, &.{}); + const t = std.testing; + + // Loopback + try t.expect(filter.testBlocked("127.0.0.1")); + try t.expect(filter.testBlocked("127.255.255.255")); + try t.expect(!filter.testBlocked("128.0.0.1")); + + // RFC1918 10.0.0.0/8 + try t.expect(filter.testBlocked("10.0.0.1")); + try t.expect(filter.testBlocked("10.255.255.255")); + try t.expect(!filter.testBlocked("11.0.0.0")); + + // RFC1918 172.16.0.0/12 — critical boundary + try t.expect(!filter.testBlocked("172.15.255.255")); // MUST NOT block + try t.expect(filter.testBlocked("172.16.0.0")); // MUST block + try t.expect(filter.testBlocked("172.31.255.255")); // MUST block + try t.expect(!filter.testBlocked("172.32.0.0")); // MUST NOT block + + // RFC1918 192.168.0.0/16 + try t.expect(filter.testBlocked("192.168.0.1")); + try t.expect(!filter.testBlocked("192.169.0.0")); + + // Link-local + try t.expect(filter.testBlocked("169.254.1.1")); + try t.expect(!filter.testBlocked("169.255.0.0")); + + // Public IP — must NOT be blocked + try t.expect(!filter.testBlocked("8.8.8.8")); + try t.expect(!filter.testBlocked("1.1.1.1")); + try t.expect(!filter.testBlocked("93.184.216.34")); // example.com +} + +test "IPv6 CIDR matching: private group" { + const filter = IpFilter.init(true, &.{}, &.{}); + const t = std.testing; + + try t.expect(filter.testBlocked("::1")); // localhost + try t.expect(filter.testBlocked("fe80::1")); // link-local + try t.expect(filter.testBlocked("fc00::1")); // ULA + try t.expect(filter.testBlocked("fd00::1")); // ULA (fd is fc00::/7) + try t.expect(!filter.testBlocked("2001:db8::1")); // documentation range — public + try 
t.expect(!filter.testBlocked("2606:4700::1111")); // Cloudflare +} + +test "IPv4-mapped IPv6 bypass prevention" { + const filter = IpFilter.init(true, &.{}, &.{}); + const t = std.testing; + + // ::ffff:127.0.0.1 must be blocked (maps to loopback) + try t.expect(filter.testBlocked("::ffff:127.0.0.1")); + // ::ffff:10.0.0.1 must be blocked (maps to RFC1918) + try t.expect(filter.testBlocked("::ffff:10.0.0.1")); + // ::ffff:8.8.8.8 must NOT be blocked (maps to public) + try t.expect(!filter.testBlocked("::ffff:8.8.8.8")); +} + +test "fail-closed: unknown address family blocked by isBlockedSockaddr" { + const filter = IpFilter.init(false, &.{}, &.{}); + const t = std.testing; + + // Construct a sockaddr with an unknown address family + var sa: libcurl.CurlSockAddr = .{ + .family = 255, // not AF_INET or AF_INET6 + .socktype = posix.SOCK.STREAM, + .protocol = 0, + .addrlen = 0, + .addr = undefined, + }; + try t.expect(filter.isBlockedSockaddr(&sa)); +} + +test "custom CIDR ranges" { + const custom_v4 = [_]CidrV4{ + .{ .network = .{ 203, 0, 113, 0 }, .prefix_len = 24 }, // TEST-NET-3 + }; + const filter = IpFilter.init(false, &custom_v4, &.{}); + const t = std.testing; + + try t.expect(filter.testBlocked("203.0.113.1")); // in custom range + try t.expect(filter.testBlocked("203.0.113.255")); // in custom range + try t.expect(!filter.testBlocked("203.0.114.0")); // outside custom range + try t.expect(!filter.testBlocked("8.8.8.8")); // not in range +} + +test "private group blocks cloud metadata IP via link-local" { + // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. + // Users who want targeted cloud-metadata-only blocking can use --block_cidrs. 
+ const filter_private = IpFilter.init(true, &.{}, &.{}); + const filter_none = IpFilter.init(false, &.{}, &.{}); + const t = std.testing; + + try t.expect(filter_private.testBlocked("169.254.169.254")); // blocked via link-local + try t.expect(!filter_none.testBlocked("169.254.169.254")); // not blocked when disabled +} + +test "parseCidrList: mixed IPv4 and IPv6" { + const t = std.testing; + const result = try parseCidrList(t.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); + defer t.allocator.free(result.v4); + defer t.allocator.free(result.v6); + + try t.expectEqual(2, result.v4.len); + try t.expectEqual(1, result.v6.len); + + // spot-check: 203.0.113.0/24 and 192.168.1.0/24 + const f = IpFilter.init(false, result.v4, result.v6); + try t.expect(f.testBlocked("203.0.113.1")); + try t.expect(!f.testBlocked("203.0.114.0")); + try t.expect(f.testBlocked("192.168.1.1")); + try t.expect(f.testBlocked("2001:db8::1")); + try t.expect(!f.testBlocked("2001:db9::1")); +} + +test "parseCidrList: invalid input returns error" { + const t = std.testing; + try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "not-a-cidr")); + try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "10.0.0.0/33")); // prefix too large + try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "10.0.0.0")); // missing prefix + try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "10.0.0.0/abc")); // non-numeric prefix +} + +test { + std.testing.refAllDecls(@This()); +} diff --git a/src/network/Network.zig b/src/network/Network.zig index 1fb8c8fb..883ceef5 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -28,6 +28,7 @@ const Config = @import("../Config.zig"); const libcurl = @import("../sys/libcurl.zig"); const http = @import("http.zig"); +const IpFilter = @import("IpFilter.zig"); const RobotStore = @import("Robots.zig").RobotStore; const WebBotAuth = @import("WebBotAuth.zig"); @@ -85,6 +86,12 @@ callbacks: 
[MAX_TICK_CALLBACKS]TickCallback = undefined, callbacks_len: usize = 0, callbacks_mutex: std.Thread.Mutex = .{}, +/// Optional IP filter for blocking requests to private/internal networks (--block_private_networks). +ip_filter: ?*IpFilter = null, +// Custom CIDR slices backing ip_filter; null when --block_cidrs was not set. +ip_filter_custom_v4: ?[]IpFilter.CidrV4 = null, +ip_filter_custom_v6: ?[]IpFilter.CidrV6 = null, + const TickCallback = struct { ctx: *anyopaque, fun: *const fn (*anyopaque) void, @@ -230,13 +237,39 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { ca_blob = try loadCerts(allocator); } + // IP filter for blocking requests to private/internal networks. Heap-allocated + // for pointer stability: connections need a stable *const IpFilter to pass to + // curl's opensocket callback. + const block_private = config.blockPrivateNetworks(); + const custom_cidrs: ?IpFilter.ParsedCidrs = blk: { + const s = config.blockCidrs() orelse break :blk null; + break :blk try IpFilter.parseCidrList(allocator, s); + }; + errdefer if (custom_cidrs) |c| { + allocator.free(c.v4); + allocator.free(c.v6); + }; + + const ip_filter: ?*IpFilter = blk: { + const has_custom = if (custom_cidrs) |c| c.v4.len > 0 or c.v6.len > 0 else false; + if (!block_private and !has_custom) break :blk null; + const f = try allocator.create(IpFilter); + f.* = IpFilter.init( + block_private, + if (custom_cidrs) |c| c.v4 else &.{}, + if (custom_cidrs) |c| c.v6 else &.{}, + ); + break :blk f; + }; + errdefer if (ip_filter) |f| allocator.destroy(f); + const count: usize = config.httpMaxConcurrent(); const connections = try allocator.alloc(http.Connection, count); errdefer allocator.free(connections); var available: std.DoublyLinkedList = .{}; for (0..count) |i| { - connections[i] = try http.Connection.init(ca_blob, config); + connections[i] = try http.Connection.init(ca_blob, config, ip_filter); available.append(&connections[i].node); } @@ -280,6 +313,10 @@ pub fn 
init(allocator: Allocator, app: *App, config: *const Config) !Network { .ws_pool = .init(allocator), .ws_max = config.wsMaxConcurrent(), + + .ip_filter = ip_filter, + .ip_filter_custom_v4 = if (custom_cidrs) |c| c.v4 else null, + .ip_filter_custom_v6 = if (custom_cidrs) |c| c.v6 else null, }; } @@ -316,6 +353,12 @@ pub fn deinit(self: *Network) void { if (self.cache) |*cache| cache.deinit(); + if (self.ip_filter) |f| { + self.allocator.destroy(f); + } + if (self.ip_filter_custom_v4) |v4| self.allocator.free(v4); + if (self.ip_filter_custom_v6) |v6| self.allocator.free(v6); + globalDeinit(); } @@ -612,7 +655,7 @@ pub fn releaseConnection(self: *Network, conn: *http.Connection) void { self.ws_count -= 1; }, else => { - conn.reset(self.config, self.ca_blob) catch |err| { + conn.reset(self.config, self.ca_blob, self.ip_filter) catch |err| { lp.assert(false, "couldn't reset curl easy", .{ .err = err }); }; self.conn_mutex.lock(); @@ -637,7 +680,7 @@ pub fn newConnection(self: *Network) ?*http.Connection { }; // don't do this under lock - conn.* = http.Connection.init(self.ca_blob, self.config) catch { + conn.* = http.Connection.init(self.ca_blob, self.config, self.ip_filter) catch { self.ws_mutex.lock(); defer self.ws_mutex.unlock(); self.ws_pool.destroy(conn); diff --git a/src/network/http.zig b/src/network/http.zig index 4bf71ded..08cf3df2 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -17,9 +17,11 @@ // along with this program. If not, see . const std = @import("std"); +const posix = std.posix; const Config = @import("../Config.zig"); const libcurl = @import("../sys/libcurl.zig"); +const IpFilter = @import("IpFilter.zig"); const log = @import("lightpanda").log; const assert = @import("lightpanda").assert; @@ -222,6 +224,35 @@ pub const ResponseHead = struct { } }; +/// Opensocket callback: blocks connections to private/internal IP ranges +/// before TCP SYN, regardless of request origin (JS, HTML resources, redirects, etc.). 
+/// Called by curl after DNS resolution, before the socket is created. +/// Returns CURL_SOCKET_BAD to block; otherwise creates and returns a real socket fd. +/// clientp is a *const IpFilter passed via CURLOPT_OPENSOCKETDATA. +fn opensocketCallback( + purpose: libcurl.CurlSockType, + address: *libcurl.CurlSockAddr, + clientp: ?*anyopaque, +) libcurl.CurlSocket { + const filter: *const IpFilter = @ptrCast(@alignCast(clientp orelse return libcurl.CURL_SOCKET_BAD)); + if (filter.isBlockedSockaddr(address)) { + if (address.family == posix.AF.INET or address.family == posix.AF.INET6) { + const ip = std.net.Address.initPosix(@ptrCast(&address.addr)); + log.warn(.http, "blocked by IP filter", .{ .ip = ip }); + } else { + log.warn(.http, "blocked by IP filter", .{ .family = address.family }); + } + return libcurl.CURL_SOCKET_BAD; + } + _ = purpose; // purpose is informational; we always open the same socket type + const fd = posix.socket( + @intCast(address.family), + @intCast(address.socktype), + @intCast(address.protocol), + ) catch return libcurl.CURL_SOCKET_BAD; + return fd; +} + pub const Connection = struct { _easy: *libcurl.Curl, transport: Transport, @@ -233,13 +264,17 @@ pub const Connection = struct { websocket: *@import("../browser/webapi/net/WebSocket.zig"), }; - pub fn init(ca_blob: ?libcurl.CurlBlob, config: *const Config) !Connection { + pub fn init( + ca_blob: ?libcurl.CurlBlob, + config: *const Config, + ip_filter: ?*const IpFilter, + ) !Connection { const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy; var self = Connection{ ._easy = easy, .transport = .none }; errdefer self.deinit(); - try self.reset(config, ca_blob); + try self.reset(config, ca_blob, ip_filter); return self; } @@ -364,6 +399,7 @@ pub const Connection = struct { self: *Connection, config: *const Config, ca_blob: ?libcurl.CurlBlob, + ip_filter: ?*const IpFilter, ) !void { libcurl.curl_easy_reset(self._easy); self.transport = .none; @@ -414,6 +450,12 @@ pub 
const Connection = struct { // try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback); } + + // IP filter: block private/internal network addresses + if (ip_filter) |filter| { + try libcurl.curl_easy_setopt(self._easy, .opensocket_function, opensocketCallback); + try libcurl.curl_easy_setopt(self._easy, .opensocket_data, @constCast(filter)); + } } fn discardBody(_: [*]const u8, count: usize, len: usize, _: ?*anyopaque) usize { @@ -596,3 +638,53 @@ fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8, } return 0; } + +// ── Unit tests for opensocketCallback ──────────────────────────────────────── + +fn makeSockAddrV4(ip: [4]u8) libcurl.CurlSockAddr { + var sa: posix.sockaddr.in = .{ + .port = 0, + .addr = @bitCast(ip), + }; + var curl_sa: libcurl.CurlSockAddr = .{ + .family = posix.AF.INET, + .socktype = posix.SOCK.STREAM, + .protocol = 0, + .addrlen = @sizeOf(posix.sockaddr.in), + .addr = undefined, + }; + @memcpy(std.mem.asBytes(&curl_sa.addr)[0..@sizeOf(posix.sockaddr.in)], std.mem.asBytes(&sa)); + return curl_sa; +} + +test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" { + const filter = IpFilter.init(true, &.{}, &.{}); + var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); + const result = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); + try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, result); +} + +test "opensocketCallback: public IPv4 opens a real socket" { + // 8.8.8.8 — not in any blocked range; callback should create a real socket + const filter = IpFilter.init(true, &.{}, &.{}); + var sa = makeSockAddrV4(.{ 8, 8, 8, 8 }); + const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); + // A real fd is always >= 0 + try std.testing.expect(fd >= 0); + posix.close(fd); +} + +test "opensocketCallback: null clientp returns CURL_SOCKET_BAD (fail-closed)" { + var sa = makeSockAddrV4(.{ 8, 8, 8, 8 }); + const result = opensocketCallback(.ipcxn, &sa, null); + try 
std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, result); +} + +test "opensocketCallback: block_private=false allows private IP" { + // When block_private is false the filter blocks nothing + const filter = IpFilter.init(false, &.{}, &.{}); + var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); + const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); + try std.testing.expect(fd >= 0); + posix.close(fd); +} diff --git a/src/sys/libcurl.zig b/src/sys/libcurl.zig index 31587823..19605f2a 100644 --- a/src/sys/libcurl.zig +++ b/src/sys/libcurl.zig @@ -43,6 +43,27 @@ pub const curl_writefunc_error: usize = c.CURL_WRITEFUNC_ERROR; pub const curl_readfunc_pause: usize = c.CURL_READFUNC_PAUSE; pub const CurlReadFunction = fn ([*]u8, usize, usize, *anyopaque) usize; +pub const CurlSockType = enum(c.curlsocktype) { + ipcxn = c.CURLSOCKTYPE_IPCXN, + accept = c.CURLSOCKTYPE_ACCEPT, +}; + +/// Mirror of curl's struct curl_sockaddr. The addr field is a struct sockaddr +/// inline (not a pointer), so addrlen tells you how many bytes of addr are valid. +pub const CurlSockAddr = extern struct { + family: c_int, + socktype: c_int, + protocol: c_int, + addrlen: c_uint, + addr: std.posix.sockaddr, +}; + +pub const CURL_SOCKET_BAD: c.curl_socket_t = c.CURL_SOCKET_BAD; + +/// Zig-side opensocket callback: purpose and address first, user data last. +/// Return a valid socket fd to allow the connection, or CURL_SOCKET_BAD to block. 
+pub const CurlOpenSocketFunction = fn (CurlSockType, *CurlSockAddr, ?*anyopaque) c.curl_socket_t; + pub const FreeCallback = fn (ptr: ?*anyopaque) void; pub const StrdupCallback = fn (str: [*:0]const u8) ?[*:0]u8; pub const MallocCallback = fn (size: usize) ?*anyopaque; @@ -137,8 +158,17 @@ comptime { return 0; } }.cb; + const opensocket_cb_check: c.curl_opensocket_callback = struct { + fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t { + _ = clientp; + _ = purpose; + _ = address; + return CURL_SOCKET_BAD; + } + }.cb; _ = debug_cb_check; _ = write_cb_check; + _ = opensocket_cb_check; if (@sizeOf(CurlWaitFd) != @sizeOf(c.curl_waitfd)) { @compileError("CurlWaitFd size mismatch"); @@ -152,6 +182,17 @@ comptime { if (c.CURL_WAIT_POLLIN != 1 or c.CURL_WAIT_POLLPRI != 2 or c.CURL_WAIT_POLLOUT != 4) { @compileError("CURL_WAIT_* flag values don't match CurlWaitEvents packed struct bit layout"); } + if (@sizeOf(CurlSockAddr) != @sizeOf(c.curl_sockaddr)) { + @compileError("CurlSockAddr size mismatch with curl_sockaddr"); + } + if (@offsetOf(CurlSockAddr, "family") != @offsetOf(c.curl_sockaddr, "family") or + @offsetOf(CurlSockAddr, "socktype") != @offsetOf(c.curl_sockaddr, "socktype") or + @offsetOf(CurlSockAddr, "protocol") != @offsetOf(c.curl_sockaddr, "protocol") or + @offsetOf(CurlSockAddr, "addrlen") != @offsetOf(c.curl_sockaddr, "addrlen") or + @offsetOf(CurlSockAddr, "addr") != @offsetOf(c.curl_sockaddr, "addr")) + { + @compileError("CurlSockAddr layout mismatch with curl_sockaddr"); + } } pub const CurlOption = enum(c.CURLoption) { @@ -190,6 +231,8 @@ pub const CurlOption = enum(c.CURLoption) { read_function = c.CURLOPT_READFUNCTION, connect_only = c.CURLOPT_CONNECT_ONLY, upload = c.CURLOPT_UPLOAD, + opensocket_function = c.CURLOPT_OPENSOCKETFUNCTION, + opensocket_data = c.CURLOPT_OPENSOCKETDATA, }; pub const CurlMOption = enum(c.CURLMoption) { @@ -620,6 +663,7 @@ pub fn curl_easy_setopt(easy: *Curl, 
comptime option: CurlOption, value: anytype .header_data, .read_data, .write_data, + .opensocket_data, => blk: { const ptr: ?*anyopaque = switch (@typeInfo(@TypeOf(value))) { .null => null, @@ -643,6 +687,20 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype break :blk c.curl_easy_setopt(easy, opt, cb); }, + .opensocket_function => blk: { + const cb: c.curl_opensocket_callback = switch (@typeInfo(@TypeOf(value))) { + .null => null, + .@"fn" => struct { + fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t { + const addr: *CurlSockAddr = @ptrCast(address orelse return CURL_SOCKET_BAD); + return value(@enumFromInt(purpose), addr, clientp); + } + }.cb, + else => @compileError("expected Zig function or null for " ++ @tagName(option) ++ ", got " ++ @typeName(@TypeOf(value))), + }; + break :blk c.curl_easy_setopt(easy, opt, cb); + }, + .header_function => blk: { const cb: c.curl_write_callback = switch (@typeInfo(@TypeOf(value))) { .null => null, From fb6c4e4978bff25d0ba6b3db50c4de19240c0260 Mon Sep 17 00:00:00 2001 From: Lucien Coffe Date: Tue, 24 Mar 2026 10:29:13 +0100 Subject: [PATCH 02/21] feat: add allow-list exclusions to --block_cidrs CIDRs prefixed with '-' are treated as allow rules that exempt matching IPs from blocking. Allow rules take precedence over both --block_private_networks and custom block CIDRs. Example: --block_private_networks --block_cidrs -10.0.0.42/32 blocks all private ranges except 10.0.0.42. Adds 3 new tests for allow-list behavior. 
--- src/Config.zig | 2 + src/network/IpFilter.zig | 136 +++++++++++++++++++++++++++++++++------ src/network/Network.zig | 6 +- src/network/http.zig | 6 +- 4 files changed, 127 insertions(+), 23 deletions(-) diff --git a/src/Config.zig b/src/Config.zig index 10520b79..8d3e35aa 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -378,7 +378,9 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ \\--block_cidrs \\ Additional CIDR ranges to block, comma-separated. + \\ Prefix with '-' to allow (exempt from blocking). \\ e.g. --block_cidrs 169.254.169.254/32,fd00:ec2::254/128 + \\ e.g. --block_cidrs 10.0.0.0/8,-10.0.0.42/32 \\ Can be used standalone or combined with --block_private_networks. \\ \\--http-proxy The HTTP proxy to use for all HTTP requests. diff --git a/src/network/IpFilter.zig b/src/network/IpFilter.zig index f44b9b07..ca421ab0 100644 --- a/src/network/IpFilter.zig +++ b/src/network/IpFilter.zig @@ -40,6 +40,8 @@ pub const CidrV6 = struct { block_private: bool, custom_v4: []const CidrV4, custom_v6: []const CidrV6, +allow_v4: []const CidrV4, +allow_v6: []const CidrV6, // ── Comptime helpers ───────────────────────────────────────────────────────── @@ -167,11 +169,13 @@ fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool { // ── Public API ─────────────────────────────────────────────────────────────── -pub const ParsedCidrs = struct { v4: []CidrV4, v6: []CidrV6 }; +pub const ParsedCidrs = struct { v4: []CidrV4, v6: []CidrV6, allow_v4: []CidrV4, allow_v6: []CidrV6 }; /// Parse a comma-separated list of CIDR strings (e.g. "10.0.0.0/8,2001:db8::/32") -/// into separate IPv4 and IPv6 slices. Caller owns the returned slices and must -/// free them with the same allocator. Returns error.InvalidCidr on any malformed entry. +/// into separate IPv4 and IPv6 slices. Entries prefixed with '-' are added to the +/// allow list (e.g. "-10.0.0.42/32" exempts that IP from blocking). 
+/// Caller owns the returned slices and must free them with the same allocator. +/// Returns error.InvalidCidr on any malformed entry. pub fn parseCidrList( allocator: std.mem.Allocator, cidr_str: []const u8, @@ -180,24 +184,41 @@ pub fn parseCidrList( errdefer v4_list.deinit(allocator); var v6_list: std.ArrayList(CidrV6) = .empty; errdefer v6_list.deinit(allocator); + var allow_v4_list: std.ArrayList(CidrV4) = .empty; + errdefer allow_v4_list.deinit(allocator); + var allow_v6_list: std.ArrayList(CidrV6) = .empty; + errdefer allow_v6_list.deinit(allocator); var it = std.mem.splitScalar(u8, cidr_str, ','); while (it.next()) |entry| { const trimmed = std.mem.trim(u8, entry, " \t"); if (trimmed.len == 0) continue; - const slash = std.mem.indexOfScalar(u8, trimmed, '/') orelse return error.InvalidCidr; - const addr_str = trimmed[0..slash]; - const prefix_str = trimmed[slash + 1 ..]; + const is_allow = trimmed[0] == '-'; + const cidr_part = if (is_allow) trimmed[1..] else trimmed; + + const slash = std.mem.indexOfScalar(u8, cidr_part, '/') orelse return error.InvalidCidr; + const addr_str = cidr_part[0..slash]; + const prefix_str = cidr_part[slash + 1 ..]; if (parseIpv4(addr_str)) |v4| { const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr; if (prefix > 32) return error.InvalidCidr; - try v4_list.append(allocator, .{ .network = v4, .prefix_len = @intCast(prefix) }); + const cidr = CidrV4{ .network = v4, .prefix_len = @intCast(prefix) }; + if (is_allow) { + try allow_v4_list.append(allocator, cidr); + } else { + try v4_list.append(allocator, cidr); + } } else if (parseIpv6(addr_str)) |v6| { const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr; if (prefix > 128) return error.InvalidCidr; - try v6_list.append(allocator, .{ .network = v6, .prefix_len = prefix }); + const cidr = CidrV6{ .network = v6, .prefix_len = prefix }; + if (is_allow) { + try allow_v6_list.append(allocator, cidr); + } else { + try 
v6_list.append(allocator, cidr); + } } else { return error.InvalidCidr; } @@ -206,22 +227,39 @@ pub fn parseCidrList( const v4 = try v4_list.toOwnedSlice(allocator); errdefer allocator.free(v4); const v6 = try v6_list.toOwnedSlice(allocator); - return .{ .v4 = v4, .v6 = v6 }; + errdefer allocator.free(v6); + const allow_v4 = try allow_v4_list.toOwnedSlice(allocator); + errdefer allocator.free(allow_v4); + const allow_v6 = try allow_v6_list.toOwnedSlice(allocator); + return .{ .v4 = v4, .v6 = v6, .allow_v4 = allow_v4, .allow_v6 = allow_v6 }; } /// Create an IpFilter. Set block_private to block outbound requests to /// RFC1918, localhost, link-local, and ULA ranges — useful for sandboxing /// and preventing access to internal infrastructure. custom_v4/custom_v6 -/// are additional user-defined ranges (caller owns the slices). -pub fn init(block_private: bool, custom_v4: []const CidrV4, custom_v6: []const CidrV6) IpFilter { +/// are additional user-defined ranges to block; allow_v4/allow_v6 are +/// exemptions that take precedence over all block rules. +/// Caller owns the slices. 
+pub fn init( + block_private: bool, + custom_v4: []const CidrV4, + custom_v6: []const CidrV6, + allow_v4: []const CidrV4, + allow_v6: []const CidrV6, +) IpFilter { return .{ .block_private = block_private, .custom_v4 = custom_v4, .custom_v6 = custom_v6, + .allow_v4 = allow_v4, + .allow_v6 = allow_v6, }; } fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool { + for (self.allow_v4) |cidr| { + if (matchesCidrV4(addr, cidr)) return false; + } if (self.block_private) { for (PRIVATE_V4) |cidr| { if (matchesCidrV4(addr, cidr)) return true; @@ -234,6 +272,9 @@ fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool { } fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool { + for (self.allow_v6) |cidr| { + if (matchesCidrV6(addr, cidr)) return false; + } if (self.block_private) { for (PRIVATE_V6) |cidr| { if (matchesCidrV6(addr, cidr)) return true; @@ -280,7 +321,7 @@ fn testBlocked(self: *const IpFilter, ip: []const u8) bool { } test "IPv4 CIDR matching: private group boundaries" { - const filter = IpFilter.init(true, &.{}, &.{}); + const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const t = std.testing; // Loopback @@ -314,7 +355,7 @@ test "IPv4 CIDR matching: private group boundaries" { } test "IPv6 CIDR matching: private group" { - const filter = IpFilter.init(true, &.{}, &.{}); + const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const t = std.testing; try t.expect(filter.testBlocked("::1")); // localhost @@ -326,7 +367,7 @@ test "IPv6 CIDR matching: private group" { } test "IPv4-mapped IPv6 bypass prevention" { - const filter = IpFilter.init(true, &.{}, &.{}); + const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const t = std.testing; // ::ffff:127.0.0.1 must be blocked (maps to loopback) @@ -338,7 +379,7 @@ test "IPv4-mapped IPv6 bypass prevention" { } test "fail-closed: unknown address family blocked by isBlockedSockaddr" { - const filter = IpFilter.init(false, &.{}, &.{}); + const filter = IpFilter.init(false, &.{}, 
&.{}, &.{}, &.{}); const t = std.testing; // Construct a sockaddr with an unknown address family @@ -356,7 +397,7 @@ test "custom CIDR ranges" { const custom_v4 = [_]CidrV4{ .{ .network = .{ 203, 0, 113, 0 }, .prefix_len = 24 }, // TEST-NET-3 }; - const filter = IpFilter.init(false, &custom_v4, &.{}); + const filter = IpFilter.init(false, &custom_v4, &.{}, &.{}, &.{}); const t = std.testing; try t.expect(filter.testBlocked("203.0.113.1")); // in custom range @@ -368,8 +409,8 @@ test "custom CIDR ranges" { test "private group blocks cloud metadata IP via link-local" { // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. // Users who want targeted cloud-metadata-only blocking can use --block_cidrs. - const filter_private = IpFilter.init(true, &.{}, &.{}); - const filter_none = IpFilter.init(false, &.{}, &.{}); + const filter_private = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); + const filter_none = IpFilter.init(false, &.{}, &.{}, &.{}, &.{}); const t = std.testing; try t.expect(filter_private.testBlocked("169.254.169.254")); // blocked via link-local @@ -381,12 +422,14 @@ test "parseCidrList: mixed IPv4 and IPv6" { const result = try parseCidrList(t.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); defer t.allocator.free(result.v4); defer t.allocator.free(result.v6); + defer t.allocator.free(result.allow_v4); + defer t.allocator.free(result.allow_v6); try t.expectEqual(2, result.v4.len); try t.expectEqual(1, result.v6.len); // spot-check: 203.0.113.0/24 and 192.168.1.0/24 - const f = IpFilter.init(false, result.v4, result.v6); + const f = IpFilter.init(false, result.v4, result.v6, result.allow_v4, result.allow_v6); try t.expect(f.testBlocked("203.0.113.1")); try t.expect(!f.testBlocked("203.0.114.0")); try t.expect(f.testBlocked("192.168.1.1")); @@ -394,6 +437,61 @@ test "parseCidrList: mixed IPv4 and IPv6" { try t.expect(!f.testBlocked("2001:db9::1")); } +test "allow list exempts from private blocking" { + const allow_v4 
= [_]CidrV4{ + .{ .network = .{ 10, 0, 0, 42 }, .prefix_len = 32 }, + }; + const allow_v6 = [_]CidrV6{ + makeCidrV6(.{ 0xfc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128), + }; + const filter = IpFilter.init(true, &.{}, &.{}, &allow_v4, &allow_v6); + const t = std.testing; + + // Allowed IPs pass through despite being in private ranges + try t.expect(!filter.testBlocked("10.0.0.42")); + try t.expect(!filter.testBlocked("fc00::1")); + + // Other private IPs still blocked + try t.expect(filter.testBlocked("10.0.0.43")); + try t.expect(filter.testBlocked("10.0.0.41")); + try t.expect(filter.testBlocked("192.168.1.1")); + try t.expect(filter.testBlocked("fc00::2")); +} + +test "allow list exempts from custom CIDR blocking" { + const custom_v4 = [_]CidrV4{ + .{ .network = .{ 203, 0, 113, 0 }, .prefix_len = 24 }, + }; + const allow_v4 = [_]CidrV4{ + .{ .network = .{ 203, 0, 113, 100 }, .prefix_len = 32 }, + }; + const filter = IpFilter.init(false, &custom_v4, &.{}, &allow_v4, &.{}); + const t = std.testing; + + try t.expect(!filter.testBlocked("203.0.113.100")); // allowed + try t.expect(filter.testBlocked("203.0.113.99")); // blocked + try t.expect(filter.testBlocked("203.0.113.101")); // blocked +} + +test "parseCidrList: allow entries with '-' prefix" { + const t = std.testing; + const result = try parseCidrList(t.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128"); + defer t.allocator.free(result.v4); + defer t.allocator.free(result.v6); + defer t.allocator.free(result.allow_v4); + defer t.allocator.free(result.allow_v6); + + try t.expectEqual(1, result.v4.len); + try t.expectEqual(0, result.v6.len); + try t.expectEqual(1, result.allow_v4.len); + try t.expectEqual(1, result.allow_v6.len); + + const f = IpFilter.init(false, result.v4, result.v6, result.allow_v4, result.allow_v6); + try t.expect(!f.testBlocked("10.0.0.42")); // allowed + try t.expect(f.testBlocked("10.0.0.43")); // blocked + try t.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by 
custom, but allow-listed) +} + test "parseCidrList: invalid input returns error" { const t = std.testing; try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "not-a-cidr")); diff --git a/src/network/Network.zig b/src/network/Network.zig index 883ceef5..2968611b 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -248,16 +248,20 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { errdefer if (custom_cidrs) |c| { allocator.free(c.v4); allocator.free(c.v6); + allocator.free(c.allow_v4); + allocator.free(c.allow_v6); }; const ip_filter: ?*IpFilter = blk: { - const has_custom = if (custom_cidrs) |c| c.v4.len > 0 or c.v6.len > 0 else false; + const has_custom = if (custom_cidrs) |c| c.v4.len > 0 or c.v6.len > 0 or c.allow_v4.len > 0 or c.allow_v6.len > 0 else false; if (!block_private and !has_custom) break :blk null; const f = try allocator.create(IpFilter); f.* = IpFilter.init( block_private, if (custom_cidrs) |c| c.v4 else &.{}, if (custom_cidrs) |c| c.v6 else &.{}, + if (custom_cidrs) |c| c.allow_v4 else &.{}, + if (custom_cidrs) |c| c.allow_v6 else &.{}, ); break :blk f; }; diff --git a/src/network/http.zig b/src/network/http.zig index 08cf3df2..0cd30125 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -658,7 +658,7 @@ fn makeSockAddrV4(ip: [4]u8) libcurl.CurlSockAddr { } test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" { - const filter = IpFilter.init(true, &.{}, &.{}); + const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); const result = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, result); @@ -666,7 +666,7 @@ test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" { test "opensocketCallback: public IPv4 opens a real socket" { // 8.8.8.8 — not in any blocked range; callback should create a real socket - const filter = IpFilter.init(true, &.{}, 
&.{}); + const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); var sa = makeSockAddrV4(.{ 8, 8, 8, 8 }); const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); // A real fd is always >= 0 @@ -682,7 +682,7 @@ test "opensocketCallback: null clientp returns CURL_SOCKET_BAD (fail-closed)" { test "opensocketCallback: block_private=false allows private IP" { // When block_private is false the filter blocks nothing - const filter = IpFilter.init(false, &.{}, &.{}); + const filter = IpFilter.init(false, &.{}, &.{}, &.{}, &.{}); var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); try std.testing.expect(fd >= 0); From 7f5abfc9cfdbf13bed8d170611550d958d0a079a Mon Sep 17 00:00:00 2001 From: Lucien Coffe Date: Tue, 24 Mar 2026 10:39:24 +0100 Subject: [PATCH 03/21] fix: use dashes in CLI flag names for consistency Rename --block_private_networks to --block-private-networks and --block_cidrs to --block-cidrs to match the existing flag naming convention (e.g. --http-proxy, --proxy-bearer-token). --- src/Config.zig | 16 ++++++++-------- src/network/IpFilter.zig | 2 +- src/network/Network.zig | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Config.zig b/src/Config.zig index 8d3e35aa..c31a3563 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -368,7 +368,7 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ we make requests towards. \\ Defaults to false. \\ - \\--block_private_networks + \\--block-private-networks \\ Blocks HTTP requests to private/internal IP addresses \\ after DNS resolution. Useful for sandboxing, multi-tenant \\ deployments, and preventing access to internal infrastructure @@ -376,12 +376,12 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ resources, redirects, etc.). \\ Defaults to false. \\ - \\--block_cidrs + \\--block-cidrs \\ Additional CIDR ranges to block, comma-separated. 
\\ Prefix with '-' to allow (exempt from blocking). - \\ e.g. --block_cidrs 169.254.169.254/32,fd00:ec2::254/128 - \\ e.g. --block_cidrs 10.0.0.0/8,-10.0.0.42/32 - \\ Can be used standalone or combined with --block_private_networks. + \\ e.g. --block-cidrs 169.254.169.254/32,fd00:ec2::254/128 + \\ e.g. --block-cidrs 10.0.0.0/8,-10.0.0.42/32 + \\ Can be used standalone or combined with --block-private-networks. \\ \\--http-proxy The HTTP proxy to use for all HTTP requests. \\ A username:password can be included for basic authentication. @@ -1126,14 +1126,14 @@ fn parseCommonArg( return true; } - if (std.mem.eql(u8, "--block_private_networks", opt)) { + if (std.mem.eql(u8, "--block-private-networks", opt)) { common.block_private_networks = true; return true; } - if (std.mem.eql(u8, "--block_cidrs", opt)) { + if (std.mem.eql(u8, "--block-cidrs", opt)) { const str = args.next() orelse { - log.fatal(.app, "missing argument value", .{ .arg = "--block_cidrs" }); + log.fatal(.app, "missing argument value", .{ .arg = "--block-cidrs" }); return error.InvalidArgument; }; common.block_cidrs = try allocator.dupe(u8, str); diff --git a/src/network/IpFilter.zig b/src/network/IpFilter.zig index ca421ab0..f81c7fc7 100644 --- a/src/network/IpFilter.zig +++ b/src/network/IpFilter.zig @@ -408,7 +408,7 @@ test "custom CIDR ranges" { test "private group blocks cloud metadata IP via link-local" { // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. - // Users who want targeted cloud-metadata-only blocking can use --block_cidrs. + // Users who want targeted cloud-metadata-only blocking can use --block-cidrs. 
const filter_private = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const filter_none = IpFilter.init(false, &.{}, &.{}, &.{}, &.{}); const t = std.testing; diff --git a/src/network/Network.zig b/src/network/Network.zig index 2968611b..459eb5a5 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -86,9 +86,9 @@ callbacks: [MAX_TICK_CALLBACKS]TickCallback = undefined, callbacks_len: usize = 0, callbacks_mutex: std.Thread.Mutex = .{}, -/// Optional IP filter for blocking requests to private/internal networks (--block_private_networks). +/// Optional IP filter for blocking requests to private/internal networks (--block-private-networks). ip_filter: ?*IpFilter = null, -// Custom CIDR slices backing ip_filter; null when --block_cidrs was not set. +// Custom CIDR slices backing ip_filter; null when --block-cidrs was not set. ip_filter_custom_v4: ?[]IpFilter.CidrV4 = null, ip_filter_custom_v6: ?[]IpFilter.CidrV6 = null, From e57b5c645b73b32af57b050bd91fd916dbe40322 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 8 Apr 2026 14:06:17 +0200 Subject: [PATCH 04/21] remove deadcode libcurl.CurlOpenSocketFunction --- src/sys/libcurl.zig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/sys/libcurl.zig b/src/sys/libcurl.zig index 19605f2a..b621a3a3 100644 --- a/src/sys/libcurl.zig +++ b/src/sys/libcurl.zig @@ -60,10 +60,6 @@ pub const CurlSockAddr = extern struct { pub const CURL_SOCKET_BAD: c.curl_socket_t = c.CURL_SOCKET_BAD; -/// Zig-side opensocket callback: purpose and address first, user data last. -/// Return a valid socket fd to allow the connection, or CURL_SOCKET_BAD to block. 
-pub const CurlOpenSocketFunction = fn (CurlSockType, *CurlSockAddr, ?*anyopaque) c.curl_socket_t; - pub const FreeCallback = fn (ptr: ?*anyopaque) void; pub const StrdupCallback = fn (str: [*:0]const u8) ?[*:0]u8; pub const MallocCallback = fn (size: usize) ?*anyopaque; From 6ef518438b312e529658879ae6894870bde70c74 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Wed, 8 Apr 2026 14:35:46 +0200 Subject: [PATCH 05/21] fix custom cidrs mem leak --- src/network/IpFilter.zig | 150 ++++++++++++++++++++------------------- src/network/Network.zig | 38 +++------- src/network/http.zig | 6 +- 3 files changed, 90 insertions(+), 104 deletions(-) diff --git a/src/network/IpFilter.zig b/src/network/IpFilter.zig index f81c7fc7..687a345d 100644 --- a/src/network/IpFilter.zig +++ b/src/network/IpFilter.zig @@ -38,10 +38,7 @@ pub const CidrV6 = struct { // IpFilter fields block_private: bool, -custom_v4: []const CidrV4, -custom_v6: []const CidrV6, -allow_v4: []const CidrV4, -allow_v6: []const CidrV6, +cidrs: ?Cidrs, // ── Comptime helpers ───────────────────────────────────────────────────────── @@ -169,17 +166,29 @@ fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool { // ── Public API ─────────────────────────────────────────────────────────────── -pub const ParsedCidrs = struct { v4: []CidrV4, v6: []CidrV6, allow_v4: []CidrV4, allow_v6: []CidrV6 }; +pub const Cidrs = struct { + v4: []CidrV4, + v6: []CidrV6, + allow_v4: []CidrV4, + allow_v6: []CidrV6, + + pub fn deinit(self: Cidrs, allocator: std.mem.Allocator) void { + allocator.free(self.v4); + allocator.free(self.v6); + allocator.free(self.allow_v4); + allocator.free(self.allow_v6); + } +}; /// Parse a comma-separated list of CIDR strings (e.g. "10.0.0.0/8,2001:db8::/32") -/// into separate IPv4 and IPv6 slices. Entries prefixed with '-' are added to the -/// allow list (e.g. "-10.0.0.42/32" exempts that IP from blocking). -/// Caller owns the returned slices and must free them with the same allocator. 
+/// into a Cidrs struct. Entries prefixed with '-' are added to the allow list +/// (e.g. "-10.0.0.42/32" exempts that IP from blocking). +/// Caller owns the returned Cidrs and must free them via Cidrs.deinit. /// Returns error.InvalidCidr on any malformed entry. pub fn parseCidrList( allocator: std.mem.Allocator, cidr_str: []const u8, -) !ParsedCidrs { +) !Cidrs { var v4_list: std.ArrayList(CidrV4) = .empty; errdefer v4_list.deinit(allocator); var v6_list: std.ArrayList(CidrV6) = .empty; @@ -234,54 +243,58 @@ pub fn parseCidrList( return .{ .v4 = v4, .v6 = v6, .allow_v4 = allow_v4, .allow_v6 = allow_v6 }; } -/// Create an IpFilter. Set block_private to block outbound requests to -/// RFC1918, localhost, link-local, and ULA ranges — useful for sandboxing -/// and preventing access to internal infrastructure. custom_v4/custom_v6 -/// are additional user-defined ranges to block; allow_v4/allow_v6 are -/// exemptions that take precedence over all block rules. -/// Caller owns the slices. +/// Create a heap-allocated IpFilter. Set block_private to block outbound +/// requests to RFC1918, localhost, link-local, and ULA ranges. Pass parsed +/// CIDRs for additional custom block/allow ranges; the filter takes ownership +/// of the Cidrs and will free them on deinit. 
pub fn init( block_private: bool, - custom_v4: []const CidrV4, - custom_v6: []const CidrV6, - allow_v4: []const CidrV4, - allow_v6: []const CidrV6, + cidrs: ?Cidrs, ) IpFilter { return .{ .block_private = block_private, - .custom_v4 = custom_v4, - .custom_v6 = custom_v6, - .allow_v4 = allow_v4, - .allow_v6 = allow_v6, + .cidrs = cidrs, }; } +pub fn deinit(self: IpFilter, allocator: std.mem.Allocator) void { + if (self.cidrs) |c| c.deinit(allocator); +} + fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool { - for (self.allow_v4) |cidr| { - if (matchesCidrV4(addr, cidr)) return false; + if (self.cidrs) |c| { + for (c.allow_v4) |cidr| { + if (matchesCidrV4(addr, cidr)) return false; + } } if (self.block_private) { for (PRIVATE_V4) |cidr| { if (matchesCidrV4(addr, cidr)) return true; } } - for (self.custom_v4) |cidr| { - if (matchesCidrV4(addr, cidr)) return true; + if (self.cidrs) |c| { + for (c.v4) |cidr| { + if (matchesCidrV4(addr, cidr)) return true; + } } return false; } fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool { - for (self.allow_v6) |cidr| { - if (matchesCidrV6(addr, cidr)) return false; + if (self.cidrs) |c| { + for (c.allow_v6) |cidr| { + if (matchesCidrV6(addr, cidr)) return false; + } } if (self.block_private) { for (PRIVATE_V6) |cidr| { if (matchesCidrV6(addr, cidr)) return true; } } - for (self.custom_v6) |cidr| { - if (matchesCidrV6(addr, cidr)) return true; + if (self.cidrs) |c| { + for (c.v6) |cidr| { + if (matchesCidrV6(addr, cidr)) return true; + } } return false; } @@ -321,8 +334,9 @@ fn testBlocked(self: *const IpFilter, ip: []const u8) bool { } test "IPv4 CIDR matching: private group boundaries" { - const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const t = std.testing; + const filter = IpFilter.init(true, null); + defer filter.deinit(t.allocator); // Loopback try t.expect(filter.testBlocked("127.0.0.1")); @@ -355,8 +369,9 @@ test "IPv4 CIDR matching: private group boundaries" { } test "IPv6 CIDR matching: 
private group" { - const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const t = std.testing; + const filter = IpFilter.init(true, null); + defer filter.deinit(t.allocator); try t.expect(filter.testBlocked("::1")); // localhost try t.expect(filter.testBlocked("fe80::1")); // link-local @@ -367,8 +382,9 @@ test "IPv6 CIDR matching: private group" { } test "IPv4-mapped IPv6 bypass prevention" { - const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); const t = std.testing; + const filter = IpFilter.init(true, null); + defer filter.deinit(t.allocator); // ::ffff:127.0.0.1 must be blocked (maps to loopback) try t.expect(filter.testBlocked("::ffff:127.0.0.1")); @@ -379,8 +395,9 @@ test "IPv4-mapped IPv6 bypass prevention" { } test "fail-closed: unknown address family blocked by isBlockedSockaddr" { - const filter = IpFilter.init(false, &.{}, &.{}, &.{}, &.{}); const t = std.testing; + const filter = IpFilter.init(false, null); + defer filter.deinit(t.allocator); // Construct a sockaddr with an unknown address family var sa: libcurl.CurlSockAddr = .{ @@ -394,11 +411,10 @@ test "fail-closed: unknown address family blocked by isBlockedSockaddr" { } test "custom CIDR ranges" { - const custom_v4 = [_]CidrV4{ - .{ .network = .{ 203, 0, 113, 0 }, .prefix_len = 24 }, // TEST-NET-3 - }; - const filter = IpFilter.init(false, &custom_v4, &.{}, &.{}, &.{}); const t = std.testing; + const cidrs = try parseCidrList(t.allocator, "203.0.113.0/24"); + const filter = IpFilter.init(false, cidrs); + defer filter.deinit(t.allocator); try t.expect(filter.testBlocked("203.0.113.1")); // in custom range try t.expect(filter.testBlocked("203.0.113.255")); // in custom range @@ -409,9 +425,11 @@ test "custom CIDR ranges" { test "private group blocks cloud metadata IP via link-local" { // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. // Users who want targeted cloud-metadata-only blocking can use --block-cidrs. 
- const filter_private = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); - const filter_none = IpFilter.init(false, &.{}, &.{}, &.{}, &.{}); const t = std.testing; + const filter_private = IpFilter.init(true, null); + defer filter_private.deinit(t.allocator); + const filter_none = IpFilter.init(false, null); + defer filter_none.deinit(t.allocator); try t.expect(filter_private.testBlocked("169.254.169.254")); // blocked via link-local try t.expect(!filter_none.testBlocked("169.254.169.254")); // not blocked when disabled @@ -419,17 +437,14 @@ test "private group blocks cloud metadata IP via link-local" { test "parseCidrList: mixed IPv4 and IPv6" { const t = std.testing; - const result = try parseCidrList(t.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); - defer t.allocator.free(result.v4); - defer t.allocator.free(result.v6); - defer t.allocator.free(result.allow_v4); - defer t.allocator.free(result.allow_v6); + const cidrs = try parseCidrList(t.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); - try t.expectEqual(2, result.v4.len); - try t.expectEqual(1, result.v6.len); + try t.expectEqual(2, cidrs.v4.len); + try t.expectEqual(1, cidrs.v6.len); // spot-check: 203.0.113.0/24 and 192.168.1.0/24 - const f = IpFilter.init(false, result.v4, result.v6, result.allow_v4, result.allow_v6); + const f = IpFilter.init(false, cidrs); + defer f.deinit(t.allocator); try t.expect(f.testBlocked("203.0.113.1")); try t.expect(!f.testBlocked("203.0.114.0")); try t.expect(f.testBlocked("192.168.1.1")); @@ -438,14 +453,10 @@ test "parseCidrList: mixed IPv4 and IPv6" { } test "allow list exempts from private blocking" { - const allow_v4 = [_]CidrV4{ - .{ .network = .{ 10, 0, 0, 42 }, .prefix_len = 32 }, - }; - const allow_v6 = [_]CidrV6{ - makeCidrV6(.{ 0xfc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128), - }; - const filter = IpFilter.init(true, &.{}, &.{}, &allow_v4, &allow_v6); const t = std.testing; + const cidrs = try parseCidrList(t.allocator, 
"-10.0.0.42/32,-fc00::1/128"); + const filter = IpFilter.init(true, cidrs); + defer filter.deinit(t.allocator); // Allowed IPs pass through despite being in private ranges try t.expect(!filter.testBlocked("10.0.0.42")); @@ -459,14 +470,10 @@ test "allow list exempts from private blocking" { } test "allow list exempts from custom CIDR blocking" { - const custom_v4 = [_]CidrV4{ - .{ .network = .{ 203, 0, 113, 0 }, .prefix_len = 24 }, - }; - const allow_v4 = [_]CidrV4{ - .{ .network = .{ 203, 0, 113, 100 }, .prefix_len = 32 }, - }; - const filter = IpFilter.init(false, &custom_v4, &.{}, &allow_v4, &.{}); const t = std.testing; + const cidrs = try parseCidrList(t.allocator, "203.0.113.0/24,-203.0.113.100/32"); + const filter = IpFilter.init(false, cidrs); + defer filter.deinit(t.allocator); try t.expect(!filter.testBlocked("203.0.113.100")); // allowed try t.expect(filter.testBlocked("203.0.113.99")); // blocked @@ -475,18 +482,15 @@ test "allow list exempts from custom CIDR blocking" { test "parseCidrList: allow entries with '-' prefix" { const t = std.testing; - const result = try parseCidrList(t.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128"); - defer t.allocator.free(result.v4); - defer t.allocator.free(result.v6); - defer t.allocator.free(result.allow_v4); - defer t.allocator.free(result.allow_v6); + const cidrs = try parseCidrList(t.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128"); - try t.expectEqual(1, result.v4.len); - try t.expectEqual(0, result.v6.len); - try t.expectEqual(1, result.allow_v4.len); - try t.expectEqual(1, result.allow_v6.len); + try t.expectEqual(1, cidrs.v4.len); + try t.expectEqual(0, cidrs.v6.len); + try t.expectEqual(1, cidrs.allow_v4.len); + try t.expectEqual(1, cidrs.allow_v6.len); - const f = IpFilter.init(false, result.v4, result.v6, result.allow_v4, result.allow_v6); + const f = IpFilter.init(false, cidrs); + defer f.deinit(t.allocator); try t.expect(!f.testBlocked("10.0.0.42")); // allowed try 
t.expect(f.testBlocked("10.0.0.43")); // blocked try t.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by custom, but allow-listed) diff --git a/src/network/Network.zig b/src/network/Network.zig index 459eb5a5..359646ce 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -88,9 +88,6 @@ callbacks_mutex: std.Thread.Mutex = .{}, /// Optional IP filter for blocking requests to private/internal networks (--block-private-networks). ip_filter: ?*IpFilter = null, -// Custom CIDR slices backing ip_filter; null when --block-cidrs was not set. -ip_filter_custom_v4: ?[]IpFilter.CidrV4 = null, -ip_filter_custom_v6: ?[]IpFilter.CidrV6 = null, const TickCallback = struct { ctx: *anyopaque, @@ -237,35 +234,23 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { ca_blob = try loadCerts(allocator); } - // IP filter for blocking requests to private/internal networks. Heap-allocated - // for pointer stability: connections need a stable *const IpFilter to pass to - // curl's opensocket callback. + // IP filter for blocking requests to private/internal networks. 
const block_private = config.blockPrivateNetworks(); - const custom_cidrs: ?IpFilter.ParsedCidrs = blk: { + const cidrs: ?IpFilter.Cidrs = blk: { const s = config.blockCidrs() orelse break :blk null; break :blk try IpFilter.parseCidrList(allocator, s); }; - errdefer if (custom_cidrs) |c| { - allocator.free(c.v4); - allocator.free(c.v6); - allocator.free(c.allow_v4); - allocator.free(c.allow_v6); - }; - + const has_cidrs = if (cidrs) |c| c.v4.len > 0 or c.v6.len > 0 or c.allow_v4.len > 0 or c.allow_v6.len > 0 else false; const ip_filter: ?*IpFilter = blk: { - const has_custom = if (custom_cidrs) |c| c.v4.len > 0 or c.v6.len > 0 or c.allow_v4.len > 0 or c.allow_v6.len > 0 else false; - if (!block_private and !has_custom) break :blk null; + if (!block_private and !has_cidrs) break :blk null; const f = try allocator.create(IpFilter); - f.* = IpFilter.init( - block_private, - if (custom_cidrs) |c| c.v4 else &.{}, - if (custom_cidrs) |c| c.v6 else &.{}, - if (custom_cidrs) |c| c.allow_v4 else &.{}, - if (custom_cidrs) |c| c.allow_v6 else &.{}, - ); + f.* = IpFilter.init(block_private, cidrs); break :blk f; }; - errdefer if (ip_filter) |f| allocator.destroy(f); + errdefer if (ip_filter) |f| { + f.deinit(allocator); + allocator.destroy(f); + }; const count: usize = config.httpMaxConcurrent(); const connections = try allocator.alloc(http.Connection, count); @@ -319,8 +304,6 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { .ws_max = config.wsMaxConcurrent(), .ip_filter = ip_filter, - .ip_filter_custom_v4 = if (custom_cidrs) |c| c.v4 else null, - .ip_filter_custom_v6 = if (custom_cidrs) |c| c.v6 else null, }; } @@ -358,10 +341,9 @@ pub fn deinit(self: *Network) void { if (self.cache) |*cache| cache.deinit(); if (self.ip_filter) |f| { + f.deinit(self.allocator); self.allocator.destroy(f); } - if (self.ip_filter_custom_v4) |v4| self.allocator.free(v4); - if (self.ip_filter_custom_v6) |v6| self.allocator.free(v6); globalDeinit(); } diff --git 
a/src/network/http.zig b/src/network/http.zig index 0cd30125..634690ff 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -658,7 +658,7 @@ fn makeSockAddrV4(ip: [4]u8) libcurl.CurlSockAddr { } test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" { - const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); + const filter = IpFilter.init(true, null); var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); const result = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, result); @@ -666,7 +666,7 @@ test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" { test "opensocketCallback: public IPv4 opens a real socket" { // 8.8.8.8 — not in any blocked range; callback should create a real socket - const filter = IpFilter.init(true, &.{}, &.{}, &.{}, &.{}); + const filter = IpFilter.init(true, null); var sa = makeSockAddrV4(.{ 8, 8, 8, 8 }); const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); // A real fd is always >= 0 @@ -682,7 +682,7 @@ test "opensocketCallback: null clientp returns CURL_SOCKET_BAD (fail-closed)" { test "opensocketCallback: block_private=false allows private IP" { // When block_private is false the filter blocks nothing - const filter = IpFilter.init(false, &.{}, &.{}, &.{}, &.{}); + const filter = IpFilter.init(false, null); var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); try std.testing.expect(fd >= 0); From 0253092f201ca96a05b929b8042040fe3709c874 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 9 Apr 2026 15:40:16 +0800 Subject: [PATCH 06/21] Improvements to IpFilters The main change is changing how CidrV4 and CidrV6 are stored, by pre-calculating their mask and storing their address as integer. This allows significant simplification of matchesCidrV4 and matchesCidrV6. 
--- src/network/IpFilter.zig | 581 +++++++++++++++++++++++---------------- 1 file changed, 348 insertions(+), 233 deletions(-) diff --git a/src/network/IpFilter.zig b/src/network/IpFilter.zig index 687a345d..73977188 100644 --- a/src/network/IpFilter.zig +++ b/src/network/IpFilter.zig @@ -27,13 +27,48 @@ pub const Ipv4Addr = [4]u8; pub const Ipv6Addr = [16]u8; pub const CidrV4 = struct { - network: Ipv4Addr, - prefix_len: u6, // 0-32 + network: u32, + mask: u32, + + fn fromPrefix(addr: Ipv4Addr, prefix_len: u6) CidrV4 { + const network = std.mem.readInt(u32, &addr, .big); + const mask: u32 = if (prefix_len == 0) + 0 + else if (prefix_len == 32) + 0xFFFFFFFF + else + ~(@as(u32, 0xFFFFFFFF) >> @intCast(prefix_len)); + return .{ .network = network, .mask = mask }; + } }; pub const CidrV6 = struct { - network: Ipv6Addr, - prefix_len: u8, // 0-128 + network_hi: u64, + network_lo: u64, + mask_hi: u64, + mask_lo: u64, + + fn fromPrefix(addr: Ipv6Addr, prefix_len: u8) CidrV6 { + const network_hi = std.mem.readInt(u64, addr[0..8], .big); + const network_lo = std.mem.readInt(u64, addr[8..16], .big); + var mask_hi: u64 = 0; + var mask_lo: u64 = 0; + if (prefix_len > 0) { + if (prefix_len < 64) { + mask_hi = ~(@as(u64, 0xFFFFFFFFFFFFFFFF) >> @intCast(prefix_len)); + } else if (prefix_len == 64) { + mask_hi = 0xFFFFFFFFFFFFFFFF; + } else if (prefix_len < 128) { + mask_hi = 0xFFFFFFFFFFFFFFFF; + mask_lo = ~(@as(u64, 0xFFFFFFFFFFFFFFFF) >> @intCast(prefix_len - 64)); + } else { + // prefix_len == 128 + mask_hi = 0xFFFFFFFFFFFFFFFF; + mask_lo = 0xFFFFFFFFFFFFFFFF; + } + } + return .{ .network_hi = network_hi, .network_lo = network_lo, .mask_hi = mask_hi, .mask_lo = mask_lo }; + } }; // IpFilter fields @@ -62,12 +97,12 @@ fn parseIpv4Comptime(comptime s: []const u8) Ipv4Addr { /// Comptime helper: build a CidrV4. 
fn makeCidrV4(comptime addr: []const u8, comptime prefix: u6) CidrV4 { - return .{ .network = parseIpv4Comptime(addr), .prefix_len = prefix }; + return CidrV4.fromPrefix(parseIpv4Comptime(addr), prefix); } /// Comptime helper: build a CidrV6 from a 16-byte literal array. fn makeCidrV6(comptime bytes: Ipv6Addr, comptime prefix: u8) CidrV6 { - return .{ .network = bytes, .prefix_len = prefix }; + return CidrV6.fromPrefix(bytes, prefix); } // ── Comptime CIDR range tables ─────────────────────────────────────────────── @@ -82,6 +117,8 @@ const PRIVATE_V4 = [_]CidrV4{ }; const PRIVATE_V6 = [_]CidrV6{ + // ::/128 — IPv6 Unspecified + makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 128), // ::1/128 — IPv6 localhost makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128), // fe80::/10 — link-local @@ -128,40 +165,16 @@ fn isIpv4Mapped(addr: Ipv6Addr) ?Ipv4Addr { /// Check if IPv4 address falls within a CIDR range. fn matchesCidrV4(addr: Ipv4Addr, cidr: CidrV4) bool { - if (cidr.prefix_len == 0) return true; - const full_bytes: usize = cidr.prefix_len / 8; - const rem_bits: u4 = @intCast(cidr.prefix_len % 8); - - var i: usize = 0; - // Check full bytes - while (i < full_bytes) : (i += 1) { - if (addr[i] != cidr.network[i]) return false; - } - // Check partial byte (if any) - if (rem_bits > 0 and i < 4) { - const shift: u3 = @intCast(8 - rem_bits); - const mask: u8 = @as(u8, 0xFF) << shift; - if ((addr[i] & mask) != (cidr.network[i] & mask)) return false; - } - return true; + const addr_int = std.mem.readInt(u32, &addr, .big); + return (addr_int ^ cidr.network) & cidr.mask == 0; } /// Check if IPv6 address falls within a CIDR range. 
fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool { - if (cidr.prefix_len == 0) return true; - const full_bytes: usize = cidr.prefix_len / 8; - const rem_bits: u4 = @intCast(cidr.prefix_len % 8); - - var i: usize = 0; - while (i < full_bytes) : (i += 1) { - if (addr[i] != cidr.network[i]) return false; - } - if (rem_bits > 0 and i < 16) { - const shift: u3 = @intCast(8 - rem_bits); - const mask: u8 = @as(u8, 0xFF) << shift; - if ((addr[i] & mask) != (cidr.network[i] & mask)) return false; - } - return true; + const addr_hi = std.mem.readInt(u64, addr[0..8], .big); + const addr_lo = std.mem.readInt(u64, addr[8..16], .big); + return ((addr_hi ^ cidr.network_hi) & cidr.mask_hi == 0) and + ((addr_lo ^ cidr.network_lo) & cidr.mask_lo == 0); } // ── Public API ─────────────────────────────────────────────────────────────── @@ -213,7 +226,7 @@ pub fn parseCidrList( if (parseIpv4(addr_str)) |v4| { const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr; if (prefix > 32) return error.InvalidCidr; - const cidr = CidrV4{ .network = v4, .prefix_len = @intCast(prefix) }; + const cidr = CidrV4.fromPrefix(v4, @intCast(prefix)); if (is_allow) { try allow_v4_list.append(allocator, cidr); } else { @@ -222,7 +235,7 @@ pub fn parseCidrList( } else if (parseIpv6(addr_str)) |v6| { const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr; if (prefix > 128) return error.InvalidCidr; - const cidr = CidrV6{ .network = v6, .prefix_len = prefix }; + const cidr = CidrV6.fromPrefix(v6, prefix); if (is_allow) { try allow_v6_list.append(allocator, cidr); } else { @@ -243,10 +256,10 @@ pub fn parseCidrList( return .{ .v4 = v4, .v6 = v6, .allow_v4 = allow_v4, .allow_v6 = allow_v6 }; } -/// Create a heap-allocated IpFilter. Set block_private to block outbound -/// requests to RFC1918, localhost, link-local, and ULA ranges. 
Pass parsed -/// CIDRs for additional custom block/allow ranges; the filter takes ownership -/// of the Cidrs and will free them on deinit. +// Create a IpFilter. Set block_private to block outbound requests to RFC1918, +// localhost, link-local, and ULA ranges. Pass parsed CIDRs for additional +// custom block/allow ranges; the filter takes ownership of the Cidrs and will +// free them on deinit. pub fn init( block_private: bool, cidrs: ?Cidrs, @@ -258,42 +271,55 @@ pub fn init( } pub fn deinit(self: IpFilter, allocator: std.mem.Allocator) void { - if (self.cidrs) |c| c.deinit(allocator); + if (self.cidrs) |c| { + c.deinit(allocator); + } } fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool { if (self.cidrs) |c| { for (c.allow_v4) |cidr| { - if (matchesCidrV4(addr, cidr)) return false; + if (matchesCidrV4(addr, cidr)) { + return false; + } + } + for (c.v4) |cidr| { + if (matchesCidrV4(addr, cidr)) { + return true; + } } } + if (self.block_private) { for (PRIVATE_V4) |cidr| { - if (matchesCidrV4(addr, cidr)) return true; - } - } - if (self.cidrs) |c| { - for (c.v4) |cidr| { - if (matchesCidrV4(addr, cidr)) return true; + if (matchesCidrV4(addr, cidr)) { + return true; + } } } + return false; } fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool { if (self.cidrs) |c| { for (c.allow_v6) |cidr| { - if (matchesCidrV6(addr, cidr)) return false; + if (matchesCidrV6(addr, cidr)) { + return false; + } + } + for (c.v6) |cidr| { + if (matchesCidrV6(addr, cidr)) { + return true; + } } } + if (self.block_private) { for (PRIVATE_V6) |cidr| { - if (matchesCidrV6(addr, cidr)) return true; - } - } - if (self.cidrs) |c| { - for (c.v6) |cidr| { - if (matchesCidrV6(addr, cidr)) return true; + if (matchesCidrV6(addr, cidr)) { + return true; + } } } return false; @@ -320,7 +346,271 @@ pub fn isBlockedSockaddr(self: *const IpFilter, sa: *const libcurl.CurlSockAddr) } } -// ── Unit tests ─────────────────────────────────────────────────────────────── +const testing = 
@import("../testing.zig"); +test "IpFilter: IPv4 CIDR matching: private group boundaries" { + const filter = IpFilter.init(true, null); + defer filter.deinit(testing.allocator); + + try testing.expect(filter.testBlocked("0.0.0.0")); + + // Loopback + try testing.expect(filter.testBlocked("127.0.0.1")); + try testing.expect(filter.testBlocked("127.255.255.255")); + try testing.expect(!filter.testBlocked("128.0.0.1")); + + // RFC1918 10.0.0.0/8 + try testing.expect(filter.testBlocked("10.0.0.1")); + try testing.expect(filter.testBlocked("10.255.255.255")); + try testing.expect(!filter.testBlocked("11.0.0.0")); + + // RFC1918 172.16.0.0/12 — critical boundary + try testing.expect(!filter.testBlocked("172.15.255.255")); // MUST NOT block + try testing.expect(filter.testBlocked("172.16.0.0")); // MUST block + try testing.expect(filter.testBlocked("172.31.255.255")); // MUST block + try testing.expect(!filter.testBlocked("172.32.0.0")); // MUST NOT block + + // RFC1918 192.168.0.0/16 + try testing.expect(filter.testBlocked("192.168.0.1")); + try testing.expect(!filter.testBlocked("192.169.0.0")); + + // Link-local + try testing.expect(filter.testBlocked("169.254.1.1")); + try testing.expect(!filter.testBlocked("169.255.0.0")); + + // Public IP — must NOT be blocked + try testing.expect(!filter.testBlocked("8.8.8.8")); + try testing.expect(!filter.testBlocked("1.1.1.1")); + try testing.expect(!filter.testBlocked("93.184.216.34")); // example.com +} + +test "IpFilter: IPv6 CIDR matching: private group" { + const filter = IpFilter.init(true, null); + defer filter.deinit(testing.allocator); + + try testing.expect(filter.testBlocked("::")); // unspecified + try testing.expect(filter.testBlocked("::1")); // localhost + try testing.expect(filter.testBlocked("fe80::1")); // link-local + try testing.expect(filter.testBlocked("fc00::1")); // ULA + try testing.expect(filter.testBlocked("fd00::1")); // ULA (fd is fc00::/7) + try testing.expect(!filter.testBlocked("2001:db8::1")); // 
documentation range — public + try testing.expect(!filter.testBlocked("2606:4700::1111")); // Cloudflare +} + +test "IpFilter: IPv4-mapped IPv6 bypass prevention" { + const filter = IpFilter.init(true, null); + defer filter.deinit(testing.allocator); + + // ::ffff:127.0.0.1 must be blocked (maps to loopback) + try testing.expect(filter.testBlocked("::ffff:127.0.0.1")); + // ::ffff:10.0.0.1 must be blocked (maps to RFC1918) + try testing.expect(filter.testBlocked("::ffff:10.0.0.1")); + // ::ffff:8.8.8.8 must NOT be blocked (maps to public) + try testing.expect(!filter.testBlocked("::ffff:8.8.8.8")); +} + +test "IpFilter: fail-closed: unknown address family blocked by isBlockedSockaddr" { + const filter = IpFilter.init(false, null); + defer filter.deinit(testing.allocator); + + // Construct a sockaddr with an unknown address family + var sa: libcurl.CurlSockAddr = .{ + .family = 255, // not AF_INET or AF_INET6 + .socktype = posix.SOCK.STREAM, + .protocol = 0, + .addrlen = 0, + .addr = undefined, + }; + try testing.expect(filter.isBlockedSockaddr(&sa)); +} + +test "IpFilter: custom CIDR ranges" { + const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24"); + const filter = IpFilter.init(false, cidrs); + defer filter.deinit(testing.allocator); + + try testing.expect(filter.testBlocked("203.0.113.1")); // in custom range + try testing.expect(filter.testBlocked("203.0.113.255")); // in custom range + try testing.expect(!filter.testBlocked("203.0.114.0")); // outside custom range + try testing.expect(!filter.testBlocked("8.8.8.8")); // not in range +} + +test "IpFilter: private group blocks cloud metadata IP via link-local" { + // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. + // Users who want targeted cloud-metadata-only blocking can use --block-cidrs. 
+ const filter_private = IpFilter.init(true, null); + defer filter_private.deinit(testing.allocator); + const filter_none = IpFilter.init(false, null); + defer filter_none.deinit(testing.allocator); + + try testing.expect(filter_private.testBlocked("169.254.169.254")); // blocked via link-local + try testing.expect(!filter_none.testBlocked("169.254.169.254")); // not blocked when disabled +} + +test "IpFilter: parseCidrList: mixed IPv4 and IPv6" { + const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); + + try testing.expectEqual(2, cidrs.v4.len); + try testing.expectEqual(1, cidrs.v6.len); + + // spot-check: 203.0.113.0/24 and 192.168.1.0/24 + const f = IpFilter.init(false, cidrs); + defer f.deinit(testing.allocator); + try testing.expect(f.testBlocked("203.0.113.1")); + try testing.expect(!f.testBlocked("203.0.114.0")); + try testing.expect(f.testBlocked("192.168.1.1")); + try testing.expect(f.testBlocked("2001:db8::1")); + try testing.expect(!f.testBlocked("2001:db9::1")); +} + +test "IpFilter: allow list exempts from private blocking" { + const cidrs = try parseCidrList(testing.allocator, "-10.0.0.42/32,-fc00::1/128"); + const filter = IpFilter.init(true, cidrs); + defer filter.deinit(testing.allocator); + + // Allowed IPs pass through despite being in private ranges + try testing.expect(!filter.testBlocked("10.0.0.42")); + try testing.expect(!filter.testBlocked("fc00::1")); + + // Other private IPs still blocked + try testing.expect(filter.testBlocked("10.0.0.43")); + try testing.expect(filter.testBlocked("10.0.0.41")); + try testing.expect(filter.testBlocked("192.168.1.1")); + try testing.expect(filter.testBlocked("fc00::2")); +} + +test "IpFilter: allow list exempts from custom CIDR blocking" { + const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24,-203.0.113.100/32"); + const filter = IpFilter.init(false, cidrs); + defer filter.deinit(testing.allocator); + + try 
testing.expect(!filter.testBlocked("203.0.113.100")); // allowed + try testing.expect(filter.testBlocked("203.0.113.99")); // blocked + try testing.expect(filter.testBlocked("203.0.113.101")); // blocked +} + +test "IpFilter: parseCidrList: allow entries with '-' prefix" { + const cidrs = try parseCidrList(testing.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128"); + + try testing.expectEqual(1, cidrs.v4.len); + try testing.expectEqual(0, cidrs.v6.len); + try testing.expectEqual(1, cidrs.allow_v4.len); + try testing.expectEqual(1, cidrs.allow_v6.len); + + const f = IpFilter.init(false, cidrs); + defer f.deinit(testing.allocator); + try testing.expect(!f.testBlocked("10.0.0.42")); // allowed + try testing.expect(f.testBlocked("10.0.0.43")); // blocked + try testing.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by custom, but allow-listed) +} + +test "IpFilter: parseCidrList: invalid input returns error" { + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "not-a-cidr")); + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "10.0.0.0/33")); // prefix too large + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "10.0.0.0")); // missing prefix + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "10.0.0.0/abc")); // non-numeric prefix +} + +test "IpFilter: matchesCidrV4: exact match /32" { + const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 100 }, 32); + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 100 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 101 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 99 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /0 matches everything" { + const cidr = CidrV4.fromPrefix(.{ 0, 0, 0, 0 }, 0); + try testing.expect(matchesCidrV4(.{ 0, 0, 0, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 255, 255, 255, 255 }, cidr)); + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 1 }, cidr)); +} + 
+test "IpFilter: matchesCidrV4: /8 boundary" { + const cidr = CidrV4.fromPrefix(.{ 10, 0, 0, 0 }, 8); + try testing.expect(matchesCidrV4(.{ 10, 0, 0, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 10, 255, 255, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 11, 0, 0, 0 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 9, 255, 255, 255 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /12 boundary (172.16.0.0/12)" { + const cidr = CidrV4.fromPrefix(.{ 172, 16, 0, 0 }, 12); + // In range + try testing.expect(matchesCidrV4(.{ 172, 16, 0, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 172, 31, 255, 255 }, cidr)); + try testing.expect(matchesCidrV4(.{ 172, 20, 100, 50 }, cidr)); + // Out of range + try testing.expect(!matchesCidrV4(.{ 172, 15, 255, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 172, 32, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /24 network" { + const cidr = CidrV4.fromPrefix(.{ 203, 0, 113, 0 }, 24); + try testing.expect(matchesCidrV4(.{ 203, 0, 113, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 203, 0, 113, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 203, 0, 112, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 203, 0, 114, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV4: non-byte-aligned /25" { + const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 0 }, 25); + // 192.168.1.0 - 192.168.1.127 should match + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 127 }, cidr)); + // 192.168.1.128+ should not match + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 128 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 255 }, cidr)); +} + +test "IpFilter: matchesCidrV6: /128 exact match" { + const addr: Ipv6Addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + const cidr = CidrV6.fromPrefix(addr, 128); + try testing.expect(matchesCidrV6(addr, cidr)); + + var different = addr; + different[15] = 2; + try 
testing.expect(!matchesCidrV6(different, cidr)); +} + +test "IpFilter: matchesCidrV6: /0 matches everything" { + const cidr = CidrV6.fromPrefix(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0); + try testing.expect(matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); +} + +test "IpFilter: matchesCidrV6: /64 boundary" { + // 2001:db8::/64 + const cidr = CidrV6.fromPrefix(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 64); + // In range - any suffix in lower 64 bits + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); + // Out of range - different prefix + try testing.expect(!matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV6: /48 network" { + // 2001:db8:abcd::/48 + const cidr = CidrV6.fromPrefix(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 48); + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); + try testing.expect(!matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xce, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV6: /10 link-local (fe80::/10)" { + const cidr = CidrV6.fromPrefix(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10); + // fe80:: through febf:: should match (first 10 bits: 1111111010) + try testing.expect(matchesCidrV6(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0xfe, 0xbf, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); + // fec0:: should NOT match (11th bit differs) + try testing.expect(!matchesCidrV6(.{ 0xfe, 0xc0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV6: prefix > 64 bits (/96)" { + // ::ffff:0:0/96 (IPv4-mapped prefix) + const cidr = CidrV6.fromPrefix(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 0, 0, 0 }, 96); + try testing.expect(matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 192, 168, 1, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 10, 0, 0, 1 }, cidr)); + try testing.expect(!matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xfe, 192, 168, 1, 1 }, cidr)); +} /// Test-only convenience: parse an IP string and check against the filter. /// Test inputs must be valid IPs; unreachable on parse failure. @@ -332,178 +622,3 @@ fn testBlocked(self: *const IpFilter, ip: []const u8) bool { } unreachable; } - -test "IPv4 CIDR matching: private group boundaries" { - const t = std.testing; - const filter = IpFilter.init(true, null); - defer filter.deinit(t.allocator); - - // Loopback - try t.expect(filter.testBlocked("127.0.0.1")); - try t.expect(filter.testBlocked("127.255.255.255")); - try t.expect(!filter.testBlocked("128.0.0.1")); - - // RFC1918 10.0.0.0/8 - try t.expect(filter.testBlocked("10.0.0.1")); - try t.expect(filter.testBlocked("10.255.255.255")); - try t.expect(!filter.testBlocked("11.0.0.0")); - - // RFC1918 172.16.0.0/12 — critical boundary - try t.expect(!filter.testBlocked("172.15.255.255")); // MUST NOT block - try t.expect(filter.testBlocked("172.16.0.0")); // MUST block - try t.expect(filter.testBlocked("172.31.255.255")); // MUST block - try t.expect(!filter.testBlocked("172.32.0.0")); // MUST NOT block - - // RFC1918 192.168.0.0/16 - try t.expect(filter.testBlocked("192.168.0.1")); - try t.expect(!filter.testBlocked("192.169.0.0")); - - // Link-local - try 
t.expect(filter.testBlocked("169.254.1.1")); - try t.expect(!filter.testBlocked("169.255.0.0")); - - // Public IP — must NOT be blocked - try t.expect(!filter.testBlocked("8.8.8.8")); - try t.expect(!filter.testBlocked("1.1.1.1")); - try t.expect(!filter.testBlocked("93.184.216.34")); // example.com -} - -test "IPv6 CIDR matching: private group" { - const t = std.testing; - const filter = IpFilter.init(true, null); - defer filter.deinit(t.allocator); - - try t.expect(filter.testBlocked("::1")); // localhost - try t.expect(filter.testBlocked("fe80::1")); // link-local - try t.expect(filter.testBlocked("fc00::1")); // ULA - try t.expect(filter.testBlocked("fd00::1")); // ULA (fd is fc00::/7) - try t.expect(!filter.testBlocked("2001:db8::1")); // documentation range — public - try t.expect(!filter.testBlocked("2606:4700::1111")); // Cloudflare -} - -test "IPv4-mapped IPv6 bypass prevention" { - const t = std.testing; - const filter = IpFilter.init(true, null); - defer filter.deinit(t.allocator); - - // ::ffff:127.0.0.1 must be blocked (maps to loopback) - try t.expect(filter.testBlocked("::ffff:127.0.0.1")); - // ::ffff:10.0.0.1 must be blocked (maps to RFC1918) - try t.expect(filter.testBlocked("::ffff:10.0.0.1")); - // ::ffff:8.8.8.8 must NOT be blocked (maps to public) - try t.expect(!filter.testBlocked("::ffff:8.8.8.8")); -} - -test "fail-closed: unknown address family blocked by isBlockedSockaddr" { - const t = std.testing; - const filter = IpFilter.init(false, null); - defer filter.deinit(t.allocator); - - // Construct a sockaddr with an unknown address family - var sa: libcurl.CurlSockAddr = .{ - .family = 255, // not AF_INET or AF_INET6 - .socktype = posix.SOCK.STREAM, - .protocol = 0, - .addrlen = 0, - .addr = undefined, - }; - try t.expect(filter.isBlockedSockaddr(&sa)); -} - -test "custom CIDR ranges" { - const t = std.testing; - const cidrs = try parseCidrList(t.allocator, "203.0.113.0/24"); - const filter = IpFilter.init(false, cidrs); - defer 
filter.deinit(t.allocator); - - try t.expect(filter.testBlocked("203.0.113.1")); // in custom range - try t.expect(filter.testBlocked("203.0.113.255")); // in custom range - try t.expect(!filter.testBlocked("203.0.114.0")); // outside custom range - try t.expect(!filter.testBlocked("8.8.8.8")); // not in range -} - -test "private group blocks cloud metadata IP via link-local" { - // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. - // Users who want targeted cloud-metadata-only blocking can use --block-cidrs. - const t = std.testing; - const filter_private = IpFilter.init(true, null); - defer filter_private.deinit(t.allocator); - const filter_none = IpFilter.init(false, null); - defer filter_none.deinit(t.allocator); - - try t.expect(filter_private.testBlocked("169.254.169.254")); // blocked via link-local - try t.expect(!filter_none.testBlocked("169.254.169.254")); // not blocked when disabled -} - -test "parseCidrList: mixed IPv4 and IPv6" { - const t = std.testing; - const cidrs = try parseCidrList(t.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); - - try t.expectEqual(2, cidrs.v4.len); - try t.expectEqual(1, cidrs.v6.len); - - // spot-check: 203.0.113.0/24 and 192.168.1.0/24 - const f = IpFilter.init(false, cidrs); - defer f.deinit(t.allocator); - try t.expect(f.testBlocked("203.0.113.1")); - try t.expect(!f.testBlocked("203.0.114.0")); - try t.expect(f.testBlocked("192.168.1.1")); - try t.expect(f.testBlocked("2001:db8::1")); - try t.expect(!f.testBlocked("2001:db9::1")); -} - -test "allow list exempts from private blocking" { - const t = std.testing; - const cidrs = try parseCidrList(t.allocator, "-10.0.0.42/32,-fc00::1/128"); - const filter = IpFilter.init(true, cidrs); - defer filter.deinit(t.allocator); - - // Allowed IPs pass through despite being in private ranges - try t.expect(!filter.testBlocked("10.0.0.42")); - try t.expect(!filter.testBlocked("fc00::1")); - - // Other private IPs still blocked - try 
t.expect(filter.testBlocked("10.0.0.43")); - try t.expect(filter.testBlocked("10.0.0.41")); - try t.expect(filter.testBlocked("192.168.1.1")); - try t.expect(filter.testBlocked("fc00::2")); -} - -test "allow list exempts from custom CIDR blocking" { - const t = std.testing; - const cidrs = try parseCidrList(t.allocator, "203.0.113.0/24,-203.0.113.100/32"); - const filter = IpFilter.init(false, cidrs); - defer filter.deinit(t.allocator); - - try t.expect(!filter.testBlocked("203.0.113.100")); // allowed - try t.expect(filter.testBlocked("203.0.113.99")); // blocked - try t.expect(filter.testBlocked("203.0.113.101")); // blocked -} - -test "parseCidrList: allow entries with '-' prefix" { - const t = std.testing; - const cidrs = try parseCidrList(t.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128"); - - try t.expectEqual(1, cidrs.v4.len); - try t.expectEqual(0, cidrs.v6.len); - try t.expectEqual(1, cidrs.allow_v4.len); - try t.expectEqual(1, cidrs.allow_v6.len); - - const f = IpFilter.init(false, cidrs); - defer f.deinit(t.allocator); - try t.expect(!f.testBlocked("10.0.0.42")); // allowed - try t.expect(f.testBlocked("10.0.0.43")); // blocked - try t.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by custom, but allow-listed) -} - -test "parseCidrList: invalid input returns error" { - const t = std.testing; - try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "not-a-cidr")); - try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "10.0.0.0/33")); // prefix too large - try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "10.0.0.0")); // missing prefix - try t.expectError(error.InvalidCidr, parseCidrList(t.allocator, "10.0.0.0/abc")); // non-numeric prefix -} - -test { - std.testing.refAllDecls(@This()); -} From d19e62ec3c6d2bfb975e2b8f7505f00d35a8a794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= Date: Thu, 9 Apr 2026 21:56:37 +0200 Subject: [PATCH 07/21] http: add default write callback to prevent 
stdout pollution --- src/network/http.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/network/http.zig b/src/network/http.zig index ef3dfc65..c59a2756 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -421,6 +421,9 @@ pub const Connection = struct { // try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback); } + + // default write callback to prevent libcurl from writing to stdout + try self.setWriteCallback(discardBody); } fn discardBody(_: [*]const u8, count: usize, len: usize, _: ?*anyopaque) usize { From 075a660b2fb4ba7bf3e36ddca5e810214585d545 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 10 Apr 2026 09:19:09 +0200 Subject: [PATCH 08/21] ci: use cache for snapshots Add a cache for v8 snapshot file. Use a cache key for v8 snapshot with the last hash changing src/browser/js/bridge.zig eg. v8-snapshot-4dcb2c997e01e4367ca6118629fb4ac712f9692c --- .github/workflows/e2e-test.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index f069ebb5..38500e4b 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -51,7 +51,24 @@ jobs: - uses: ./.github/actions/install + # Use the commit hash of src/browser/js/bridge.zig as cache key for + # snapshot. + - run: echo "hash=v8-snapshot-$(git log -n 1 --pretty=format:%H -- src/browser/js/bridge.zig)" >> "$GITHUB_OUTPUT" + id: snapshot_cache_key + + # Fetch the cache for snapshot + - name: Cache V8 snapshot + id: cache-v8-snapshot + uses: actions/cache@v5 + env: + cache-name: cache-v8-snapshot + with: + path: src/snapshot.bin + key: ${{ steps.snapshot_cache_key.outputs.hash }} + + # Generate snapshot on cache miss. 
- name: v8 snapshot + if: ${{ steps.cache-v8-snapshot.outputs.cache-hit != 'true' }} run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin - name: zig build release From ca78bd7786cb7f777215361c7bf7829a2aed11e9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 10 Apr 2026 09:52:43 +0200 Subject: [PATCH 09/21] ci: invalidate snapshot cache on src/browser/js/Snapshot.zig --- .github/workflows/e2e-test.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 38500e4b..f4d99ba5 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -51,9 +51,12 @@ jobs: - uses: ./.github/actions/install - # Use the commit hash of src/browser/js/bridge.zig as cache key for + # Use the commit hash of bridge.zig and Snapshot.zig as cache key for # snapshot. - - run: echo "hash=v8-snapshot-$(git log -n 1 --pretty=format:%H -- src/browser/js/bridge.zig)" >> "$GITHUB_OUTPUT" + - run: echo "hash=v8-snapshot-$(git log -n 1 --pretty=format:%H -- \ + src/browser/js/bridge.zig \ + src/browser/js/Snapshot.zig \ + )" >> "$GITHUB_OUTPUT" id: snapshot_cache_key # Fetch the cache for snapshot From 90069f6ab8b4b18cae1c4744d2d5fd5a6d27ce7b Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 10 Apr 2026 10:10:21 +0200 Subject: [PATCH 10/21] ci: use a dedicated action for v8 snapshot --- .github/actions/v8-snapshot/action.yml | 32 ++++++++++++++++++++++++++ .github/workflows/e2e-test.yml | 24 +------------------ 2 files changed, 33 insertions(+), 23 deletions(-) create mode 100644 .github/actions/v8-snapshot/action.yml diff --git a/.github/actions/v8-snapshot/action.yml b/.github/actions/v8-snapshot/action.yml new file mode 100644 index 00000000..058c2221 --- /dev/null +++ b/.github/actions/v8-snapshot/action.yml @@ -0,0 +1,32 @@ +name: "V8 snaphsot" +description: "Generate v8 snapshot" + +runs: + using: "composite" + + 
steps: + # Use the commit hash of bridge.zig and Snapshot.zig as cache key for + # snapshot. + - name: V8 snapshot cache key + id: snapshot_cache_key + run: echo "hash=v8-snapshot-$(git log -n 1 --pretty=format:%H -- + src/browser/js/bridge.zig + src/browser/js/Snapshot.zig + )" >> "$GITHUB_OUTPUT" + shell: bash + + # Fetch the cache for snapshot + - name: Cache V8 snapshot + id: cache-v8-snapshot + uses: actions/cache@v5 + env: + cache-name: cache-v8-snapshot + with: + path: src/snapshot.bin + key: ${{ steps.snapshot_cache_key.outputs.hash }} + + # Generate snapshot on cache miss. + - name: v8 snapshot + shell: bash + if: hashFiles('src/snapshot.bin') == '' + run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index f4d99ba5..c62f1411 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -50,29 +50,7 @@ jobs: fetch-depth: 0 - uses: ./.github/actions/install - - # Use the commit hash of bridge.zig and Snapshot.zig as cache key for - # snapshot. - - run: echo "hash=v8-snapshot-$(git log -n 1 --pretty=format:%H -- \ - src/browser/js/bridge.zig \ - src/browser/js/Snapshot.zig \ - )" >> "$GITHUB_OUTPUT" - id: snapshot_cache_key - - # Fetch the cache for snapshot - - name: Cache V8 snapshot - id: cache-v8-snapshot - uses: actions/cache@v5 - env: - cache-name: cache-v8-snapshot - with: - path: src/snapshot.bin - key: ${{ steps.snapshot_cache_key.outputs.hash }} - - # Generate snapshot on cache miss. 
- - name: v8 snapshot - if: ${{ steps.cache-v8-snapshot.outputs.cache-hit != 'true' }} - run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin + - uses: ./.github/actions/v8-snapshot - name: zig build release run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=x86_64 From bd4e88cec8d7119dd9179474a12e38c33bbf2fe9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 10 Apr 2026 10:11:03 +0200 Subject: [PATCH 11/21] ci: typo fix --- .github/actions/install/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index f5bb72d3..3e29e96a 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -1,5 +1,5 @@ -name: "Browsercore install" -description: "Install deps for the project browsercore" +name: "Deps install" +description: "Install deps for the browser" inputs: arch: From d80e4227b4f6ba35136f70e8cd4f2ed32505971e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 9 Apr 2026 15:20:58 +0200 Subject: [PATCH 12/21] force an aggressive GC on v8 after snapshot creation --- src/browser/js/Snapshot.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/src/browser/js/Snapshot.zig b/src/browser/js/Snapshot.zig index 5a04861a..0b6a7fd1 100644 --- a/src/browser/js/Snapshot.zig +++ b/src/browser/js/Snapshot.zig @@ -126,6 +126,7 @@ pub fn create() !Snapshot { var data_start: usize = 0; const isolate = v8.v8__SnapshotCreator__getIsolate(snapshot_creator).?; + defer v8.v8__Isolate__LowMemoryNotification(isolate); { // CreateBlob, which we'll call once everything is setup, MUST NOT From 963682eb0dbefffe523924e1cc5493bf56c03e39 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Thu, 9 Apr 2026 18:08:44 +0200 Subject: [PATCH 13/21] ci: send wpt completion --- .github/workflows/wpt.yml | 15 +++++++++++++++ 1 file changed, 15 
insertions(+) diff --git a/.github/workflows/wpt.yml b/.github/workflows/wpt.yml index c6cc7350..bfabc59c 100644 --- a/.github/workflows/wpt.yml +++ b/.github/workflows/wpt.yml @@ -170,6 +170,21 @@ jobs: cd ./wptdiff CGO_ENABLED=0 go build + - run: | + ./wptdiff/wptdiff --completion |tee completion.log + + - name: Send completion to slack + uses: slackapi/slack-github-action@v3.0.1 + with: + errors: true + method: files.uploadV2 + token: ${{ secrets.CI_SLACK_BOT_TOKEN }} + payload: | + channel_id: ${{ vars.WPT_SLACK_CHANNEL_ID }} + initial_comment: "Last WPT completion" + file: "./completion.log" + filename: "wpt-completion-${{ github.sha }}.txt" + - run: | ./wptdiff/wptdiff |tee diff.log From 828715b751b19190f36c13f5e9fdcb7dee308730 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 10 Apr 2026 10:36:29 +0800 Subject: [PATCH 14/21] Improve TextDecoder to support all necessary encoding types Uses the newly added encoding_rs to implement TextDecoder for all encodings. Claude wrote 100% of the Rust binding. Improves various WPT tests, e.g. /encoding/api-basics.any.html. 
--- src/browser/parser/html5ever.zig | 62 +++++++ src/browser/tests/encoding/text_decoder.html | 73 +++++++- src/browser/tests/encoding/text_encoder.html | 3 + src/browser/tests/testing.js | 8 +- src/browser/webapi/encoding/TextDecoder.zig | 149 ++++++++++++--- src/browser/webapi/encoding/TextEncoder.zig | 19 +- src/html5ever/lib.rs | 183 +++++++++++++++++++ 7 files changed, 458 insertions(+), 39 deletions(-) diff --git a/src/browser/parser/html5ever.zig b/src/browser/parser/html5ever.zig index f6f81583..cb673789 100644 --- a/src/browser/parser/html5ever.zig +++ b/src/browser/parser/html5ever.zig @@ -216,3 +216,65 @@ pub extern "c" fn xml5ever_parse_document( appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void, appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void, ) void; + +// General encoding api +pub const EncodingInfo = extern struct { + found: u8, + handle: ?*anyopaque, + name_len: usize, + name_ptr: [*]const u8, + + pub fn isValid(self: *const EncodingInfo) bool { + return self.found != 0; + } + + pub fn name(self: *const EncodingInfo) []const u8 { + if (self.name_len == 0) { + return ""; + } + return self.name_ptr[0..self.name_len]; + } +}; + +pub const DecodeResult = extern struct { + had_errors: u8, + bytes_read: usize, + bytes_written: usize, + + pub fn hadErrors(self: *const DecodeResult) bool { + return self.had_errors != 0; + } +}; + +pub extern "c" fn encoding_for_label( + label: [*]const u8, + label_len: usize, +) EncodingInfo; + +pub extern "c" fn encoding_max_utf8_buffer_length( + handle: *anyopaque, + input_len: usize, +) usize; + +pub extern "c" fn encoding_decode( + handle: *anyopaque, + input: ?[*]const u8, + input_len: usize, + output: [*]u8, + output_len: usize, + is_last: u8, +) DecodeResult; + +// Streaming decoder API +pub extern "c" fn encoding_decoder_new(handle: *anyopaque) ?*anyopaque; 
+ +pub extern "c" fn encoding_decoder_decode( + decoder: *anyopaque, + input: ?[*]const u8, + input_len: usize, + output: [*]u8, + output_len: usize, + is_last: u8, +) DecodeResult; + +pub extern "c" fn encoding_decoder_free(decoder: *anyopaque) void; diff --git a/src/browser/tests/encoding/text_decoder.html b/src/browser/tests/encoding/text_decoder.html index 2b01852e..6314c924 100644 --- a/src/browser/tests/encoding/text_decoder.html +++ b/src/browser/tests/encoding/text_decoder.html @@ -11,7 +11,6 @@ testing.expectEqual('', d1.decode()); testing.expectEqual('香料', d1.decode(new Uint8Array([233, 166, 153, 230, 150, 153]))); testing.expectEqual('香料', d1.decode(new Uint8Array([0xEF, 0xBB, 0xBF, 233, 166, 153, 230, 150, 153]))); - testing.expectEqual('�4', d1.decode(new Uint8Array([249, 52]))); { const buffer = new ArrayBuffer(6); @@ -38,7 +37,7 @@ } let d2 = new TextDecoder('utf8', {fatal: true}) - testing.expectError('Error: InvalidUtf8', () => { + testing.expectError('TypeError', () => { let data = new Uint8Array([241, 241, 159, 172]); d2.decode(data); }); @@ -46,8 +45,8 @@ + + + + + + diff --git a/src/browser/tests/encoding/text_encoder.html b/src/browser/tests/encoding/text_encoder.html index 540f60c1..99fd1959 100644 --- a/src/browser/tests/encoding/text_encoder.html +++ b/src/browser/tests/encoding/text_encoder.html @@ -5,6 +5,9 @@ diff --git a/src/browser/tests/testing.js b/src/browser/tests/testing.js index 037d15cf..12d0f761 100644 --- a/src/browser/tests/testing.js +++ b/src/browser/tests/testing.js @@ -37,7 +37,13 @@ function expectError(expected, fn) { withError((err) => { - expectEqual(true, err.toString().includes(expected)); + if (!err.toString().includes(expected)) { + console.error(`Expecte error to contains: ${expected}, was: ${err.toString()}`); + expectEqual(true, false); + } else { + // to record a successful case + expectTrue(true); + } }, fn); } diff --git a/src/browser/webapi/encoding/TextDecoder.zig 
b/src/browser/webapi/encoding/TextDecoder.zig index c117df09..1467aa86 100644 --- a/src/browser/webapi/encoding/TextDecoder.zig +++ b/src/browser/webapi/encoding/TextDecoder.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -19,6 +19,7 @@ const std = @import("std"); const lp = @import("lightpanda"); const js = @import("../../js/js.zig"); +const html5ever = @import("../../parser/html5ever.zig"); const Page = @import("../../Page.zig"); const Session = @import("../../Session.zig"); @@ -30,13 +31,11 @@ _rc: lp.RC(u8) = .{}, _fatal: bool, _arena: Allocator, _ignore_bom: bool, -_stream: std.ArrayList(u8), - -const Label = enum { - utf8, - @"utf-8", - @"unicode-1-1-utf-8", -}; +_bom_seen: bool, +_decoder: ?*anyopaque, // Persistent streaming decoder +_encoding_handle: *anyopaque, +_encoding_name: []const u8, +_lowercase_name: []const u8, // Cached lowercase version of encoding name const InitOpts = struct { fatal: bool = false, @@ -44,8 +43,17 @@ const InitOpts = struct { }; pub fn init(label_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*TextDecoder { - if (label_) |label| { - _ = std.meta.stringToEnum(Label, label) orelse return error.RangeError; + const label = label_ orelse "utf-8"; + + const info = html5ever.encoding_for_label(label.ptr, label.len); + if (!info.isValid()) { + return error.RangeError; + } + + // Check for "replacement" encoding - it's not usable for decoding per spec + const enc_name = info.name(); + if (std.mem.eql(u8, enc_name, "replacement")) { + return error.RangeError; } const arena = try page.getArena(.{ .debug = "TextDecoder" }); @@ -55,14 +63,21 @@ pub fn init(label_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*TextDecoder { const self = try arena.create(TextDecoder); self.* = .{ ._arena = arena, - ._stream = .empty, ._fatal = opts.fatal, ._ignore_bom = opts.ignoreBOM, + ._encoding_handle = info.handle.?, + ._decoder = null, 
+ ._bom_seen = false, + ._lowercase_name = "", // Will be lazily allocated + ._encoding_name = enc_name, // Points to static Rust memory }; return self; } pub fn deinit(self: *TextDecoder, session: *Session) void { + if (self._decoder) |decoder| { + html5ever.encoding_decoder_free(decoder); + } session.releaseArena(self._arena); } @@ -82,34 +97,110 @@ pub fn getFatal(self: *const TextDecoder) bool { return self._fatal; } +pub fn getEncoding(self: *TextDecoder) ![]const u8 { + // Spec requires lowercase encoding name + // Allocate buffer for lowercase name on first access + if (self._lowercase_name.len > 0) { + return self._lowercase_name; + } + self._lowercase_name = try std.ascii.allocLowerString(self._arena, self._encoding_name); + return self._lowercase_name; +} + const DecodeOpts = struct { stream: bool = false, }; + pub fn decode(self: *TextDecoder, input_: ?[]const u8, opts_: ?DecodeOpts) ![]const u8 { - var input = input_ orelse return ""; const opts: DecodeOpts = opts_ orelse .{}; + const input = input_ orelse ""; - if (self._stream.items.len > 0) { - try self._stream.appendSlice(self._arena, input); - input = self._stream.items; - } - - if (self._fatal and !std.unicode.utf8ValidateSlice(input)) { - if (opts.stream) { - if (self._stream.items.len == 0) { - try self._stream.appendSlice(self._arena, input); - } - return ""; + // For non-streaming calls, we don't need a persistent decoder + if (!opts.stream) { + // Reset decoder state if we had one + if (self._decoder) |decoder| { + html5ever.encoding_decoder_free(decoder); + self._decoder = null; + } + } else if (self._decoder == null) { + self._decoder = html5ever.encoding_decoder_new(self._encoding_handle); + if (self._decoder == null) { + return error.OutOfMemory; } - return error.InvalidUtf8; } - self._stream.clearRetainingCapacity(); - if (self._ignore_bom == false and std.mem.startsWith(u8, input, &.{ 0xEF, 0xBB, 0xBF })) { - return input[3..]; + return self._decode(input, self._decoder); +} + +fn 
_decode(self: *TextDecoder, input: []const u8, streaming_decoder: ?*anyopaque) ![]const u8 { + if (input.len == 0) { + return ""; } - return input; + // Calculate max output size + const max_out = html5ever.encoding_max_utf8_buffer_length( + self._encoding_handle, + input.len, + ); + + if (max_out == 0) { + return ""; + } + + // Allocate output buffer + const output = try self._arena.alloc(u8, max_out); + + // Decode using either streaming or one-shot decoder + const result = if (streaming_decoder) |decoder| + html5ever.encoding_decoder_decode( + decoder, + input.ptr, + input.len, + output.ptr, + output.len, + 0, // is_last = false for streaming + ) + else + html5ever.encoding_decode( + self._encoding_handle, + input.ptr, + input.len, + output.ptr, + output.len, + 1, // is_last = true for one-shot + ); + + // Handle errors in fatal mode + if (self._fatal and result.hadErrors()) { + if (streaming_decoder != null) { + // Reset decoder on error + if (self._decoder) |decoder| { + html5ever.encoding_decoder_free(decoder); + self._decoder = null; + } + } + self._bom_seen = false; + return error.TypeError; + } + + var decoded: []const u8 = output[0..result.bytes_written]; + + // Handle BOM stripping + if (!self._bom_seen and !self._ignore_bom) { + decoded = stripBom(decoded); + self._bom_seen = true; + } + + return decoded; +} + +fn stripBom(data: []const u8) []const u8 { + // UTF-8 BOM in decoded output appears as U+FEFF (EF BB BF in UTF-8) + const bom = "\u{FEFF}"; + if (std.mem.startsWith(u8, data, bom)) { + return data[bom.len..]; + } + return data; } pub const JsApi = struct { @@ -123,7 +214,7 @@ pub const JsApi = struct { pub const constructor = bridge.constructor(TextDecoder.init, .{}); pub const decode = bridge.function(TextDecoder.decode, .{}); - pub const encoding = bridge.property("utf-8", .{ .template = false }); + pub const encoding = bridge.accessor(TextDecoder.getEncoding, null, .{}); pub const fatal = bridge.accessor(TextDecoder.getFatal, null, .{}); pub 
const ignoreBOM = bridge.accessor(TextDecoder.getIgnoreBOM, null, .{}); }; diff --git a/src/browser/webapi/encoding/TextEncoder.zig b/src/browser/webapi/encoding/TextEncoder.zig index a6bff48e..112d2e32 100644 --- a/src/browser/webapi/encoding/TextEncoder.zig +++ b/src/browser/webapi/encoding/TextEncoder.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -26,12 +26,23 @@ pub fn init() TextEncoder { return .{}; } -pub fn encode(_: *const TextEncoder, v: []const u8) !js.TypedArray(u8) { - if (!std.unicode.utf8ValidateSlice(v)) { +pub fn encode(_: *const TextEncoder, v_: ?js.Value) !js.TypedArray(u8) { + const v = v_ orelse return .{ .values = "" }; + + if (v.isUndefined()) { + return .{ .values = "" }; + } + + if (v.isNull()) { + return .{ .values = "null" }; + } + + const str = try v.toStringSlice(); + if (!std.unicode.utf8ValidateSlice(str)) { return error.InvalidUtf8; } - return .{ .values = v }; + return .{ .values = str }; } pub const JsApi = struct { diff --git a/src/html5ever/lib.rs b/src/html5ever/lib.rs index 6fab9763..c684c039 100644 --- a/src/html5ever/lib.rs +++ b/src/html5ever/lib.rs @@ -151,6 +151,189 @@ pub extern "C" fn html5ever_parse_document_with_encoding( .one(StrTendril::from(decoded.as_ref())); } +// === Encoding API for TextDecoder === + +/// Result of encoding label lookup +#[repr(C)] +pub struct EncodingInfo { + /// 0 = not found, 1 = found + pub found: u8, + /// Opaque handle to the encoding (actually &'static Encoding) + pub handle: *const c_void, + /// Length of canonical name + pub name_len: usize, + /// Pointer to canonical encoding name (static, lowercase) + pub name_ptr: *const c_uchar, +} + +/// Look up an encoding by its label (case-insensitive, whitespace-trimmed) +#[no_mangle] +pub extern "C" fn encoding_for_label( + label: *const c_uchar, + label_len: usize, +) -> EncodingInfo { + if label.is_null() || label_len == 
0 { + return EncodingInfo { + found: 0, + name_len: 0, + handle: std::ptr::null(), + name_ptr: std::ptr::null(), + }; + } + + let label_bytes = unsafe { std::slice::from_raw_parts(label, label_len) }; + + match Encoding::for_label(label_bytes) { + Some(encoding) => { + let name = encoding.name(); + EncodingInfo { + found: 1, + name_len: name.len(), + name_ptr: name.as_ptr(), + handle: encoding as *const _ as *const c_void, + } + } + None => EncodingInfo { + found: 0, + name_len: 0, + name_ptr: std::ptr::null(), + handle: std::ptr::null(), + }, + } +} + +/// Calculate maximum UTF-8 buffer size needed for decoding +#[no_mangle] +pub extern "C" fn encoding_max_utf8_buffer_length( + handle: *const c_void, + input_len: usize, +) -> usize { + if handle.is_null() { + return 0; + } + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + let decoder = encoding.new_decoder(); + decoder.max_utf8_buffer_length(input_len).unwrap_or(0) +} + +/// Result of decoding operation +#[repr(C)] +pub struct DecodeResult { + /// 0 = no errors, 1 = had malformed sequences (replaced with U+FFFD) + pub had_errors: u8, + /// Number of input bytes consumed + pub bytes_read: usize, + /// Number of UTF-8 bytes written to output buffer + pub bytes_written: usize, +} + +/// Decode bytes from source encoding to UTF-8 +/// For streaming, set is_last=0; for final/complete decode, set is_last=1 +#[no_mangle] +pub extern "C" fn encoding_decode( + handle: *const c_void, + input: *const c_uchar, + input_len: usize, + output: *mut c_uchar, + output_len: usize, + is_last: u8, +) -> DecodeResult { + if handle.is_null() || output.is_null() { + return DecodeResult { + had_errors: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + let input_bytes = if input.is_null() || input_len == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(input, input_len) } + }; + let output_slice = unsafe { 
std::slice::from_raw_parts_mut(output, output_len) }; + + let mut decoder = encoding.new_decoder(); + let last = is_last != 0; + + let (result, bytes_read, bytes_written, had_errors) = + decoder.decode_to_utf8(input_bytes, output_slice, last); + + // If output buffer was too small, we still report what we could process + let _ = result; // CoderResult::InputEmpty or CoderResult::OutputFull + + DecodeResult { + had_errors: if had_errors { 1 } else { 0 }, + bytes_read, + bytes_written, + } +} + +// === Streaming Decoder API === + +use encoding_rs::Decoder; + +/// Create a streaming decoder that maintains state across calls +#[no_mangle] +pub extern "C" fn encoding_decoder_new(handle: *const c_void) -> *mut c_void { + if handle.is_null() { + return std::ptr::null_mut(); + } + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + let decoder = Box::new(encoding.new_decoder()); + Box::into_raw(decoder) as *mut c_void +} + +/// Decode using a streaming decoder (maintains state for incomplete sequences) +#[no_mangle] +pub extern "C" fn encoding_decoder_decode( + decoder_ptr: *mut c_void, + input: *const c_uchar, + input_len: usize, + output: *mut c_uchar, + output_len: usize, + is_last: u8, +) -> DecodeResult { + if decoder_ptr.is_null() || output.is_null() { + return DecodeResult { + had_errors: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + + let decoder: &mut Decoder = unsafe { &mut *(decoder_ptr as *mut Decoder) }; + let input_bytes = if input.is_null() || input_len == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(input, input_len) } + }; + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_len) }; + + let last = is_last != 0; + let (result, bytes_read, bytes_written, had_errors) = + decoder.decode_to_utf8(input_bytes, output_slice, last); + + let _ = result; + + DecodeResult { + had_errors: if had_errors { 1 } else { 0 }, + bytes_read, + bytes_written, + } +} + +/// Free a streaming decoder +#[no_mangle] 
+pub extern "C" fn encoding_decoder_free(decoder_ptr: *mut c_void) { + if !decoder_ptr.is_null() { + unsafe { + drop(Box::from_raw(decoder_ptr as *mut Decoder)); + } + } +} + #[no_mangle] pub extern "C" fn html5ever_parse_fragment( html: *mut c_uchar, From f7c1710c2354d19a1625b2ae969cc36bb9f578a4 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 10 Apr 2026 12:02:06 +0800 Subject: [PATCH 15/21] Expose correct charset document.characterSet, document.charset and document.inputEncoding now expose the correct charset. --- src/browser/Page.zig | 41 +++++++++++------------- src/browser/tests/document/document.html | 4 +++ src/browser/tests/page/encoding.html | 4 +++ src/browser/webapi/Document.zig | 11 +++++-- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/browser/Page.zig b/src/browser/Page.zig index f12b606b..7c66faff 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -207,6 +207,9 @@ base_url: ?[:0]const u8 = null, // referer header cache. referer_header: ?[:0]const u8 = null, +// Document charset (canonical name from encoding_rs, static lifetime) +charset: []const u8 = "UTF-8", + // Arbitrary buffer. Need to temporarily lowercase a value? Use this. No lifetime // guarantee - it's valid until someone else uses it. 
buf: [BUF_SIZE]u8 = undefined, @@ -962,9 +965,13 @@ fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void { switch (mime.content_type) { .text_html => { - self._parse_state = .{ .html = .{ - .mime = mime, - } }; + // Normalize and store the charset using encoding_rs canonical names + const charset_str = mime.charsetString(); + const info = h5e.encoding_for_label(charset_str.ptr, charset_str.len); + if (info.isValid()) { + self.charset = info.name(); + } + self._parse_state = .{ .html = .empty }; }, .application_json, .text_javascript, .text_css, .text_plain => { var arr: std.ArrayList(u8) = .empty; @@ -979,7 +986,7 @@ fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void { } switch (self._parse_state) { - .html => |*html| try html.buf.appendSlice(self.arena, data), + .html => |*html| try html.appendSlice(self.arena, data), .text => |*buf| { // we have to escape the data... var v = data; @@ -1028,12 +1035,13 @@ fn pageDoneCallback(ctx: *anyopaque) !void { var parser = Parser.init(parse_arena, self.document.asNode(), self); switch (self._parse_state) { - .html => |*html_state| { - const raw_html = html_state.buf.items; - if (html_state.needsEncodingConversion()) { - parser.parseWithEncoding(raw_html, html_state.mime.charsetString()); - } else { + .html => |*html_buf| { + const raw_html = html_buf.items; + + if (std.mem.eql(u8, self.charset, "UTF-8")) { parser.parse(raw_html); + } else { + parser.parseWithEncoding(raw_html, self.charset); } self._script_manager.staticScriptsDone(); self._parse_state = .complete; @@ -3164,21 +3172,11 @@ const ParseState = union(enum) { pre, complete, err: anyerror, - html: Html, + html: std.ArrayList(u8), text: std.ArrayList(u8), image: std.ArrayList(u8), raw: std.ArrayList(u8), raw_done: []const u8, - - const Html = struct { - mime: Mime, - buf: std.ArrayList(u8) = .empty, - - fn needsEncodingConversion(self: *const Html) bool { - const charset = self.mime.charsetString(); - return 
!std.ascii.eqlIgnoreCase(charset, "utf-8") and !std.ascii.eqlIgnoreCase(charset, "utf8"); - } - }; }; const LoadState = enum { @@ -3628,9 +3626,6 @@ fn asUint(comptime string: anytype) std.meta.Int( const testing = @import("../testing.zig"); test "WebApi: Page" { - const filter: testing.LogFilter = .init(&.{ .http, .js }); - defer filter.deinit(); - try testing.htmlRunner("page", .{}); } diff --git a/src/browser/tests/document/document.html b/src/browser/tests/document/document.html index 74d8ff30..ede2b507 100644 --- a/src/browser/tests/document/document.html +++ b/src/browser/tests/document/document.html @@ -18,6 +18,10 @@ testing.expectEqual("visible", document.visibilityState); testing.expectEqual(false, document.prerendering); testing.expectEqual(undefined, Document.prerendering); + // characterSet should return canonical encoding name + testing.expectEqual("UTF-8", document.characterSet); + testing.expectEqual("UTF-8", document.charset); + testing.expectEqual("UTF-8", document.inputEncoding); diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index cf15c49b..6b48e4c3 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -1068,10 +1068,15 @@ pub const JsApi = struct { pub const hasFocus = bridge.function(Document.hasFocus, .{}); pub const prerendering = bridge.property(false, .{ .template = false }); - pub const characterSet = bridge.property("UTF-8", .{ .template = false }); - pub const charset = bridge.property("UTF-8", .{ .template = false }); - pub const inputEncoding = bridge.property("UTF-8", .{ .template = false }); + pub const characterSet = bridge.accessor(getCharacterSet, null, .{}); + pub const charset = bridge.accessor(getCharacterSet, null, .{}); + pub const inputEncoding = bridge.accessor(getCharacterSet, null, .{}); pub const compatMode = bridge.property("CSS1Compat", .{ .template = false }); + + fn getCharacterSet(self: *const Document) []const u8 { + const doc_page = self._page orelse 
return "UTF-8"; + return doc_page.charset; + } pub const referrer = bridge.property("", .{ .template = false }); }; From 05229fdc536645f9f21e2f40d5d2dfa2c5ed46e3 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 10 Apr 2026 14:41:19 +0800 Subject: [PATCH 16/21] Use the document's charset to determine if/how to encode querystring Whenever we resolve a URL, say from `anchor.href`, we should consider the document's charset when encoding the querystring. This probably isn't the most important feature, but it makes tens of thousands of WPT cases pass, e.g. /encoding/legacy-mb-tchinese/big5/big5-encode-href-errors-han.html?3001-4000 and /encoding/legacy-mb-japanese/euc-jp/eucjp-encode-href-errors-han.html?17001-18000 DOM elements previously called `URL.resolveURL(...)`. They now call `self.asNode().resolveURL(...)`, where `Node#resolveURL` will provide the document's charset. --- src/SemanticTree.zig | 2 +- src/browser/Page.zig | 4 +- src/browser/URL.zig | 91 ++++++++++++---- src/browser/interactive.zig | 2 +- src/browser/markdown.zig | 6 +- src/browser/parser/html5ever.zig | 24 +++++ src/browser/structured_data.zig | 2 +- src/browser/tests/page/encoding.html | 29 ++++++ src/browser/webapi/Node.zig | 13 +++ src/browser/webapi/element/html/Anchor.zig | 7 +- src/browser/webapi/element/html/Form.zig | 2 +- src/browser/webapi/element/html/IFrame.zig | 4 +- src/browser/webapi/element/html/Image.zig | 4 +- src/browser/webapi/element/html/Link.zig | 4 +- src/browser/webapi/element/html/Media.zig | 3 +- src/browser/webapi/element/html/Script.zig | 4 +- src/browser/webapi/element/html/Video.zig | 4 +- src/browser/webapi/net/WebSocket.zig | 2 +- src/browser/webapi/net/XMLHttpRequest.zig | 2 +- src/cdp/domains/page.zig | 2 +- src/cdp/domains/target.zig | 2 +- src/html5ever/lib.rs | 114 +++++++++++++++++++++ src/lightpanda.zig | 2 +- 23 files changed, 276 insertions(+), 53 deletions(-) diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 9bca520e..5b3f0ae5 100644 --- 
a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -671,7 +671,7 @@ pub fn getNodeDetails( if (el.getAttributeSafe(comptime .wrap("href"))) |h| { const URL = lp.URL; - href = URL.resolve(arena, page.base(), h, .{ .encode = true }) catch h; + href = URL.resolve(arena, page.base(), h, .{ .encoding = page.charset }) catch h; } if (el.is(Element.Html.Input)) |input| { diff --git a/src/browser/Page.zig b/src/browser/Page.zig index 7c66faff..1c3d39f0 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -661,7 +661,7 @@ fn scheduleNavigationWithArena(originator: *Page, arena: Allocator, request_url: arena, page_base, request_url, - .{ .always_dupe = true, .encode = true }, + .{ .always_dupe = true, .encoding = originator.charset }, ); break :blk .{ u, false }; }; @@ -1196,7 +1196,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *IFrame) !void { self.call_arena, // ok to use, page.navigate dupes this self.base(), src, - .{ .encode = true }, + .{ .encoding = self.charset }, ); }; diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 6f8cbebd..532f11a1 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -19,16 +19,19 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const ResolveOpts = struct { - encode: bool = false, +pub const ResolveOpts = struct { + /// null = don't encode, "UTF-8" = standard percent encoding, + /// other charset = encode query string using that charset with NCR fallback + encoding: ?[]const u8 = null, always_dupe: bool = false, }; // path is anytype, so that it can be used with both []const u8 and [:0]const u8 -pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, comptime opts: ResolveOpts) ![:0]const u8 { +pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, opts: ResolveOpts) ![:0]const u8 { const PT = @TypeOf(source_path); - var path: [:0]const u8 = if (comptime !isNullTerminated(PT) or opts.always_dupe) try allocator.dupeZ(u8, source_path) else 
source_path; + const needs_dupe = comptime !isNullTerminated(PT); + var path: [:0]const u8 = if (needs_dupe or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path; if (base.len == 0) { return processResolved(allocator, path, opts); @@ -186,14 +189,12 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c return processResolved(allocator, out[0..out_i :0], opts); } -fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { - if (!comptime opts.encode) { - return url; - } - return ensureEncoded(allocator, url); +fn processResolved(allocator: Allocator, url: [:0]const u8, opts: ResolveOpts) ![:0]const u8 { + const encoding = opts.encoding orelse return url; + return ensureEncoded(allocator, url, encoding); } -pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { +pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8, encoding: []const u8) ![:0]const u8 { const scheme_end = std.mem.indexOf(u8, url, "://"); const authority_start = if (scheme_end) |end| end + 3 else 0; const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; @@ -205,18 +206,18 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { const query_end = if (query_start) |_| (fragment_start orelse url.len) else path_end; const path_to_encode = url[path_start..path_end]; + // Path is always UTF-8 percent encoded per URL spec const encoded_path = try percentEncodeSegment(allocator, path_to_encode, .path); + // Query string uses document encoding const encoded_query = if (query_start) |qs| blk: { const query_to_encode = url[qs + 1 .. 
query_end]; - const encoded = try percentEncodeSegment(allocator, query_to_encode, .query); - break :blk encoded; + break :blk try encodeQueryString(allocator, query_to_encode, encoding); } else null; const encoded_fragment = if (fragment_start) |fs| blk: { const fragment_to_encode = url[fs + 1 ..]; - const encoded = try percentEncodeSegment(allocator, fragment_to_encode, .query); - break :blk encoded; + break :blk try percentEncodeSegment(allocator, fragment_to_encode, .query); } else null; if (encoded_path.ptr == path_to_encode.ptr and @@ -242,7 +243,7 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { return buf.items[0 .. buf.items.len - 1 :0]; } -const EncodeSet = enum { path, query, userinfo, fragment }; +const EncodeSet = enum { path, query, query_legacy, userinfo, fragment }; fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 { // Check if encoding is needed @@ -285,17 +286,65 @@ fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime enco return buf.items; } +const h5e = @import("parser/html5ever.zig"); + +/// Encode a query string using the specified encoding. +/// For UTF-8, this is standard percent encoding. +/// For legacy encodings, unmappable characters are replaced with NCRs (&#codepoint;). 
+fn encodeQueryString(allocator: Allocator, query: []const u8, encoding: []const u8) ![]const u8 { + // For UTF-8, use standard percent encoding + if (std.mem.eql(u8, encoding, "UTF-8")) { + return percentEncodeSegment(allocator, query, .query); + } + + // For legacy encodings, first encode to the target charset with NCR fallback + const enc_info = h5e.encoding_for_label(encoding.ptr, encoding.len); + if (!enc_info.isValid()) { + // Unknown encoding, fall back to UTF-8 + return percentEncodeSegment(allocator, query, .query); + } + + // Calculate max buffer size for encoded output + const max_encoded_len = h5e.encoding_max_encode_buffer_length(enc_info.handle.?, query.len); + if (max_encoded_len == 0) { + return percentEncodeSegment(allocator, query, .query); + } + + const encode_buf = try allocator.alloc(u8, max_encoded_len); + defer allocator.free(encode_buf); + + // Encode UTF-8 to legacy encoding with NCR fallback + const result = h5e.encoding_encode_with_ncr( + enc_info.handle.?, + query.ptr, + query.len, + encode_buf.ptr, + encode_buf.len, + ); + + if (!result.isSuccess()) { + // Encoding failed, fall back to UTF-8 + return percentEncodeSegment(allocator, query, .query); + } + + // Now percent-encode the result using query_legacy to preserve NCRs + const encoded_bytes = encode_buf[0..result.bytes_written]; + return percentEncodeSegment(allocator, encoded_bytes, .query_legacy); +} + fn shouldPercentEncode(c: u8, comptime encode_set: EncodeSet) bool { return switch (c) { // Unreserved characters (RFC 3986) 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => false, - // sub-delims allowed in path/query but some must be encoded in userinfo - '!', '$', '&', '\'', '(', ')', '*', '+', ',' => false, - ';', '=' => encode_set == .userinfo, + // sub-delims allowed in path/query but some must be encoded in userinfo/query_legacy + '!', '$', '\'', '(', ')', '*', '+', ',' => false, + // '&' and ';' must be encoded for legacy encoding (to preserve NCRs like &#nnnnn;) + 
'&', ';' => encode_set == .userinfo or encode_set == .query_legacy, + '=' => encode_set == .userinfo, // Separators: userinfo must encode these '/', ':', '@' => encode_set == .userinfo, // '?' is allowed in queries only - '?' => encode_set != .query, + '?' => encode_set != .query and encode_set != .query_legacy, // '#' is allowed in fragments only '#' => encode_set != .fragment, // Everything else needs encoding (including space) @@ -1130,7 +1179,7 @@ test "URL: ensureEncoded" { }; for (cases) |case| { - const result = try ensureEncoded(testing.arena_allocator, case.url); + const result = try ensureEncoded(testing.arena_allocator, case.url, "UTF-8"); try testing.expectString(case.expected, result); } } @@ -1296,7 +1345,7 @@ test "URL: resolve with encoding" { }; for (cases) |case| { - const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true }); + const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encoding = "UTF-8" }); try testing.expectString(case.expected, result); } } diff --git a/src/browser/interactive.zig b/src/browser/interactive.zig index a0b4528a..225633c7 100644 --- a/src/browser/interactive.zig +++ b/src/browser/interactive.zig @@ -182,7 +182,7 @@ pub fn collectInteractiveElements( .id = el.getAttributeSafe(comptime .wrap("id")), .class = el.getAttributeSafe(comptime .wrap("class")), .href = if (el.getAttributeSafe(comptime .wrap("href"))) |href| - URL.resolve(arena, page.base(), href, .{ .encode = true }) catch href + URL.resolve(arena, page.base(), href, .{ .encoding = page.charset }) catch href else null, .input_type = getInputType(el), diff --git a/src/browser/markdown.zig b/src/browser/markdown.zig index 5a83dfdc..437dbee6 100644 --- a/src/browser/markdown.zig +++ b/src/browser/markdown.zig @@ -278,7 +278,8 @@ const Context = struct { } try self.writer.writeAll("]("); if (el.getAttributeSafe(comptime .wrap("src"))) |src| { - const absolute_src = URL.resolve(self.page.call_arena, 
self.page.base(), src, .{ .encode = true }) catch src; + const page = self.page; + const absolute_src = URL.resolve(page.call_arena, page.base(), src, .{ .encoding = page.charset }) catch src; try self.writer.writeAll(absolute_src); } try self.writer.writeAll(")"); @@ -286,13 +287,14 @@ const Context = struct { return; }, .anchor => { + const page = self.page; const info = analyzeContent(el.asNode()); const label = getAnchorLabel(el); const href_raw = el.getAttributeSafe(comptime .wrap("href")); if (!info.has_visible and label == null and href_raw == null) return; - const href = if (href_raw) |h| URL.resolve(self.page.call_arena, self.page.base(), h, .{ .encode = true }) catch h else null; + const href = if (href_raw) |h| URL.resolve(page.call_arena, page.base(), h, .{ .encoding = page.charset }) catch h else null; if (info.has_block) { try self.renderChildren(el.asNode()); diff --git a/src/browser/parser/html5ever.zig b/src/browser/parser/html5ever.zig index cb673789..829ac429 100644 --- a/src/browser/parser/html5ever.zig +++ b/src/browser/parser/html5ever.zig @@ -278,3 +278,27 @@ pub extern "c" fn encoding_decoder_decode( ) DecodeResult; pub extern "c" fn encoding_decoder_free(decoder: *anyopaque) void; + +// Encoding API (UTF-8 to legacy encoding with NCR fallback) +pub const EncodeResult = extern struct { + status: u8, + bytes_read: usize, + bytes_written: usize, + + pub fn isSuccess(self: *const EncodeResult) bool { + return self.status == 0; + } +}; + +pub extern "c" fn encoding_encode_with_ncr( + handle: *anyopaque, + input: ?[*]const u8, + input_len: usize, + output: [*]u8, + output_capacity: usize, +) EncodeResult; + +pub extern "c" fn encoding_max_encode_buffer_length( + handle: *anyopaque, + input_len: usize, +) usize; diff --git a/src/browser/structured_data.zig b/src/browser/structured_data.zig index 9b6e7fbe..cad1d9d8 100644 --- a/src/browser/structured_data.zig +++ b/src/browser/structured_data.zig @@ -288,7 +288,7 @@ fn collectLink( ) !void { const 
rel = el.getAttributeSafe(comptime .wrap("rel")) orelse return; const raw_href = el.getAttributeSafe(comptime .wrap("href")) orelse return; - const href = URL.resolve(arena, page.base(), raw_href, .{ .encode = true }) catch raw_href; + const href = URL.resolve(arena, page.base(), raw_href, .{ .encoding = page.charset }) catch raw_href; if (std.ascii.eqlIgnoreCase(rel, "alternate")) { try alternate.append(arena, .{ diff --git a/src/browser/tests/page/encoding.html b/src/browser/tests/page/encoding.html index 19e0134f..b740a465 100644 --- a/src/browser/tests/page/encoding.html +++ b/src/browser/tests/page/encoding.html @@ -77,3 +77,32 @@ }); } + + diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig index 0e7c2ffe..5871abee 100644 --- a/src/browser/webapi/Node.zig +++ b/src/browser/webapi/Node.zig @@ -22,6 +22,7 @@ const String = @import("../../string.zig").String; const js = @import("../js/js.zig"); const Page = @import("../Page.zig"); +const URL = @import("../URL.zig"); const reflect = @import("../reflect.zig"); const EventTarget = @import("EventTarget.zig"); @@ -511,6 +512,18 @@ pub fn ownerPage(self: *const Node, default: *Page) *Page { return doc._page orelse default; } +pub const ResolveURLOpts = struct { + allocator: ?Allocator = null, +}; + +// Resolve a URL relative to this node's owning document. +// Uses the document's charset for query string encoding (with NCR fallback for unmappable chars). 
+pub fn resolveURL(self: *const Node, url: anytype, page: *Page, opts: ResolveURLOpts) ![:0]const u8 { + const owner_page = self.ownerPage(page); + const allocator = opts.allocator orelse page.call_arena; + return URL.resolve(allocator, owner_page.base(), url, .{ .encoding = owner_page.charset }); +} + pub fn isSameDocumentAs(self: *const Node, other: *const Node, page: *const Page) bool { // Get the root document for each node const self_doc = if (self._type == .document) self._type.document else self.ownerDocument(page); diff --git a/src/browser/webapi/element/html/Anchor.zig b/src/browser/webapi/element/html/Anchor.zig index 33c8bded..e4207e84 100644 --- a/src/browser/webapi/element/html/Anchor.zig +++ b/src/browser/webapi/element/html/Anchor.zig @@ -39,12 +39,11 @@ pub fn asNode(self: *Anchor) *Node { } pub fn getHref(self: *Anchor, page: *Page) ![]const u8 { - const element = self.asElement(); - const href = element.getAttributeSafe(comptime .wrap("href")) orelse return ""; + const href = self.asElement().getAttributeSafe(comptime .wrap("href")) orelse return ""; if (href.len == 0) { return ""; } - return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true }); + return self.asNode().resolveURL(href, page, .{}); } pub fn setHref(self: *Anchor, value: []const u8, page: *Page) !void { @@ -203,7 +202,7 @@ fn getResolvedHref(self: *Anchor, page: *Page) !?[:0]const u8 { if (href.len == 0) { return null; } - return try URL.resolve(page.call_arena, page.base(), href, .{}); + return try self.asNode().resolveURL(href, page, .{}); } pub const JsApi = struct { diff --git a/src/browser/webapi/element/html/Form.zig b/src/browser/webapi/element/html/Form.zig index e8857e48..6628306b 100644 --- a/src/browser/webapi/element/html/Form.zig +++ b/src/browser/webapi/element/html/Form.zig @@ -97,7 +97,7 @@ pub fn getAction(self: *Form, page: *Page) ![]const u8 { if (action.len == 0) { return page.url; } - return URL.resolve(page.call_arena, page.base(), action, .{ 
.encode = true }); + return element.asNode().resolveURL(action, page, .{}); } pub fn setAction(self: *Form, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/IFrame.zig b/src/browser/webapi/element/html/IFrame.zig index e596f4ac..3b276dcd 100644 --- a/src/browser/webapi/element/html/IFrame.zig +++ b/src/browser/webapi/element/html/IFrame.zig @@ -48,9 +48,9 @@ pub fn getContentDocument(self: *const IFrame) ?*Document { return window._document; } -pub fn getSrc(self: *const IFrame, page: *Page) ![:0]const u8 { +pub fn getSrc(self: *IFrame, page: *Page) ![:0]const u8 { if (self._src.len == 0) return ""; - return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true }); + return self.asNode().resolveURL(self._src, page, .{}); } pub fn setSrc(self: *IFrame, src: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Image.zig b/src/browser/webapi/element/html/Image.zig index b6731144..e3b57cd9 100644 --- a/src/browser/webapi/element/html/Image.zig +++ b/src/browser/webapi/element/html/Image.zig @@ -40,9 +40,7 @@ pub fn getSrc(self: *const Image, page: *Page) ![]const u8 { if (src.len == 0) { return ""; } - - // Always resolve the src against the page URL - return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true }); + return element.asConstNode().resolveURL(src, page, .{}); } pub fn setSrc(self: *Image, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Link.zig b/src/browser/webapi/element/html/Link.zig index ed3839f2..5b6ce0c6 100644 --- a/src/browser/webapi/element/html/Link.zig +++ b/src/browser/webapi/element/html/Link.zig @@ -44,9 +44,7 @@ pub fn getHref(self: *Link, page: *Page) ![]const u8 { if (href.len == 0) { return ""; } - - // Always resolve the href against the page URL - return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true }); + return element.asNode().resolveURL(href, page, .{}); } pub fn setHref(self: *Link, 
value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Media.zig b/src/browser/webapi/element/html/Media.zig index 71013e71..6d62013f 100644 --- a/src/browser/webapi/element/html/Media.zig +++ b/src/browser/webapi/element/html/Media.zig @@ -235,8 +235,7 @@ pub fn getSrc(self: *const Media, page: *Page) ![]const u8 { if (src.len == 0) { return ""; } - const URL = @import("../../URL.zig"); - return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true }); + return element.asConstNode().resolveURL(src, page, .{}); } pub fn setSrc(self: *Media, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Script.zig b/src/browser/webapi/element/html/Script.zig index d5e83b4f..77b6b7ef 100644 --- a/src/browser/webapi/element/html/Script.zig +++ b/src/browser/webapi/element/html/Script.zig @@ -45,9 +45,9 @@ pub fn asNode(self: *Script) *Node { return self.asElement().asNode(); } -pub fn getSrc(self: *const Script, page: *Page) ![]const u8 { +pub fn getSrc(self: *Script, page: *Page) ![]const u8 { if (self._src.len == 0) return ""; - return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true }); + return self.asNode().resolveURL(self._src, page, .{}); } pub fn setSrc(self: *Script, src: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Video.zig b/src/browser/webapi/element/html/Video.zig index 63ccda4a..8fabb3ae 100644 --- a/src/browser/webapi/element/html/Video.zig +++ b/src/browser/webapi/element/html/Video.zig @@ -57,9 +57,7 @@ pub fn getPoster(self: *const Video, page: *Page) ![]const u8 { if (poster.len == 0) { return ""; } - - const URL = @import("../../URL.zig"); - return URL.resolve(page.call_arena, page.base(), poster, .{ .encode = true }); + return element.asConstNode().resolveURL(poster, page, .{}); } pub fn setPoster(self: *Video, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/net/WebSocket.zig 
b/src/browser/webapi/net/WebSocket.zig index 1244a61e..c5228627 100644 --- a/src/browser/webapi/net/WebSocket.zig +++ b/src/browser/webapi/net/WebSocket.zig @@ -108,7 +108,7 @@ pub fn init(url: []const u8, protocols_: ?[]const u8, page: *Page) !*WebSocket { const arena = try page.getArena(.{ .debug = "WebSocket" }); errdefer page.releaseArena(arena); - const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encode = true }); + const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encoding = page.charset }); const http_client = page._session.browser.http_client; const conn = http_client.network.newConnection() orelse { diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 62e05a17..8a56d370 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -210,7 +210,7 @@ pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void const page = self._page; self._method = try parseMethod(method_); - self._url = try URL.resolve(self._arena, page.base(), url, .{ .always_dupe = true, .encode = true }); + self._url = try URL.resolve(self._arena, page.base(), url, .{ .always_dupe = true, .encoding = page.charset }); try self.stateChanged(.opened, page); } diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 267cada8..beb86c6b 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -286,7 +286,7 @@ fn navigate(cmd: *CDP.Command) !void { page = try session.replacePage(); } - const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url, "UTF-8"); try page.navigate(encoded_url, .{ .reason = .address_bar, .cdp_id = cmd.input.id, diff --git a/src/cdp/domains/target.zig b/src/cdp/domains/target.zig index bce7e00d..822659f7 100644 --- a/src/cdp/domains/target.zig +++ 
b/src/cdp/domains/target.zig @@ -220,7 +220,7 @@ fn createTarget(cmd: *CDP.Command) !void { } if (!std.mem.eql(u8, "about:blank", params.url)) { - const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url, "UTF-8"); try page.navigate( encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null } }, diff --git a/src/html5ever/lib.rs b/src/html5ever/lib.rs index c684c039..9d14e784 100644 --- a/src/html5ever/lib.rs +++ b/src/html5ever/lib.rs @@ -334,6 +334,120 @@ pub extern "C" fn encoding_decoder_free(decoder_ptr: *mut c_void) { } } +// === Encoding API (UTF-8 to legacy encoding with NCR fallback) === + +/// Result of encoding operation +#[repr(C)] +pub struct EncodeResult { + /// 0 = success, 1 = output buffer too small + pub status: u8, + /// Number of input bytes consumed + pub bytes_read: usize, + /// Number of bytes written to output buffer + pub bytes_written: usize, +} + +/// Encode UTF-8 to a legacy encoding, replacing unencodable characters with +/// HTML decimal numeric character references (&#codepoint;). +/// +/// This is used for URL query string encoding per WHATWG URL spec. +/// encoding_rs's encode_from_utf8 already produces NCRs for unmappable chars. 
+#[no_mangle] +pub extern "C" fn encoding_encode_with_ncr( + handle: *const c_void, + input: *const c_uchar, + input_len: usize, + output: *mut c_uchar, + output_capacity: usize, +) -> EncodeResult { + if handle.is_null() || output.is_null() { + return EncodeResult { + status: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + + let input_str = if input.is_null() || input_len == 0 { + "" + } else { + let bytes = unsafe { std::slice::from_raw_parts(input, input_len) }; + match std::str::from_utf8(bytes) { + Ok(s) => s, + Err(_) => { + return EncodeResult { + status: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + } + }; + + // For UTF-8 encoding, just copy directly (no NCR needed) + if encoding == encoding_rs::UTF_8 { + if input_len > output_capacity { + return EncodeResult { + bytes_read: 0, + bytes_written: 0, + status: 1, + }; + } + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_capacity) }; + output_slice[..input_len].copy_from_slice(input_str.as_bytes()); + return EncodeResult { + bytes_read: input_len, + bytes_written: input_len, + status: 0, + }; + } + + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_capacity) }; + let mut encoder = encoding.new_encoder(); + + // encode_from_utf8 automatically produces NCRs for unmappable characters + let (result, bytes_read, bytes_written, _had_unmappables) = + encoder.encode_from_utf8(input_str, output_slice, true); + + match result { + encoding_rs::CoderResult::InputEmpty => EncodeResult { + bytes_read, + bytes_written, + status: 0, + }, + encoding_rs::CoderResult::OutputFull => EncodeResult { + bytes_read, + bytes_written, + status: 1, + }, + } +} + +/// Calculate maximum output buffer size needed for encoding with NCR fallback. +/// Worst case: every character becomes &#codepoint; where codepoint is up to 7 digits. 
+#[no_mangle]
+pub extern "C" fn encoding_max_encode_buffer_length(
+    handle: *const c_void,
+    input_len: usize,
+) -> usize {
+    if handle.is_null() {
+        return 0;
+    }
+    let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };
+    let encoder = encoding.new_encoder();
+    // Base size assuming every character is mappable; NCR expansion is added below
+    encoder
+        .max_buffer_length_from_utf8_if_no_unmappables(input_len)
+        .map(|len| {
+            // Each input byte is at most one character and one NCR (&#1114111;) is at
+            // most 10 bytes, so 10 * input_len extra always leaves room for every NCR
+            len.saturating_add(input_len.saturating_mul(10))
+        })
+        .unwrap_or(input_len.saturating_mul(10))
+}
+ #[no_mangle] pub extern "C" fn html5ever_parse_fragment( html: *mut c_uchar, diff --git a/src/lightpanda.zig b/src/lightpanda.zig index 4d6c23fb..b0356e93 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -107,7 +107,7 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { // } // } - const encoded_url = try URL.ensureEncoded(page.call_arena, url); + const encoded_url = try URL.ensureEncoded(page.call_arena, url, "UTF-8"); _ = try page.navigate(encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null }, From a5bf1f07afdb6b1d2bcff15702022d0a297144e4 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 10 Apr 2026 15:09:32 +0800 Subject: [PATCH 17/21] chore: trigger CI From 7c6624014683e845708db81b45e77b2930e6cb56 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 10 Apr 2026 15:41:38 +0800 Subject: [PATCH 18/21] chore: trigger CI From 36fcb0fd7f94903d146d75614f9dfac1f360d531 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 10 Apr 2026 11:02:04 +0200 Subject: [PATCH 19/21] ci: use a longer timeout for e2e test When we have to generate a snapshot, the build duration is longer.
--- .github/workflows/e2e-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index c62f1411..50af9e91 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -39,7 +39,7 @@ jobs: name: zig build release runs-on: ubuntu-latest - timeout-minutes: 15 + timeout-minutes: 20 # Don't run the CI with draft PR. if: github.event.pull_request.draft == false From e53e4579abd94788b078cdca0213903d7241bf99 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Fri, 10 Apr 2026 10:34:03 +0200 Subject: [PATCH 20/21] ci: use v8 snapshot cache w/ wpt test --- .github/actions/v8-snapshot/action.yml | 12 +++++++++++- .github/workflows/wpt.yml | 6 ++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/actions/v8-snapshot/action.yml b/.github/actions/v8-snapshot/action.yml index 058c2221..dc73b48a 100644 --- a/.github/actions/v8-snapshot/action.yml +++ b/.github/actions/v8-snapshot/action.yml @@ -1,6 +1,16 @@ name: "V8 snaphsot" description: "Generate v8 snapshot" +inputs: + arch: + description: 'CPU arch used to select the v8 lib' + required: false + default: 'x86_64' + os: + description: 'OS used to select the v8 lib' + required: false + default: 'linux' + runs: using: "composite" @@ -9,7 +19,7 @@ runs: # snapshot. 
- name: V8 snapshot cache key id: snapshot_cache_key - run: echo "hash=v8-snapshot-$(git log -n 1 --pretty=format:%H -- + run: echo "hash=v8-snapshot-${{ inputs.os }}_${{ inputs.arch }}-$(git log -n 1 --pretty=format:%H -- src/browser/js/bridge.zig src/browser/js/Snapshot.zig )" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/wpt.yml b/.github/workflows/wpt.yml index bfabc59c..8a3b8516 100644 --- a/.github/workflows/wpt.yml +++ b/.github/workflows/wpt.yml @@ -36,8 +36,10 @@ jobs: os: ${{env.OS}} arch: ${{env.ARCH}} - - name: v8 snapshot - run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin + - uses: ./.github/actions/v8-snapshot + with: + os: ${{env.OS}} + arch: ${{env.ARCH}} - name: zig build release run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=generic From ddf614a9d5ed4dbf5e8a58b79fc6bca5bfec76b7 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 10 Apr 2026 19:09:18 +0800 Subject: [PATCH 21/21] Add arena buckets to ArenaPool ArenaPool previously maintained up to 512 16KB buckets. The 16KB retention is small for things like XHR and scripts, but increasing it to something more reasonable, like 128KB, would use up to 8x more memory. This commit adds 4 buckets: 1KB, 4KB, 16KB and 128KB. Callers can request a tiny, small, medium or large bucket. We end up using less peak memory and fewer allocations. Furthermore, callers can request a specific size. This is particularly useful for WebSocket or Blob where the size could vary greatly (so we'd likely default to a large bucket), but that could needlessly use up a large arena. The bucket sizes were derived from analyzing allocations. A significant number of allocations were very small. Things like ScheduleCallback and FinalizerCallback are always less than 1K and can be generated in the thousands.
The 16KB retention was wasteful in these cases...better to have a large number of 1K pools, so that we can have a handful of very large buffers. --- src/App.zig | 2 +- src/ArenaPool.zig | 291 ++++++++++++------ src/browser/HttpClient.zig | 3 +- src/browser/Page.zig | 14 +- src/browser/Runner.zig | 2 +- src/browser/ScriptManager.zig | 6 +- src/browser/Session.zig | 12 +- src/browser/StyleManager.zig | 2 +- src/browser/js/Env.zig | 2 +- src/browser/js/Local.zig | 2 +- src/browser/js/Origin.zig | 2 +- src/browser/js/String.zig | 30 +- src/browser/webapi/Blob.zig | 10 +- src/browser/webapi/DOMParser.zig | 2 +- src/browser/webapi/Document.zig | 2 +- src/browser/webapi/Event.zig | 4 +- src/browser/webapi/File.zig | 2 +- src/browser/webapi/FileReader.zig | 2 +- src/browser/webapi/IntersectionObserver.zig | 4 +- src/browser/webapi/MutationObserver.zig | 8 +- src/browser/webapi/Permissions.zig | 2 +- src/browser/webapi/Range.zig | 4 +- src/browser/webapi/Window.zig | 4 +- src/browser/webapi/animation/Animation.zig | 2 +- src/browser/webapi/collections/ChildNodes.zig | 2 +- src/browser/webapi/css/FontFace.zig | 2 +- src/browser/webapi/css/FontFaceSet.zig | 2 +- src/browser/webapi/element/Html.zig | 2 +- src/browser/webapi/encoding/TextDecoder.zig | 2 +- src/browser/webapi/event/CloseEvent.zig | 4 +- src/browser/webapi/event/CompositionEvent.zig | 2 +- src/browser/webapi/event/CustomEvent.zig | 2 +- src/browser/webapi/event/ErrorEvent.zig | 4 +- src/browser/webapi/event/FocusEvent.zig | 4 +- src/browser/webapi/event/FormDataEvent.zig | 4 +- src/browser/webapi/event/InputEvent.zig | 4 +- src/browser/webapi/event/KeyboardEvent.zig | 4 +- src/browser/webapi/event/MessageEvent.zig | 4 +- src/browser/webapi/event/MouseEvent.zig | 4 +- .../NavigationCurrentEntryChangeEvent.zig | 4 +- .../webapi/event/PageTransitionEvent.zig | 4 +- src/browser/webapi/event/PointerEvent.zig | 2 +- src/browser/webapi/event/PopStateEvent.zig | 4 +- src/browser/webapi/event/ProgressEvent.zig | 4 +- 
.../webapi/event/PromiseRejectionEvent.zig | 2 +- src/browser/webapi/event/SubmitEvent.zig | 4 +- src/browser/webapi/event/TextEvent.zig | 2 +- src/browser/webapi/event/UIEvent.zig | 2 +- src/browser/webapi/event/WheelEvent.zig | 2 +- src/browser/webapi/net/Response.zig | 4 +- src/browser/webapi/net/WebSocket.zig | 16 +- src/browser/webapi/net/XMLHttpRequest.zig | 2 +- src/browser/webapi/selector/Selector.zig | 2 +- src/cdp/CDP.zig | 4 +- 54 files changed, 314 insertions(+), 204 deletions(-) diff --git a/src/App.zig b/src/App.zig index 8e3fe0c9..9e8741ba 100644 --- a/src/App.zig +++ b/src/App.zig @@ -69,7 +69,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App { app.telemetry = try Telemetry.init(app, config.mode); errdefer app.telemetry.deinit(allocator); - app.arena_pool = ArenaPool.init(allocator, 512, 1024 * 16); + app.arena_pool = ArenaPool.init(allocator, .{}); errdefer app.arena_pool.deinit(); return app; diff --git a/src/ArenaPool.zig b/src/ArenaPool.zig index 96fb694f..2b501438 100644 --- a/src/ArenaPool.zig +++ b/src/ArenaPool.zig @@ -27,33 +27,52 @@ const ArenaPool = @This(); const IS_DEBUG = builtin.mode == .Debug; -allocator: Allocator, -retain_bytes: usize, -free_list_len: u16 = 0, -free_list: ?*Entry = null, -free_list_max: u16, -entry_pool: std.heap.MemoryPool(Entry), -mutex: std.Thread.Mutex = .{}, -// Debug mode: track acquire/release counts per debug name to detect leaks and double-frees -_leak_track: if (IS_DEBUG) std.StringHashMapUnmanaged(isize) else void = if (IS_DEBUG) .empty else {}, +pub const BucketSize = enum { tiny, small, medium, large }; + +const Bucket = struct { + free_list: ?*Entry = null, + free_list_len: u16 = 0, + free_list_max: u16, + retain_bytes: usize, +}; const Entry = struct { next: ?*Entry, arena: ArenaAllocator, + bucket: *Bucket, debug: if (IS_DEBUG) []const u8 else void = if (IS_DEBUG) "" else {}, }; -pub const DebugInfo = struct { - debug: []const u8 = "", +pub const Config = struct { + tiny: 
Config.Bucket = .{ .max = 512, .retain = 1024 }, + small: Config.Bucket = .{ .max = 128, .retain = 4 * 1024 }, + medium: Config.Bucket = .{ .max = 64, .retain = 16 * 1024 }, + large: Config.Bucket = .{ .max = 32, .retain = 128 * 1024 }, + + const Bucket = struct { + max: u16, + retain: usize, + }; }; -pub fn init(allocator: Allocator, free_list_max: u16, retain_bytes: usize) ArenaPool { +tiny: Bucket, +small: Bucket, +medium: Bucket, +large: Bucket, +allocator: Allocator, +mutex: std.Thread.Mutex = .{}, +entry_pool: std.heap.MemoryPool(Entry), + +_leak_track: if (IS_DEBUG) std.StringHashMapUnmanaged(isize) else void = if (IS_DEBUG) .empty else {}, + +pub fn init(allocator: Allocator, config: Config) ArenaPool { return .{ .allocator = allocator, - .free_list_max = free_list_max, - .retain_bytes = retain_bytes, .entry_pool = .init(allocator), - ._leak_track = if (IS_DEBUG) .empty else {}, + .tiny = .{ .free_list_max = config.tiny.max, .retain_bytes = config.tiny.retain }, + .small = .{ .free_list_max = config.small.max, .retain_bytes = config.small.retain }, + .medium = .{ .free_list_max = config.medium.max, .retain_bytes = config.medium.retain }, + .large = .{ .free_list_max = config.large.max, .retain_bytes = config.large.retain }, }; } @@ -73,24 +92,49 @@ pub fn deinit(self: *ArenaPool) void { self._leak_track.deinit(self.allocator); } - var entry = self.free_list; - while (entry) |e| { - entry = e.next; - e.arena.deinit(); + // Free all arenas in all buckets + inline for (&[_]*Bucket{ &self.tiny, &self.small, &self.medium, &self.large }) |bucket| { + var entry = bucket.free_list; + while (entry) |e| { + entry = e.next; + e.arena.deinit(); + } } self.entry_pool.deinit(); } -pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator { +// Acquire an arena from the pool. 
+// - Pass a BucketSize (.tiny, .small, .medium, .large) for explicit bucket selection +// - Pass a usize for automatic bucket selection based on expected size +pub fn acquire(self: *ArenaPool, size_or_bucket: anytype, debug: []const u8) !Allocator { + const bucket = blk: { + const T = @TypeOf(size_or_bucket); + if (T == BucketSize or T == @TypeOf(.enum_literal)) { + break :blk switch (@as(BucketSize, size_or_bucket)) { + .tiny => &self.tiny, + .small => &self.small, + .medium => &self.medium, + .large => &self.large, + }; + } + if (T == usize or T == comptime_int) { + if (size_or_bucket <= self.tiny.retain_bytes) break :blk &self.tiny; + if (size_or_bucket <= self.small.retain_bytes) break :blk &self.small; + if (size_or_bucket <= self.medium.retain_bytes) break :blk &self.medium; + break :blk &self.large; + } + @compileError("acquire expects BucketSize or usize, got " ++ @typeName(T)); + }; + self.mutex.lock(); defer self.mutex.unlock(); - if (self.free_list) |entry| { - self.free_list = entry.next; - self.free_list_len -= 1; + if (bucket.free_list) |entry| { + bucket.free_list = entry.next; + bucket.free_list_len -= 1; if (IS_DEBUG) { - entry.debug = dbg.debug; - const gop = try self._leak_track.getOrPut(self.allocator, dbg.debug); + entry.debug = debug; + const gop = try self._leak_track.getOrPut(self.allocator, debug); if (!gop.found_existing) { gop.value_ptr.* = 0; } @@ -102,12 +146,13 @@ pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator { const entry = try self.entry_pool.create(); entry.* = .{ .next = null, + .bucket = bucket, + .debug = if (IS_DEBUG) debug else {}, .arena = ArenaAllocator.init(self.allocator), - .debug = if (IS_DEBUG) dbg.debug else {}, }; if (IS_DEBUG) { - const gop = try self._leak_track.getOrPut(self.allocator, dbg.debug); + const gop = try self._leak_track.getOrPut(self.allocator, debug); if (!gop.found_existing) { gop.value_ptr.* = 0; } @@ -116,12 +161,14 @@ pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator { 
return entry.arena.allocator(); } +// Universal release - determines bucket from the Entry automatically pub fn release(self: *ArenaPool, allocator: Allocator) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); + const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); const entry: *Entry = @fieldParentPtr("arena", arena); + const bucket = entry.bucket; // Reset the arena before acquiring the lock to minimize lock hold time - _ = arena.reset(.{ .retain_with_limit = self.retain_bytes }); + _ = arena.reset(.{ .retain_with_limit = bucket.retain_bytes }); self.mutex.lock(); defer self.mutex.unlock(); @@ -139,105 +186,113 @@ pub fn release(self: *ArenaPool, allocator: Allocator) void { } } - const free_list_len = self.free_list_len; - if (free_list_len == self.free_list_max) { + if (bucket.free_list_len >= bucket.free_list_max) { arena.deinit(); self.entry_pool.destroy(entry); return; } - entry.next = self.free_list; - self.free_list_len = free_list_len + 1; - self.free_list = entry; + entry.next = bucket.free_list; + bucket.free_list = entry; + bucket.free_list_len += 1; } pub fn reset(_: *const ArenaPool, allocator: Allocator, retain: usize) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); + const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); _ = arena.reset(.{ .retain_with_limit = retain }); } pub fn resetRetain(_: *const ArenaPool, allocator: Allocator) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); + const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); _ = arena.reset(.retain_capacity); } const testing = std.testing; - -test "arena pool - basic acquire and use" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); +test "ArenaPool: basic acquire and release" { + var pool = ArenaPool.init(testing.allocator, .{}); defer pool.deinit(); - const alloc = try pool.acquire(.{ .debug = "test" }); - const buf = 
try alloc.alloc(u8, 64); - @memset(buf, 0xAB); - try testing.expectEqual(@as(u8, 0xAB), buf[0]); + const tiny = try pool.acquire(.tiny, "test-tiny"); + const medium = try pool.acquire(.medium, "test-medium"); + const large = try pool.acquire(.large, "test-large"); - pool.release(alloc); + // All three must be distinct arenas + try testing.expect(tiny.ptr != medium.ptr); + try testing.expect(medium.ptr != large.ptr); + + _ = try tiny.alloc(u8, 64); + _ = try medium.alloc(u8, 1024); + _ = try large.alloc(u8, 4096); + + // Universal release works for all buckets + pool.release(tiny); + pool.release(medium); + pool.release(large); + + try testing.expectEqual(1, pool.tiny.free_list_len); + try testing.expectEqual(1, pool.medium.free_list_len); + try testing.expectEqual(1, pool.large.free_list_len); } -test "arena pool - reuse entry after release" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); +test "ArenaPool: reuse from correct bucket" { + var pool = ArenaPool.init(testing.allocator, .{}); defer pool.deinit(); - const alloc1 = try pool.acquire(.{ .debug = "test" }); - try testing.expectEqual(@as(u16, 0), pool.free_list_len); + const tiny1 = try pool.acquire(.tiny, "test"); + pool.release(tiny1); + try testing.expectEqual(1, pool.tiny.free_list_len); - pool.release(alloc1); - try testing.expectEqual(@as(u16, 1), pool.free_list_len); + // Next acquire with .tiny should reuse from tiny bucket + const tiny2 = try pool.acquire(.tiny, "test"); + try testing.expectEqual(0, pool.tiny.free_list_len); + try testing.expectEqual(tiny1.ptr, tiny2.ptr); - // The same entry should be returned from the free list. 
- const alloc2 = try pool.acquire(.{ .debug = "test" }); - try testing.expectEqual(@as(u16, 0), pool.free_list_len); - try testing.expectEqual(alloc1.ptr, alloc2.ptr); + // acquire with .medium should NOT get the tiny arena + const medium = try pool.acquire(.medium, "test-medium"); + try testing.expect(medium.ptr != tiny2.ptr); - pool.release(alloc2); + pool.release(tiny2); + pool.release(medium); } -test "arena pool - multiple concurrent arenas" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); +test "ArenaPool: respects per-bucket max limits" { + var pool = ArenaPool.init(testing.allocator, .{ + .tiny = .{ .max = 1, .retain = 1024 }, + .medium = .{ .max = 2, .retain = 1024 }, + .large = .{ .max = 1, .retain = 1024 }, + }); defer pool.deinit(); - const a1 = try pool.acquire(.{ .debug = "test1" }); - const a2 = try pool.acquire(.{ .debug = "test2" }); - const a3 = try pool.acquire(.{ .debug = "test3" }); + // Acquire 3 tiny arenas + const t1 = try pool.acquire(.tiny, "t1"); + const t2 = try pool.acquire(.tiny, "t2"); + const t3 = try pool.acquire(.tiny, "t3"); - // All three must be distinct arenas. 
- try testing.expect(a1.ptr != a2.ptr); - try testing.expect(a2.ptr != a3.ptr); - try testing.expect(a1.ptr != a3.ptr); + // Release all 3, but only 1 should be kept (tiny_max = 1) + pool.release(t1); + try testing.expectEqual(1, pool.tiny.free_list_len); + pool.release(t2); + try testing.expectEqual(1, pool.tiny.free_list_len); // still 1, t2 discarded + pool.release(t3); + try testing.expectEqual(1, pool.tiny.free_list_len); // still 1, t3 discarded - _ = try a1.alloc(u8, 16); - _ = try a2.alloc(u8, 32); - _ = try a3.alloc(u8, 48); + // Acquire 3 medium arenas + const m1 = try pool.acquire(.medium, "m1"); + const m2 = try pool.acquire(.medium, "m2"); + const m3 = try pool.acquire(.medium, "m3"); - pool.release(a1); - pool.release(a2); - pool.release(a3); - - try testing.expectEqual(@as(u16, 3), pool.free_list_len); + // Release all 3, but only 2 should be kept (medium_max = 2) + pool.release(m1); + pool.release(m2); + pool.release(m3); + try testing.expectEqual(2, pool.medium.free_list_len); } -test "arena pool - free list respects max limit" { - // Cap the free list at 1 so the second release discards its arena. - var pool = ArenaPool.init(testing.allocator, 1, 1024 * 16); +test "ArenaPool: reset clears memory without releasing" { + var pool = ArenaPool.init(testing.allocator, .{}); defer pool.deinit(); - const a1 = try pool.acquire(.{ .debug = "test1" }); - const a2 = try pool.acquire(.{ .debug = "test2" }); - - pool.release(a1); - try testing.expectEqual(@as(u16, 1), pool.free_list_len); - - // The free list is full; a2's arena should be destroyed, not queued. 
- pool.release(a2); - try testing.expectEqual(@as(u16, 1), pool.free_list_len); -} - -test "arena pool - reset clears memory without releasing" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); - defer pool.deinit(); - - const alloc = try pool.acquire(.{ .debug = "test" }); + const alloc = try pool.acquire(.medium, "test"); const buf = try alloc.alloc(u8, 128); @memset(buf, 0xFF); @@ -246,7 +301,7 @@ test "arena pool - reset clears memory without releasing" { pool.reset(alloc, 0); // The free list must stay empty; the allocator was not released. - try testing.expectEqual(@as(u16, 0), pool.free_list_len); + try testing.expectEqual(0, pool.medium.free_list_len); // Allocating again through the same arena must still work. const buf2 = try alloc.alloc(u8, 64); @@ -256,18 +311,60 @@ test "arena pool - reset clears memory without releasing" { pool.release(alloc); } -test "arena pool - deinit with entries in free list" { +test "ArenaPool: deinit with entries in free list" { // Verifies that deinit properly cleans up free-listed arenas (no leaks // detected by the test allocator). 
- var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); + var pool = ArenaPool.init(testing.allocator, .{}); - const a1 = try pool.acquire(.{ .debug = "test1" }); - const a2 = try pool.acquire(.{ .debug = "test2" }); + const a1 = try pool.acquire(.tiny, "test1"); + const a2 = try pool.acquire(.medium, "test2"); _ = try a1.alloc(u8, 256); _ = try a2.alloc(u8, 512); pool.release(a1); pool.release(a2); - try testing.expectEqual(@as(u16, 2), pool.free_list_len); + try testing.expectEqual(1, pool.tiny.free_list_len); + try testing.expectEqual(1, pool.medium.free_list_len); pool.deinit(); } + +test "ArenaPool: small bucket" { + var pool = ArenaPool.init(testing.allocator, .{ + .small = .{ .max = 2, .retain = 4 * 1024 }, + }); + defer pool.deinit(); + + const s1 = try pool.acquire(.small, "s1"); + const s2 = try pool.acquire(.small, "s2"); + const s3 = try pool.acquire(.small, "s3"); + + pool.release(s1); + pool.release(s2); + pool.release(s3); + + try testing.expectEqual(2, pool.small.free_list_len); +} + +test "ArenaPool: size-based acquire" { + var pool = ArenaPool.init(testing.allocator, .{}); + defer pool.deinit(); + + // <= 1KB -> tiny + const a = try pool.acquire(500, "fits-tiny"); + // <= 4KB -> small + const b = try pool.acquire(2000, "fits-small"); + // <= 16KB -> medium + const c = try pool.acquire(8000, "fits-medium"); + // > 16KB -> large + const d = try pool.acquire(20000, "fits-large"); + + pool.release(a); + pool.release(b); + pool.release(c); + pool.release(d); + + try testing.expectEqual(1, pool.tiny.free_list_len); + try testing.expectEqual(1, pool.small.free_list_len); + try testing.expectEqual(1, pool.medium.free_list_len); + try testing.expectEqual(1, pool.large.free_list_len); +} diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index b90029ac..a2da34d5 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -374,7 +374,8 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { fn 
processRequest(self: *Client, req: Request) !void { if (self.network.cache) |*cache| { if (req.method == .GET) { - const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" }); + // cache is only used to read the meta data + const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache"); defer self.network.app.arena_pool.release(arena); var iter = req.headers.iterator(); diff --git a/src/browser/Page.zig b/src/browser/Page.zig index f12b606b..8ddd29ee 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -248,7 +248,7 @@ pub fn init(self: *Page, frame_id: u32, session: *Session, parent: ?*Page) !void log.debug(.page, "page.init", .{}); } - const call_arena = try session.getArena(.{ .debug = "call_arena" }); + const call_arena = try session.getArena(.medium, "call_arena"); errdefer session.releaseArena(call_arena); const factory = &session.factory; @@ -429,8 +429,8 @@ pub fn headersForRequest(self: *Page, headers: *HttpClient.Headers) !void { } } -pub fn getArena(self: *Page, comptime opts: Session.GetArenaOpts) !Allocator { - return self._session.getArena(opts); +pub fn getArena(self: *Page, size_or_bucket: anytype, debug: []const u8) !Allocator { + return self._session.getArena(size_or_bucket, debug); } pub fn releaseArena(self: *Page, allocator: Allocator) void { @@ -510,7 +510,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi log.warn(.js, "invalid blob", .{ .url = request_url }); return error.BlobNotFound; }; - const parse_arena = try self.getArena(.{ .debug = "Page.parseBlob" }); + const parse_arena = try self.getArena(.medium, "Page.parseBlob"); defer self.releaseArena(parse_arena); var parser = Parser.init(parse_arena, self.document.asNode(), self); parser.parse(blob._slice); @@ -619,7 +619,7 @@ pub fn scheduleNavigation(self: *Page, request_url: []const u8, opts: NavigateOp if (self.canScheduleNavigation(std.meta.activeTag(nt)) == false) { return; } - 
const arena = try self._session.getArena(.{ .debug = "scheduleNavigation" }); + const arena = try self._session.getArena(.small, "scheduleNavigation"); errdefer self._session.releaseArena(arena); return self.scheduleNavigationWithArena(arena, request_url, opts, nt); } @@ -1022,7 +1022,7 @@ fn pageDoneCallback(ctx: *anyopaque) !void { }); }; - const parse_arena = try self.getArena(.{ .debug = "Page.parse" }); + const parse_arena = try self.getArena(.medium, "Page.parse"); defer self.releaseArena(parse_arena); var parser = Parser.init(parse_arena, self.document.asNode(), self); @@ -3568,7 +3568,7 @@ pub fn submitForm(self: *Page, submitter_: ?*Element, form_: ?*Element.Html.Form // I don't think this is technically correct, but FormData handles it ok const form_data = try FormData.init(form, submitter_, self); - const arena = try self._session.getArena(.{ .debug = "submitForm" }); + const arena = try self._session.getArena(.medium, "submitForm"); errdefer self._session.releaseArena(arena); const encoding = form_element.getAttributeSafe(comptime .wrap("enctype")); diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 4ee753ea..fd3889e6 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -249,7 +249,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { } pub fn waitForSelector(self: *Runner, selector: [:0]const u8, timeout_ms: u32) !*Node.Element { - const arena = try self.session.getArena(.{ .debug = "Runner.waitForSelector" }); + const arena = try self.session.getArena(.small, "Runner.waitForSelector"); defer self.session.releaseArena(arena); var timer = try std.time.Timer.start(); diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 984ecccc..95b7f839 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -188,7 +188,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e var handover = false; const page = self.page; - 
const arena = try page.getArena(.{ .debug = "addFromElement" }); + const arena = try page.getArena(.large, "SM.addFromElement"); errdefer if (!handover) { page.releaseArena(arena); }; @@ -369,7 +369,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const errdefer _ = self.imported_modules.remove(url); const page = self.page; - const arena = try page.getArena(.{ .debug = "preloadImport" }); + const arena = try page.getArena(.large, "SM.preloadImport"); errdefer page.releaseArena(arena); const script = try arena.create(Script); @@ -469,7 +469,7 @@ pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource { pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void { const page = self.page; - const arena = try page.getArena(.{ .debug = "getAsyncImport" }); + const arena = try page.getArena(.large, "SM.getAsyncImport"); errdefer page.releaseArena(arena); const script = try arena.create(Script); diff --git a/src/browser/Session.zig b/src/browser/Session.zig index 8ec3e217..baea1590 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -110,10 +110,10 @@ pub fn init(self: *Session, browser: *Browser, notification: *Notification) !voi const allocator = browser.app.allocator; const arena_pool = browser.arena_pool; - const arena = try arena_pool.acquire(.{ .debug = "Session" }); + const arena = try arena_pool.acquire(.small, "Session"); errdefer arena_pool.release(arena); - const page_arena = try arena_pool.acquire(.{ .debug = "Session.page_arena" }); + const page_arena = try arena_pool.acquire(.large, "Session.page_arena"); errdefer arena_pool.release(page_arena); self.* = .{ @@ -186,12 +186,8 @@ pub fn removePage(self: *Session) void { } } -pub const GetArenaOpts = struct { - debug: []const u8, -}; - -pub fn getArena(self: *Session, opts: GetArenaOpts) !Allocator { - return self.arena_pool.acquire(.{ .debug = opts.debug }); +pub fn 
getArena(self: *Session, size_or_bucket: anytype, debug: []const u8) !Allocator { + return self.arena_pool.acquire(size_or_bucket, debug); } pub fn releaseArena(self: *Session, allocator: Allocator) void { diff --git a/src/browser/StyleManager.zig b/src/browser/StyleManager.zig index 161ebca0..404a11ed 100644 --- a/src/browser/StyleManager.zig +++ b/src/browser/StyleManager.zig @@ -66,7 +66,7 @@ dirty: bool = false, pub fn init(page: *Page) !StyleManager { return .{ .page = page, - .arena = try page.getArena(.{ .debug = "StyleManager" }), + .arena = try page.getArena(.medium, "StyleManager"), }; } diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 2c1ebf38..03eadac3 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -261,7 +261,7 @@ pub const ContextParams = struct { }; pub fn createContext(self: *Env, page: *Page, params: ContextParams) !*Context { - const context_arena = try self.app.arena_pool.acquire(.{ .debug = params.debug_name }); + const context_arena = try self.app.arena_pool.acquire(.large, params.debug_name); errdefer self.app.arena_pool.release(context_arena); const isolate = self.isolate; diff --git a/src/browser/js/Local.zig b/src/browser/js/Local.zig index 4d91ed2e..170e5c0c 100644 --- a/src/browser/js/Local.zig +++ b/src/browser/js/Local.zig @@ -1479,7 +1479,7 @@ fn createFinalizerCallback( ) !*Session.FinalizerCallback { const session = self.ctx.session; - const arena = try session.getArena(.{ .debug = "FinalizerCallback" }); + const arena = try session.getArena(.tiny, "FinalizerCallback"); errdefer session.releaseArena(arena); const fc = try arena.create(Session.FinalizerCallback); diff --git a/src/browser/js/Origin.zig b/src/browser/js/Origin.zig index c6c6bf81..fce37ce0 100644 --- a/src/browser/js/Origin.zig +++ b/src/browser/js/Origin.zig @@ -45,7 +45,7 @@ key: []const u8, security_token: v8.Global, pub fn init(app: *App, isolate: js.Isolate, key: []const u8) !*Origin { - const arena = try 
app.arena_pool.acquire(.{ .debug = "Origin" }); + const arena = try app.arena_pool.acquire(.tiny, "Origin"); errdefer app.arena_pool.release(arena); var hs: js.HandleScope = undefined; diff --git a/src/browser/js/String.zig b/src/browser/js/String.zig index 2cbe6a17..8d29d838 100644 --- a/src/browser/js/String.zig +++ b/src/browser/js/String.zig @@ -44,11 +44,11 @@ fn _toSlice(self: String, comptime null_terminate: bool, allocator: Allocator) ! const handle = self.handle; const isolate = local.isolate.handle; - const len = v8.v8__String__Utf8Length(handle, isolate); - const buf = try (if (comptime null_terminate) allocator.allocSentinel(u8, @intCast(len), 0) else allocator.alloc(u8, @intCast(len))); + const l = v8.v8__String__Utf8Length(handle, isolate); + const buf = try (if (comptime null_terminate) allocator.allocSentinel(u8, @intCast(l), 0) else allocator.alloc(u8, @intCast(l))); const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); if (comptime IS_DEBUG) { - std.debug.assert(n == len); + std.debug.assert(n == l); } return buf; @@ -64,32 +64,32 @@ pub fn toSSOWithAlloc(self: String, allocator: Allocator) !SSO { const handle = self.handle; const isolate = self.local.isolate.handle; - const len: usize = @intCast(v8.v8__String__Utf8Length(handle, isolate)); + const l: usize = @intCast(v8.v8__String__Utf8Length(handle, isolate)); - if (len <= 12) { + if (l <= 12) { var content: [12]u8 = undefined; const n = v8.v8__String__WriteUtf8(handle, isolate, &content[0], content.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); if (comptime IS_DEBUG) { - std.debug.assert(n == len); + std.debug.assert(n == l); } // Weird that we do this _after_, but we have to..I've seen weird issues // in ReleaseMode where v8 won't write to content if it starts off zero // initiated - @memset(content[len..], 0); - return .{ .len = @intCast(len), .payload = .{ .content = content } }; + @memset(content[l..], 0); + return 
.{ .len = @intCast(l), .payload = .{ .content = content } }; } - const buf = try allocator.alloc(u8, len); + const buf = try allocator.alloc(u8, l); const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); if (comptime IS_DEBUG) { - std.debug.assert(n == len); + std.debug.assert(n == l); } var prefix: [4]u8 = @splat(0); @memcpy(&prefix, buf[0..4]); return .{ - .len = @intCast(len), + .len = @intCast(l), .payload = .{ .heap = .{ .prefix = prefix, .ptr = buf.ptr, @@ -103,9 +103,13 @@ pub fn format(self: String, writer: *std.Io.Writer) !void { const isolate = local.isolate.handle; var small: [1024]u8 = undefined; - const len = v8.v8__String__Utf8Length(handle, isolate); - var buf = if (len < 1024) &small else local.call_arena.alloc(u8, @intCast(len)) catch return error.WriteFailed; + const l = v8.v8__String__Utf8Length(handle, isolate); + var buf = if (l < 1024) &small else local.call_arena.alloc(u8, @intCast(l)) catch return error.WriteFailed; const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); return writer.writeAll(buf[0..n]); } + +pub fn len(self: String) usize { + return @intCast(v8.v8__String__Utf8Length(self.handle, self.local.isolate.handle)); +} diff --git a/src/browser/webapi/Blob.zig b/src/browser/webapi/Blob.zig index bf0c1118..0598d7fc 100644 --- a/src/browser/webapi/Blob.zig +++ b/src/browser/webapi/Blob.zig @@ -77,7 +77,15 @@ pub fn initWithMimeValidation( validate_mime: bool, page: *Page, ) !*Blob { - const arena = try page.getArena(.{ .debug = "Blob" }); + const data_len = blk: { + const parts = maybe_blob_parts orelse break :blk 0; + var size: usize = 0; + for (parts) |p| { + size += p.len; + } + break :blk size; + }; + const arena = try page.getArena(256 + data_len, "Blob"); errdefer page.releaseArena(arena); const options: InitOptions = maybe_options orelse .{}; diff --git a/src/browser/webapi/DOMParser.zig 
b/src/browser/webapi/DOMParser.zig index 10a94bca..7bd5b600 100644 --- a/src/browser/webapi/DOMParser.zig +++ b/src/browser/webapi/DOMParser.zig @@ -50,7 +50,7 @@ pub fn parseFromString( @"image/svg+xml", }, mime_type) orelse return error.NotSupported; - const arena = try page.getArena(.{ .debug = "DOMParser.parseFromString" }); + const arena = try page.getArena(.medium, "DOMParser.parseFromString"); defer page.releaseArena(arena); return switch (target_mime) { diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index cf15c49b..5b6c0a4a 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -666,7 +666,7 @@ pub fn write(self: *Document, text: []const []const u8, page: *Page) !void { page._parse_mode = .document_write; defer page._parse_mode = previous_parse_mode; - const arena = try page.getArena(.{ .debug = "Document.write" }); + const arena = try page.getArena(.medium, "Document.write"); defer page.releaseArena(arena); var parser = Parser.init(arena, fragment_node, page); diff --git a/src/browser/webapi/Event.zig b/src/browser/webapi/Event.zig index b573bfc7..50895866 100644 --- a/src/browser/webapi/Event.zig +++ b/src/browser/webapi/Event.zig @@ -90,14 +90,14 @@ pub const Options = struct { }; pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*Event { - const arena = try page.getArena(.{ .debug = "Event" }); + const arena = try page.getArena(.tiny, "Event"); errdefer page.releaseArena(arena); const str = try String.init(arena, typ, .{}); return initWithTrusted(arena, str, opts_, false); } pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*Event { - const arena = try page.getArena(.{ .debug = "Event.trusted" }); + const arena = try page.getArena(.tiny, "Event.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts_, true); } diff --git a/src/browser/webapi/File.zig b/src/browser/webapi/File.zig index fb27359a..e4c70662 100644 --- 
a/src/browser/webapi/File.zig +++ b/src/browser/webapi/File.zig @@ -31,7 +31,7 @@ _proto: *Blob, // TODO: Implement File API. pub fn init(page: *Page) !*File { - const arena = try page.getArena(.{ .debug = "File" }); + const arena = try page.getArena(.tiny, "File"); errdefer page.releaseArena(arena); return page._factory.blob(arena, File{ ._proto = undefined }); } diff --git a/src/browser/webapi/FileReader.zig b/src/browser/webapi/FileReader.zig index 109fdc7b..33f0e209 100644 --- a/src/browser/webapi/FileReader.zig +++ b/src/browser/webapi/FileReader.zig @@ -63,7 +63,7 @@ const Result = union(enum) { }; pub fn init(page: *Page) !*FileReader { - const arena = try page.getArena(.{ .debug = "FileReader" }); + const arena = try page.getArena(.tiny, "FileReader"); errdefer page.releaseArena(arena); return page._factory.eventTargetWithAllocator(arena, FileReader{ diff --git a/src/browser/webapi/IntersectionObserver.zig b/src/browser/webapi/IntersectionObserver.zig index cbc9278f..990c45ee 100644 --- a/src/browser/webapi/IntersectionObserver.zig +++ b/src/browser/webapi/IntersectionObserver.zig @@ -71,7 +71,7 @@ pub const ObserverInit = struct { }; pub fn init(callback: js.Function.Temp, options: ?ObserverInit, page: *Page) !*IntersectionObserver { - const arena = try page.getArena(.{ .debug = "IntersectionObserver" }); + const arena = try page.getArena(.medium, "IntersectionObserver"); errdefer page.releaseArena(arena); const opts = options orelse ObserverInit{}; @@ -266,7 +266,7 @@ fn checkIntersection(self: *IntersectionObserver, target: *Element, page: *Page) (was_intersecting_opt != null and was_intersecting_opt.? 
!= is_now_intersecting); if (should_report) { - const arena = try page.getArena(.{ .debug = "IntersectionObserverEntry" }); + const arena = try page.getArena(.tiny, "IntersectionObserverEntry"); errdefer page.releaseArena(arena); const entry = try arena.create(IntersectionObserverEntry); diff --git a/src/browser/webapi/MutationObserver.zig b/src/browser/webapi/MutationObserver.zig index 5453e797..6a99fcb5 100644 --- a/src/browser/webapi/MutationObserver.zig +++ b/src/browser/webapi/MutationObserver.zig @@ -76,7 +76,7 @@ pub const ObserveOptions = struct { }; pub fn init(callback: js.Function.Temp, page: *Page) !*MutationObserver { - const arena = try page.getArena(.{ .debug = "MutationObserver" }); + const arena = try page.getArena(.medium, "MutationObserver"); errdefer page.releaseArena(arena); const self = try arena.create(MutationObserver); @@ -227,7 +227,7 @@ pub fn notifyAttributeChange( } } - const arena = try page.getArena(.{ .debug = "MutationRecord" }); + const arena = try page.getArena(.tiny, "MutationRecord"); const record = try arena.create(MutationRecord); record.* = .{ ._arena = arena, @@ -271,7 +271,7 @@ pub fn notifyCharacterDataChange( continue; } - const arena = try page.getArena(.{ .debug = "MutationRecord" }); + const arena = try page.getArena(.tiny, "MutationRecord"); const record = try arena.create(MutationRecord); record.* = .{ ._arena = arena, @@ -318,7 +318,7 @@ pub fn notifyChildListChange( continue; } - const arena = try page.getArena(.{ .debug = "MutationRecord" }); + const arena = try page.getArena(.tiny, "MutationRecord"); const record = try arena.create(MutationRecord); record.* = .{ ._arena = arena, diff --git a/src/browser/webapi/Permissions.zig b/src/browser/webapi/Permissions.zig index 8a06b4f4..84ff810f 100644 --- a/src/browser/webapi/Permissions.zig +++ b/src/browser/webapi/Permissions.zig @@ -38,7 +38,7 @@ const QueryDescriptor = struct { }; // We always report 'prompt' (the default safe value — neither granted nor denied). 
pub fn query(_: *const Permissions, qd: QueryDescriptor, page: *Page) !js.Promise { - const arena = try page.getArena(.{ .debug = "PermissionStatus" }); + const arena = try page.getArena(.tiny, "PermissionStatus"); errdefer page.releaseArena(arena); const status = try arena.create(PermissionStatus); diff --git a/src/browser/webapi/Range.zig b/src/browser/webapi/Range.zig index 720fc5ff..dab3db89 100644 --- a/src/browser/webapi/Range.zig +++ b/src/browser/webapi/Range.zig @@ -33,7 +33,7 @@ const Range = @This(); _proto: *AbstractRange, pub fn init(page: *Page) !*Range { - const arena = try page.getArena(.{ .debug = "Range" }); + const arena = try page.getArena(.medium, "Range"); errdefer page.releaseArena(arena); return page._factory.abstractRange(arena, Range{ ._proto = undefined }, page); } @@ -312,7 +312,7 @@ pub fn intersectsNode(self: *const Range, node: *Node) bool { } pub fn cloneRange(self: *const Range, page: *Page) !*Range { - const arena = try page.getArena(.{ .debug = "Range.clone" }); + const arena = try page.getArena(.medium, "Range.clone"); errdefer page.releaseArena(arena); const clone = try page._factory.abstractRange(arena, Range{ ._proto = undefined }, page); diff --git a/src/browser/webapi/Window.zig b/src/browser/webapi/Window.zig index 418037fd..ef076663 100644 --- a/src/browser/webapi/Window.zig +++ b/src/browser/webapi/Window.zig @@ -407,7 +407,7 @@ pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]cons const target_page = self._page; const source_window = target_page.js.getIncumbent().window; - const arena = try target_page.getArena(.{ .debug = "Window.postMessage" }); + const arena = try target_page.getArena(.medium, "Window.postMessage"); errdefer target_page.releaseArena(arena); // Origin should be the source window's origin (where the message came from) @@ -645,7 +645,7 @@ fn scheduleCallback(self: *Window, cb: js.Function.Temp, delay_ms: u32, opts: Sc return error.TooManyTimeout; } - const arena = try 
page.getArena(.{ .debug = "Window.schedule" }); + const arena = try page.getArena(.tiny, "Window.schedule"); errdefer page.releaseArena(arena); const timer_id = self._timer_id +% 1; diff --git a/src/browser/webapi/animation/Animation.zig b/src/browser/webapi/animation/Animation.zig index 08eb21c2..4bddfd1d 100644 --- a/src/browser/webapi/animation/Animation.zig +++ b/src/browser/webapi/animation/Animation.zig @@ -52,7 +52,7 @@ _playState: PlayState = .idle, // // TODO add support for effect and timeline pub fn init(page: *Page) !*Animation { - const arena = try page.getArena(.{ .debug = "Animation" }); + const arena = try page.getArena(.tiny, "Animation"); errdefer page.releaseArena(arena); const self = try arena.create(Animation); diff --git a/src/browser/webapi/collections/ChildNodes.zig b/src/browser/webapi/collections/ChildNodes.zig index df3e7ee1..410c12b7 100644 --- a/src/browser/webapi/collections/ChildNodes.zig +++ b/src/browser/webapi/collections/ChildNodes.zig @@ -39,7 +39,7 @@ pub const ValueIterator = GenericIterator(Iterator, "1"); pub const EntryIterator = GenericIterator(Iterator, null); pub fn init(node: *Node, page: *Page) !*ChildNodes { - const arena = try page.getArena(.{ .debug = "ChildNodes" }); + const arena = try page.getArena(.small, "ChildNodes"); errdefer page.releaseArena(arena); const self = try arena.create(ChildNodes); diff --git a/src/browser/webapi/css/FontFace.zig b/src/browser/webapi/css/FontFace.zig index 075d9135..9ccb0c4c 100644 --- a/src/browser/webapi/css/FontFace.zig +++ b/src/browser/webapi/css/FontFace.zig @@ -33,7 +33,7 @@ _family: []const u8, pub fn init(family: []const u8, source: []const u8, page: *Page) !*FontFace { _ = source; - const arena = try page.getArena(.{ .debug = "FontFace" }); + const arena = try page.getArena(.tiny, "FontFace"); errdefer page.releaseArena(arena); const self = try arena.create(FontFace); diff --git a/src/browser/webapi/css/FontFaceSet.zig b/src/browser/webapi/css/FontFaceSet.zig index 
b20017ca..f43dc405 100644 --- a/src/browser/webapi/css/FontFaceSet.zig +++ b/src/browser/webapi/css/FontFaceSet.zig @@ -34,7 +34,7 @@ _proto: *EventTarget, _arena: Allocator, pub fn init(page: *Page) !*FontFaceSet { - const arena = try page.getArena(.{ .debug = "FontFaceSet" }); + const arena = try page.getArena(.tiny, "FontFaceSet"); errdefer page.releaseArena(arena); return page._factory.eventTargetWithAllocator(arena, FontFaceSet{ diff --git a/src/browser/webapi/element/Html.zig b/src/browser/webapi/element/Html.zig index d72b4fa8..7a33d25a 100644 --- a/src/browser/webapi/element/Html.zig +++ b/src/browser/webapi/element/Html.zig @@ -292,7 +292,7 @@ pub fn insertAdjacentHTML( }); const doc_node = doc.asNode(); - const arena = try page.getArena(.{ .debug = "HTML.insertAdjacentHTML" }); + const arena = try page.getArena(.medium, "HTML.insertAdjacentHTML"); defer page.releaseArena(arena); const Parser = @import("../../parser/Parser.zig"); diff --git a/src/browser/webapi/encoding/TextDecoder.zig b/src/browser/webapi/encoding/TextDecoder.zig index c117df09..7da889e4 100644 --- a/src/browser/webapi/encoding/TextDecoder.zig +++ b/src/browser/webapi/encoding/TextDecoder.zig @@ -48,7 +48,7 @@ pub fn init(label_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*TextDecoder { _ = std.meta.stringToEnum(Label, label) orelse return error.RangeError; } - const arena = try page.getArena(.{ .debug = "TextDecoder" }); + const arena = try page.getArena(.large, "TextDecoder"); errdefer page.releaseArena(arena); const opts = opts_ orelse InitOpts{}; diff --git a/src/browser/webapi/event/CloseEvent.zig b/src/browser/webapi/event/CloseEvent.zig index aa9f1d2b..dbe5f21a 100644 --- a/src/browser/webapi/event/CloseEvent.zig +++ b/src/browser/webapi/event/CloseEvent.zig @@ -39,14 +39,14 @@ const CloseEventOptions = struct { const Options = Event.inheritOptions(CloseEvent, CloseEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*CloseEvent { - const arena = try 
page.getArena(.{ .debug = "CloseEvent" }); + const arena = try page.getArena(.tiny, "CloseEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*CloseEvent { - const arena = try page.getArena(.{ .debug = "CloseEvent.trusted" }); + const arena = try page.getArena(.tiny, "CloseEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/CompositionEvent.zig b/src/browser/webapi/event/CompositionEvent.zig index 7f3fd1d2..04077994 100644 --- a/src/browser/webapi/event/CompositionEvent.zig +++ b/src/browser/webapi/event/CompositionEvent.zig @@ -35,7 +35,7 @@ const CompositionEventOptions = struct { const Options = Event.inheritOptions(CompositionEvent, CompositionEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*CompositionEvent { - const arena = try page.getArena(.{ .debug = "CompositionEvent" }); + const arena = try page.getArena(.tiny, "CompositionEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/CustomEvent.zig b/src/browser/webapi/event/CustomEvent.zig index 9013bb4a..51efa36c 100644 --- a/src/browser/webapi/event/CustomEvent.zig +++ b/src/browser/webapi/event/CustomEvent.zig @@ -38,7 +38,7 @@ const CustomEventOptions = struct { const Options = Event.inheritOptions(CustomEvent, CustomEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*CustomEvent { - const arena = try page.getArena(.{ .debug = "CustomEvent" }); + const arena = try page.getArena(.tiny, "CustomEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/ErrorEvent.zig b/src/browser/webapi/event/ErrorEvent.zig index aef63a0e..4bb68573 
100644 --- a/src/browser/webapi/event/ErrorEvent.zig +++ b/src/browser/webapi/event/ErrorEvent.zig @@ -47,14 +47,14 @@ pub const ErrorEventOptions = struct { const Options = Event.inheritOptions(ErrorEvent, ErrorEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*ErrorEvent { - const arena = try page.getArena(.{ .debug = "ErrorEvent" }); + const arena = try page.getArena(.small, "ErrorEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts_, false, page); } pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*ErrorEvent { - const arena = try page.getArena(.{ .debug = "ErrorEvent.trusted" }); + const arena = try page.getArena(.small, "ErrorEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts_, true, page); } diff --git a/src/browser/webapi/event/FocusEvent.zig b/src/browser/webapi/event/FocusEvent.zig index 776605db..59e88e36 100644 --- a/src/browser/webapi/event/FocusEvent.zig +++ b/src/browser/webapi/event/FocusEvent.zig @@ -42,13 +42,13 @@ pub const Options = Event.inheritOptions( ); pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*FocusEvent { - const arena = try page.getArena(.{ .debug = "FocusEvent.trusted" }); + const arena = try page.getArena(.tiny, "FocusEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*FocusEvent { - const arena = try page.getArena(.{ .debug = "FocusEvent" }); + const arena = try page.getArena(.tiny, "FocusEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); diff --git a/src/browser/webapi/event/FormDataEvent.zig b/src/browser/webapi/event/FormDataEvent.zig index 93eadfa3..ce45a9d0 100644 --- a/src/browser/webapi/event/FormDataEvent.zig 
+++ b/src/browser/webapi/event/FormDataEvent.zig @@ -38,14 +38,14 @@ const Options = Event.inheritOptions(FormDataEvent, struct { }); pub fn init(typ: []const u8, maybe_options: Options, page: *Page) !*FormDataEvent { - const arena = try page.getArena(.{ .debug = "FormDataEvent" }); + const arena = try page.getArena(.tiny, "FormDataEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, maybe_options, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*FormDataEvent { - const arena = try page.getArena(.{ .debug = "FormDataEvent.trusted" }); + const arena = try page.getArena(.tiny, "FormDataEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/InputEvent.zig b/src/browser/webapi/event/InputEvent.zig index 3b01b900..3c00debd 100644 --- a/src/browser/webapi/event/InputEvent.zig +++ b/src/browser/webapi/event/InputEvent.zig @@ -46,13 +46,13 @@ const Options = Event.inheritOptions( ); pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*InputEvent { - const arena = try page.getArena(.{ .debug = "InputEvent.trusted" }); + const arena = try page.getArena(.tiny, "InputEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*InputEvent { - const arena = try page.getArena(.{ .debug = "InputEvent" }); + const arena = try page.getArena(.tiny, "InputEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); diff --git a/src/browser/webapi/event/KeyboardEvent.zig b/src/browser/webapi/event/KeyboardEvent.zig index ddc7548d..f8056cc3 100644 --- a/src/browser/webapi/event/KeyboardEvent.zig +++ b/src/browser/webapi/event/KeyboardEvent.zig @@ -186,13 +186,13 
@@ const Options = Event.inheritOptions( ); pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*KeyboardEvent { - const arena = try page.getArena(.{ .debug = "KeyboardEvent.trusted" }); + const arena = try page.getArena(.tiny, "KeyboardEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*KeyboardEvent { - const arena = try page.getArena(.{ .debug = "KeyboardEvent" }); + const arena = try page.getArena(.tiny, "KeyboardEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); diff --git a/src/browser/webapi/event/MessageEvent.zig b/src/browser/webapi/event/MessageEvent.zig index dfd813d5..27fdfb23 100644 --- a/src/browser/webapi/event/MessageEvent.zig +++ b/src/browser/webapi/event/MessageEvent.zig @@ -50,14 +50,14 @@ pub const Data = union(enum) { const Options = Event.inheritOptions(MessageEvent, MessageEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*MessageEvent { - const arena = try page.getArena(.{ .debug = "MessageEvent" }); + const arena = try page.getArena(.small, "MessageEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts_, false, page); } pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*MessageEvent { - const arena = try page.getArena(.{ .debug = "MessageEvent.trusted" }); + const arena = try page.getArena(.small, "MessageEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts_, true, page); } diff --git a/src/browser/webapi/event/MouseEvent.zig b/src/browser/webapi/event/MouseEvent.zig index 999bd010..ff2b1118 100644 --- a/src/browser/webapi/event/MouseEvent.zig +++ b/src/browser/webapi/event/MouseEvent.zig @@ -82,14 +82,14 @@ pub const Options = 
Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*MouseEvent { - const arena = try page.getArena(.{ .debug = "MouseEvent" }); + const arena = try page.getArena(.tiny, "MouseEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*MouseEvent { - const arena = try page.getArena(.{ .debug = "MouseEvent.trusted" }); + const arena = try page.getArena(.tiny, "MouseEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig b/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig index 816fa1c8..d791cb39 100644 --- a/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig +++ b/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig @@ -45,14 +45,14 @@ const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, opts: Options, page: *Page) !*NavigationCurrentEntryChangeEvent { - const arena = try page.getArena(.{ .debug = "NavigationCurrentEntryChangeEvent" }); + const arena = try page.getArena(.tiny, "NavigationCurrentEntryChangeEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts, false, page); } pub fn initTrusted(typ: String, opts: Options, page: *Page) !*NavigationCurrentEntryChangeEvent { - const arena = try page.getArena(.{ .debug = "NavigationCurrentEntryChangeEvent.trusted" }); + const arena = try page.getArena(.tiny, "NavigationCurrentEntryChangeEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts, true, page); } diff --git a/src/browser/webapi/event/PageTransitionEvent.zig b/src/browser/webapi/event/PageTransitionEvent.zig index e11be386..335a665a 100644 --- 
a/src/browser/webapi/event/PageTransitionEvent.zig +++ b/src/browser/webapi/event/PageTransitionEvent.zig @@ -38,14 +38,14 @@ const PageTransitionEventOptions = struct { const Options = Event.inheritOptions(PageTransitionEvent, PageTransitionEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PageTransitionEvent { - const arena = try page.getArena(.{ .debug = "PageTransitionEvent" }); + const arena = try page.getArena(.tiny, "PageTransitionEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*PageTransitionEvent { - const arena = try page.getArena(.{ .debug = "PageTransitionEvent.trusted" }); + const arena = try page.getArena(.tiny, "PageTransitionEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/PointerEvent.zig b/src/browser/webapi/event/PointerEvent.zig index c5440d45..4eb1e250 100644 --- a/src/browser/webapi/event/PointerEvent.zig +++ b/src/browser/webapi/event/PointerEvent.zig @@ -84,7 +84,7 @@ const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PointerEvent { - const arena = try page.getArena(.{ .debug = "UIEvent" }); + const arena = try page.getArena(.tiny, "PointerEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/PopStateEvent.zig b/src/browser/webapi/event/PopStateEvent.zig index cd430cf8..3b0fe4e4 100644 --- a/src/browser/webapi/event/PopStateEvent.zig +++ b/src/browser/webapi/event/PopStateEvent.zig @@ -39,14 +39,14 @@ const PopStateEventOptions = struct { const Options = Event.inheritOptions(PopStateEvent, PopStateEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PopStateEvent { - const arena = try 
page.getArena(.{ .debug = "PopStateEvent" }); + const arena = try page.getArena(.tiny, "PopStateEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*PopStateEvent { - const arena = try page.getArena(.{ .debug = "PopStateEvent.trusted" }); + const arena = try page.getArena(.tiny, "PopStateEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/ProgressEvent.zig b/src/browser/webapi/event/ProgressEvent.zig index 6498da48..895bff09 100644 --- a/src/browser/webapi/event/ProgressEvent.zig +++ b/src/browser/webapi/event/ProgressEvent.zig @@ -39,14 +39,14 @@ const ProgressEventOptions = struct { const Options = Event.inheritOptions(ProgressEvent, ProgressEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*ProgressEvent { - const arena = try page.getArena(.{ .debug = "ProgressEvent" }); + const arena = try page.getArena(.tiny, "ProgressEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*ProgressEvent { - const arena = try page.getArena(.{ .debug = "ProgressEvent.trusted" }); + const arena = try page.getArena(.tiny, "ProgressEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/PromiseRejectionEvent.zig b/src/browser/webapi/event/PromiseRejectionEvent.zig index cc014b39..44af3904 100644 --- a/src/browser/webapi/event/PromiseRejectionEvent.zig +++ b/src/browser/webapi/event/PromiseRejectionEvent.zig @@ -37,7 +37,7 @@ const PromiseRejectionEventOptions = struct { const Options = Event.inheritOptions(PromiseRejectionEvent, 
PromiseRejectionEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*PromiseRejectionEvent { - const arena = try page.getArena(.{ .debug = "PromiseRejectionEvent" }); + const arena = try page.getArena(.tiny, "PromiseRejectionEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/SubmitEvent.zig b/src/browser/webapi/event/SubmitEvent.zig index f48365dc..3400cbcd 100644 --- a/src/browser/webapi/event/SubmitEvent.zig +++ b/src/browser/webapi/event/SubmitEvent.zig @@ -39,14 +39,14 @@ const SubmitEventOptions = struct { const Options = Event.inheritOptions(SubmitEvent, SubmitEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*SubmitEvent { - const arena = try page.getArena(.{ .debug = "SubmitEvent" }); + const arena = try page.getArena(.tiny, "SubmitEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts_, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*SubmitEvent { - const arena = try page.getArena(.{ .debug = "SubmitEvent.trusted" }); + const arena = try page.getArena(.tiny, "SubmitEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/TextEvent.zig b/src/browser/webapi/event/TextEvent.zig index 3ddb2636..dcc5e478 100644 --- a/src/browser/webapi/event/TextEvent.zig +++ b/src/browser/webapi/event/TextEvent.zig @@ -40,7 +40,7 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*TextEvent { - const arena = try page.getArena(.{ .debug = "TextEvent" }); + const arena = try page.getArena(.tiny, "TextEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/UIEvent.zig b/src/browser/webapi/event/UIEvent.zig 
index 6874d6d5..2b456738 100644 --- a/src/browser/webapi/event/UIEvent.zig +++ b/src/browser/webapi/event/UIEvent.zig @@ -51,7 +51,7 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*UIEvent { - const arena = try page.getArena(.{ .debug = "UIEvent" }); + const arena = try page.getArena(.tiny, "UIEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/WheelEvent.zig b/src/browser/webapi/event/WheelEvent.zig index 4711ac25..8f79ab8a 100644 --- a/src/browser/webapi/event/WheelEvent.zig +++ b/src/browser/webapi/event/WheelEvent.zig @@ -50,7 +50,7 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*WheelEvent { - const arena = try page.getArena(.{ .debug = "WheelEvent" }); + const arena = try page.getArena(.medium, "WheelEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/net/Response.zig b/src/browser/webapi/net/Response.zig index e4fbd46d..7ed7ba4d 100644 --- a/src/browser/webapi/net/Response.zig +++ b/src/browser/webapi/net/Response.zig @@ -57,7 +57,7 @@ const InitOpts = struct { }; pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response { - const arena = try page.getArena(.{ .debug = "Response" }); + const arena = try page.getArena(.large, "Response"); errdefer page.releaseArena(arena); const opts = opts_ orelse InitOpts{}; @@ -174,7 +174,7 @@ pub fn bytes(self: *const Response, page: *Page) !js.Promise { } pub fn clone(self: *const Response, page: *Page) !*Response { - const arena = try page.getArena(.{ .debug = "Response.clone" }); + const arena = try page.getArena((self._body orelse "").len + self._url.len + 256, "Response.clone"); errdefer page.releaseArena(arena); const body = if (self._body) |b| try arena.dupe(u8, b) else null; diff --git 
a/src/browser/webapi/net/WebSocket.zig b/src/browser/webapi/net/WebSocket.zig index 1244a61e..20a622c6 100644 --- a/src/browser/webapi/net/WebSocket.zig +++ b/src/browser/webapi/net/WebSocket.zig @@ -105,7 +105,7 @@ pub fn init(url: []const u8, protocols_: ?[]const u8, page: *Page) !*WebSocket { } } - const arena = try page.getArena(.{ .debug = "WebSocket" }); + const arena = try page.getArena(.medium, "WebSocket"); errdefer page.releaseArena(arena); const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encode = true }); @@ -272,12 +272,10 @@ pub fn send(self: *WebSocket, data: SendData) !void { return error.InvalidStateError; } - // Get a dedicated arena for this message - const arena = try self._page._session.getArena(.{ .debug = "WebSocket message" }); - errdefer self._page._session.releaseArena(arena); - switch (data) { .blob => |blob| { + const arena = try self._page._session.getArena(blob._slice.len, "WebSocket.message"); + errdefer self._page._session.releaseArena(arena); try self.queueMessage(.{ .binary = .{ .arena = arena, .data = try arena.dupe(u8, blob._slice), @@ -285,15 +283,21 @@ pub fn send(self: *WebSocket, data: SendData) !void { }, .js_val => |js_val| { if (js_val.isString()) |str| { + const arena = try self._page._session.getArena(str.len(), "WebSocket.message"); + errdefer self._page._session.releaseArena(arena); try self.queueMessage(.{ .text = .{ .arena = arena, .data = try str.toSliceWithAlloc(arena), } }); } else { const binary = try js_val.toZig(BinaryData); + const buffer = binary.asBuffer(); + + const arena = try self._page._session.getArena(buffer.len, "WebSocket.message"); + errdefer self._page._session.releaseArena(arena); try self.queueMessage(.{ .binary = .{ .arena = arena, - .data = try arena.dupe(u8, binary.asBuffer()), + .data = try arena.dupe(u8, buffer), } }); } }, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 62e05a17..1024e1e7 100644 
--- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -89,7 +89,7 @@ const ResponseType = enum { }; pub fn init(page: *Page) !*XMLHttpRequest { - const arena = try page.getArena(.{ .debug = "XMLHttpRequest" }); + const arena = try page.getArena(.large, "XMLHttpRequest"); errdefer page.releaseArena(arena); const self = try page._factory.xhrEventTarget(arena, XMLHttpRequest{ ._page = page, diff --git a/src/browser/webapi/selector/Selector.zig b/src/browser/webapi/selector/Selector.zig index a3d5d894..838cecfd 100644 --- a/src/browser/webapi/selector/Selector.zig +++ b/src/browser/webapi/selector/Selector.zig @@ -45,7 +45,7 @@ pub fn querySelectorAll(root: *Node, input: []const u8, page: *Page) !*List { return error.SyntaxError; } - const arena = try page.getArena(.{ .debug = "querySelectorAll" }); + const arena = try page.getArena(.small, "querySelectorAll"); errdefer page.releaseArena(arena); var nodes: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 024954db..222098c8 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -487,10 +487,10 @@ pub const BrowserContext = struct { pub fn createIsolatedWorld(self: *BrowserContext, world_name: []const u8, grant_universal_access: bool) !*IsolatedWorld { const browser = &self.cdp.browser; - const arena = try browser.arena_pool.acquire(.{ .debug = "IsolatedWorld" }); + const arena = try browser.arena_pool.acquire(.small, "IsolatedWorld"); errdefer browser.arena_pool.release(arena); - const call_arena = try browser.arena_pool.acquire(.{ .debug = "IsolatedWorld.call_arena" }); + const call_arena = try browser.arena_pool.acquire(.tiny, "IsolatedWorld.call_arena"); errdefer browser.arena_pool.release(call_arena); const world = try arena.create(IsolatedWorld);