diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index f5bb72d3..3e29e96a 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -1,5 +1,5 @@ -name: "Browsercore install" -description: "Install deps for the project browsercore" +name: "Deps install" +description: "Install deps for the browser" inputs: arch: diff --git a/.github/actions/v8-snapshot/action.yml b/.github/actions/v8-snapshot/action.yml new file mode 100644 index 00000000..dc73b48a --- /dev/null +++ b/.github/actions/v8-snapshot/action.yml @@ -0,0 +1,42 @@ +name: "V8 snapshot" +description: "Generate v8 snapshot" + +inputs: + arch: + description: 'CPU arch used to select the v8 lib' + required: false + default: 'x86_64' + os: + description: 'OS used to select the v8 lib' + required: false + default: 'linux' + +runs: + using: "composite" + + steps: + # Use the commit hash of bridge.zig and Snapshot.zig as cache key for + # snapshot. + - name: V8 snapshot cache key + id: snapshot_cache_key + run: echo "hash=v8-snapshot-${{ inputs.os }}_${{ inputs.arch }}-$(git log -n 1 --pretty=format:%H -- + src/browser/js/bridge.zig + src/browser/js/Snapshot.zig + )" >> "$GITHUB_OUTPUT" + shell: bash + + # Fetch the cache for snapshot + - name: Cache V8 snapshot + id: cache-v8-snapshot + uses: actions/cache@v5 + env: + cache-name: cache-v8-snapshot + with: + path: src/snapshot.bin + key: ${{ steps.snapshot_cache_key.outputs.hash }} + + # Generate snapshot on cache miss. 
+ - name: v8 snapshot + shell: bash + if: hashFiles('src/snapshot.bin') == '' + run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index f069ebb5..50af9e91 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -39,7 +39,7 @@ jobs: name: zig build release runs-on: ubuntu-latest - timeout-minutes: 15 + timeout-minutes: 20 # Don't run the CI with draft PR. if: github.event.pull_request.draft == false @@ -50,9 +50,7 @@ jobs: fetch-depth: 0 - uses: ./.github/actions/install - - - name: v8 snapshot - run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin + - uses: ./.github/actions/v8-snapshot - name: zig build release run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=x86_64 diff --git a/.github/workflows/wpt.yml b/.github/workflows/wpt.yml index c6cc7350..8a3b8516 100644 --- a/.github/workflows/wpt.yml +++ b/.github/workflows/wpt.yml @@ -36,8 +36,10 @@ jobs: os: ${{env.OS}} arch: ${{env.ARCH}} - - name: v8 snapshot - run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin + - uses: ./.github/actions/v8-snapshot + with: + os: ${{env.OS}} + arch: ${{env.ARCH}} - name: zig build release run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=generic @@ -170,6 +172,21 @@ jobs: cd ./wptdiff CGO_ENABLED=0 go build + - run: | + ./wptdiff/wptdiff --completion |tee completion.log + + - name: Send completion to slack + uses: slackapi/slack-github-action@v3.0.1 + with: + errors: true + method: files.uploadV2 + token: ${{ secrets.CI_SLACK_BOT_TOKEN }} + payload: | + channel_id: ${{ vars.WPT_SLACK_CHANNEL_ID }} + initial_comment: "Last WPT completion" + file: "./completion.log" + filename: 
"wpt-completion-${{ github.sha }}.txt" + - run: | ./wptdiff/wptdiff |tee diff.log diff --git a/src/App.zig b/src/App.zig index 8e3fe0c9..9e8741ba 100644 --- a/src/App.zig +++ b/src/App.zig @@ -69,7 +69,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App { app.telemetry = try Telemetry.init(app, config.mode); errdefer app.telemetry.deinit(allocator); - app.arena_pool = ArenaPool.init(allocator, 512, 1024 * 16); + app.arena_pool = ArenaPool.init(allocator, .{}); errdefer app.arena_pool.deinit(); return app; diff --git a/src/ArenaPool.zig b/src/ArenaPool.zig index 96fb694f..2b501438 100644 --- a/src/ArenaPool.zig +++ b/src/ArenaPool.zig @@ -27,33 +27,52 @@ const ArenaPool = @This(); const IS_DEBUG = builtin.mode == .Debug; -allocator: Allocator, -retain_bytes: usize, -free_list_len: u16 = 0, -free_list: ?*Entry = null, -free_list_max: u16, -entry_pool: std.heap.MemoryPool(Entry), -mutex: std.Thread.Mutex = .{}, -// Debug mode: track acquire/release counts per debug name to detect leaks and double-frees -_leak_track: if (IS_DEBUG) std.StringHashMapUnmanaged(isize) else void = if (IS_DEBUG) .empty else {}, +pub const BucketSize = enum { tiny, small, medium, large }; + +const Bucket = struct { + free_list: ?*Entry = null, + free_list_len: u16 = 0, + free_list_max: u16, + retain_bytes: usize, +}; const Entry = struct { next: ?*Entry, arena: ArenaAllocator, + bucket: *Bucket, debug: if (IS_DEBUG) []const u8 else void = if (IS_DEBUG) "" else {}, }; -pub const DebugInfo = struct { - debug: []const u8 = "", +pub const Config = struct { + tiny: Config.Bucket = .{ .max = 512, .retain = 1024 }, + small: Config.Bucket = .{ .max = 128, .retain = 4 * 1024 }, + medium: Config.Bucket = .{ .max = 64, .retain = 16 * 1024 }, + large: Config.Bucket = .{ .max = 32, .retain = 128 * 1024 }, + + const Bucket = struct { + max: u16, + retain: usize, + }; }; -pub fn init(allocator: Allocator, free_list_max: u16, retain_bytes: usize) ArenaPool { +tiny: Bucket, +small: Bucket, 
+medium: Bucket, +large: Bucket, +allocator: Allocator, +mutex: std.Thread.Mutex = .{}, +entry_pool: std.heap.MemoryPool(Entry), + +_leak_track: if (IS_DEBUG) std.StringHashMapUnmanaged(isize) else void = if (IS_DEBUG) .empty else {}, + +pub fn init(allocator: Allocator, config: Config) ArenaPool { return .{ .allocator = allocator, - .free_list_max = free_list_max, - .retain_bytes = retain_bytes, .entry_pool = .init(allocator), - ._leak_track = if (IS_DEBUG) .empty else {}, + .tiny = .{ .free_list_max = config.tiny.max, .retain_bytes = config.tiny.retain }, + .small = .{ .free_list_max = config.small.max, .retain_bytes = config.small.retain }, + .medium = .{ .free_list_max = config.medium.max, .retain_bytes = config.medium.retain }, + .large = .{ .free_list_max = config.large.max, .retain_bytes = config.large.retain }, }; } @@ -73,24 +92,49 @@ pub fn deinit(self: *ArenaPool) void { self._leak_track.deinit(self.allocator); } - var entry = self.free_list; - while (entry) |e| { - entry = e.next; - e.arena.deinit(); + // Free all arenas in all buckets + inline for (&[_]*Bucket{ &self.tiny, &self.small, &self.medium, &self.large }) |bucket| { + var entry = bucket.free_list; + while (entry) |e| { + entry = e.next; + e.arena.deinit(); + } } self.entry_pool.deinit(); } -pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator { +// Acquire an arena from the pool. 
+// - Pass a BucketSize (.tiny, .small, .medium, .large) for explicit bucket selection +// - Pass a usize for automatic bucket selection based on expected size +pub fn acquire(self: *ArenaPool, size_or_bucket: anytype, debug: []const u8) !Allocator { + const bucket = blk: { + const T = @TypeOf(size_or_bucket); + if (T == BucketSize or T == @TypeOf(.enum_literal)) { + break :blk switch (@as(BucketSize, size_or_bucket)) { + .tiny => &self.tiny, + .small => &self.small, + .medium => &self.medium, + .large => &self.large, + }; + } + if (T == usize or T == comptime_int) { + if (size_or_bucket <= self.tiny.retain_bytes) break :blk &self.tiny; + if (size_or_bucket <= self.small.retain_bytes) break :blk &self.small; + if (size_or_bucket <= self.medium.retain_bytes) break :blk &self.medium; + break :blk &self.large; + } + @compileError("acquire expects BucketSize or usize, got " ++ @typeName(T)); + }; + self.mutex.lock(); defer self.mutex.unlock(); - if (self.free_list) |entry| { - self.free_list = entry.next; - self.free_list_len -= 1; + if (bucket.free_list) |entry| { + bucket.free_list = entry.next; + bucket.free_list_len -= 1; if (IS_DEBUG) { - entry.debug = dbg.debug; - const gop = try self._leak_track.getOrPut(self.allocator, dbg.debug); + entry.debug = debug; + const gop = try self._leak_track.getOrPut(self.allocator, debug); if (!gop.found_existing) { gop.value_ptr.* = 0; } @@ -102,12 +146,13 @@ pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator { const entry = try self.entry_pool.create(); entry.* = .{ .next = null, + .bucket = bucket, + .debug = if (IS_DEBUG) debug else {}, .arena = ArenaAllocator.init(self.allocator), - .debug = if (IS_DEBUG) dbg.debug else {}, }; if (IS_DEBUG) { - const gop = try self._leak_track.getOrPut(self.allocator, dbg.debug); + const gop = try self._leak_track.getOrPut(self.allocator, debug); if (!gop.found_existing) { gop.value_ptr.* = 0; } @@ -116,12 +161,14 @@ pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator { 
return entry.arena.allocator(); } +// Universal release - determines bucket from the Entry automatically pub fn release(self: *ArenaPool, allocator: Allocator) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); + const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); const entry: *Entry = @fieldParentPtr("arena", arena); + const bucket = entry.bucket; // Reset the arena before acquiring the lock to minimize lock hold time - _ = arena.reset(.{ .retain_with_limit = self.retain_bytes }); + _ = arena.reset(.{ .retain_with_limit = bucket.retain_bytes }); self.mutex.lock(); defer self.mutex.unlock(); @@ -139,105 +186,113 @@ pub fn release(self: *ArenaPool, allocator: Allocator) void { } } - const free_list_len = self.free_list_len; - if (free_list_len == self.free_list_max) { + if (bucket.free_list_len >= bucket.free_list_max) { arena.deinit(); self.entry_pool.destroy(entry); return; } - entry.next = self.free_list; - self.free_list_len = free_list_len + 1; - self.free_list = entry; + entry.next = bucket.free_list; + bucket.free_list = entry; + bucket.free_list_len += 1; } pub fn reset(_: *const ArenaPool, allocator: Allocator, retain: usize) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); + const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); _ = arena.reset(.{ .retain_with_limit = retain }); } pub fn resetRetain(_: *const ArenaPool, allocator: Allocator) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); + const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr)); _ = arena.reset(.retain_capacity); } const testing = std.testing; - -test "arena pool - basic acquire and use" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); +test "ArenaPool: basic acquire and release" { + var pool = ArenaPool.init(testing.allocator, .{}); defer pool.deinit(); - const alloc = try pool.acquire(.{ .debug = "test" }); - const buf = 
try alloc.alloc(u8, 64); - @memset(buf, 0xAB); - try testing.expectEqual(@as(u8, 0xAB), buf[0]); + const tiny = try pool.acquire(.tiny, "test-tiny"); + const medium = try pool.acquire(.medium, "test-medium"); + const large = try pool.acquire(.large, "test-large"); - pool.release(alloc); + // All three must be distinct arenas + try testing.expect(tiny.ptr != medium.ptr); + try testing.expect(medium.ptr != large.ptr); + + _ = try tiny.alloc(u8, 64); + _ = try medium.alloc(u8, 1024); + _ = try large.alloc(u8, 4096); + + // Universal release works for all buckets + pool.release(tiny); + pool.release(medium); + pool.release(large); + + try testing.expectEqual(1, pool.tiny.free_list_len); + try testing.expectEqual(1, pool.medium.free_list_len); + try testing.expectEqual(1, pool.large.free_list_len); } -test "arena pool - reuse entry after release" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); +test "ArenaPool: reuse from correct bucket" { + var pool = ArenaPool.init(testing.allocator, .{}); defer pool.deinit(); - const alloc1 = try pool.acquire(.{ .debug = "test" }); - try testing.expectEqual(@as(u16, 0), pool.free_list_len); + const tiny1 = try pool.acquire(.tiny, "test"); + pool.release(tiny1); + try testing.expectEqual(1, pool.tiny.free_list_len); - pool.release(alloc1); - try testing.expectEqual(@as(u16, 1), pool.free_list_len); + // Next acquire with .tiny should reuse from tiny bucket + const tiny2 = try pool.acquire(.tiny, "test"); + try testing.expectEqual(0, pool.tiny.free_list_len); + try testing.expectEqual(tiny1.ptr, tiny2.ptr); - // The same entry should be returned from the free list. 
- const alloc2 = try pool.acquire(.{ .debug = "test" }); - try testing.expectEqual(@as(u16, 0), pool.free_list_len); - try testing.expectEqual(alloc1.ptr, alloc2.ptr); + // acquire with .medium should NOT get the tiny arena + const medium = try pool.acquire(.medium, "test-medium"); + try testing.expect(medium.ptr != tiny2.ptr); - pool.release(alloc2); + pool.release(tiny2); + pool.release(medium); } -test "arena pool - multiple concurrent arenas" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); +test "ArenaPool: respects per-bucket max limits" { + var pool = ArenaPool.init(testing.allocator, .{ + .tiny = .{ .max = 1, .retain = 1024 }, + .medium = .{ .max = 2, .retain = 1024 }, + .large = .{ .max = 1, .retain = 1024 }, + }); defer pool.deinit(); - const a1 = try pool.acquire(.{ .debug = "test1" }); - const a2 = try pool.acquire(.{ .debug = "test2" }); - const a3 = try pool.acquire(.{ .debug = "test3" }); + // Acquire 3 tiny arenas + const t1 = try pool.acquire(.tiny, "t1"); + const t2 = try pool.acquire(.tiny, "t2"); + const t3 = try pool.acquire(.tiny, "t3"); - // All three must be distinct arenas. 
- try testing.expect(a1.ptr != a2.ptr); - try testing.expect(a2.ptr != a3.ptr); - try testing.expect(a1.ptr != a3.ptr); + // Release all 3, but only 1 should be kept (tiny_max = 1) + pool.release(t1); + try testing.expectEqual(1, pool.tiny.free_list_len); + pool.release(t2); + try testing.expectEqual(1, pool.tiny.free_list_len); // still 1, t2 discarded + pool.release(t3); + try testing.expectEqual(1, pool.tiny.free_list_len); // still 1, t3 discarded - _ = try a1.alloc(u8, 16); - _ = try a2.alloc(u8, 32); - _ = try a3.alloc(u8, 48); + // Acquire 3 medium arenas + const m1 = try pool.acquire(.medium, "m1"); + const m2 = try pool.acquire(.medium, "m2"); + const m3 = try pool.acquire(.medium, "m3"); - pool.release(a1); - pool.release(a2); - pool.release(a3); - - try testing.expectEqual(@as(u16, 3), pool.free_list_len); + // Release all 3, but only 2 should be kept (medium_max = 2) + pool.release(m1); + pool.release(m2); + pool.release(m3); + try testing.expectEqual(2, pool.medium.free_list_len); } -test "arena pool - free list respects max limit" { - // Cap the free list at 1 so the second release discards its arena. - var pool = ArenaPool.init(testing.allocator, 1, 1024 * 16); +test "ArenaPool: reset clears memory without releasing" { + var pool = ArenaPool.init(testing.allocator, .{}); defer pool.deinit(); - const a1 = try pool.acquire(.{ .debug = "test1" }); - const a2 = try pool.acquire(.{ .debug = "test2" }); - - pool.release(a1); - try testing.expectEqual(@as(u16, 1), pool.free_list_len); - - // The free list is full; a2's arena should be destroyed, not queued. 
- pool.release(a2); - try testing.expectEqual(@as(u16, 1), pool.free_list_len); -} - -test "arena pool - reset clears memory without releasing" { - var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); - defer pool.deinit(); - - const alloc = try pool.acquire(.{ .debug = "test" }); + const alloc = try pool.acquire(.medium, "test"); const buf = try alloc.alloc(u8, 128); @memset(buf, 0xFF); @@ -246,7 +301,7 @@ test "arena pool - reset clears memory without releasing" { pool.reset(alloc, 0); // The free list must stay empty; the allocator was not released. - try testing.expectEqual(@as(u16, 0), pool.free_list_len); + try testing.expectEqual(0, pool.medium.free_list_len); // Allocating again through the same arena must still work. const buf2 = try alloc.alloc(u8, 64); @@ -256,18 +311,60 @@ test "arena pool - reset clears memory without releasing" { pool.release(alloc); } -test "arena pool - deinit with entries in free list" { +test "ArenaPool: deinit with entries in free list" { // Verifies that deinit properly cleans up free-listed arenas (no leaks // detected by the test allocator). 
- var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16); + var pool = ArenaPool.init(testing.allocator, .{}); - const a1 = try pool.acquire(.{ .debug = "test1" }); - const a2 = try pool.acquire(.{ .debug = "test2" }); + const a1 = try pool.acquire(.tiny, "test1"); + const a2 = try pool.acquire(.medium, "test2"); _ = try a1.alloc(u8, 256); _ = try a2.alloc(u8, 512); pool.release(a1); pool.release(a2); - try testing.expectEqual(@as(u16, 2), pool.free_list_len); + try testing.expectEqual(1, pool.tiny.free_list_len); + try testing.expectEqual(1, pool.medium.free_list_len); pool.deinit(); } + +test "ArenaPool: small bucket" { + var pool = ArenaPool.init(testing.allocator, .{ + .small = .{ .max = 2, .retain = 4 * 1024 }, + }); + defer pool.deinit(); + + const s1 = try pool.acquire(.small, "s1"); + const s2 = try pool.acquire(.small, "s2"); + const s3 = try pool.acquire(.small, "s3"); + + pool.release(s1); + pool.release(s2); + pool.release(s3); + + try testing.expectEqual(2, pool.small.free_list_len); +} + +test "ArenaPool: size-based acquire" { + var pool = ArenaPool.init(testing.allocator, .{}); + defer pool.deinit(); + + // <= 1KB -> tiny + const a = try pool.acquire(500, "fits-tiny"); + // <= 4KB -> small + const b = try pool.acquire(2000, "fits-small"); + // <= 16KB -> medium + const c = try pool.acquire(8000, "fits-medium"); + // > 16KB -> large + const d = try pool.acquire(20000, "fits-large"); + + pool.release(a); + pool.release(b); + pool.release(c); + pool.release(d); + + try testing.expectEqual(1, pool.tiny.free_list_len); + try testing.expectEqual(1, pool.small.free_list_len); + try testing.expectEqual(1, pool.medium.free_list_len); + try testing.expectEqual(1, pool.large.free_list_len); +} diff --git a/src/Config.zig b/src/Config.zig index 483c320f..2acca63c 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -212,6 +212,20 @@ pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig { }; } +pub fn blockPrivateNetworks(self: *const Config) bool { + 
return switch (self.mode) { + inline .serve, .fetch, .mcp => |opts| opts.common.block_private_networks, + else => unreachable, + }; +} + +pub fn blockCidrs(self: *const Config) ?[]const u8 { + return switch (self.mode) { + inline .serve, .fetch, .mcp => |opts| opts.common.block_cidrs, + else => unreachable, + }; +} + pub fn maxConnections(self: *const Config) u16 { return switch (self.mode) { .serve => |opts| opts.cdp_max_connections, @@ -300,6 +314,9 @@ pub const Common = struct { web_bot_auth_key_file: ?[]const u8 = null, web_bot_auth_keyid: ?[]const u8 = null, web_bot_auth_domain: ?[]const u8 = null, + + block_private_networks: bool = false, + block_cidrs: ?[]const u8 = null, }; /// Pre-formatted HTTP headers for reuse across Http and Client. @@ -362,6 +379,21 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ we make requests towards. \\ Defaults to false. \\ + \\--block-private-networks + \\ Blocks HTTP requests to private/internal IP addresses + \\ after DNS resolution. Useful for sandboxing, multi-tenant + \\ deployments, and preventing access to internal infrastructure + \\ regardless of what triggers the request (JavaScript, HTML + \\ resources, redirects, etc.). + \\ Defaults to false. + \\ + \\--block-cidrs + \\ Additional CIDR ranges to block, comma-separated. + \\ Prefix with '-' to allow (exempt from blocking). + \\ e.g. --block-cidrs 169.254.169.254/32,fd00:ec2::254/128 + \\ e.g. --block-cidrs 10.0.0.0/8,-10.0.0.42/32 + \\ Can be used standalone or combined with --block-private-networks. + \\ \\--http-proxy The HTTP proxy to use for all HTTP requests. \\ A username:password can be included for basic authentication. \\ Defaults to none. 
@@ -1145,5 +1177,19 @@ fn parseCommonArg( return true; } + if (std.mem.eql(u8, "--block-private-networks", opt)) { + common.block_private_networks = true; + return true; + } + + if (std.mem.eql(u8, "--block-cidrs", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = "--block-cidrs" }); + return error.InvalidArgument; + }; + common.block_cidrs = try allocator.dupe(u8, str); + return true; + } + return false; } diff --git a/src/SemanticTree.zig b/src/SemanticTree.zig index 9bca520e..5b3f0ae5 100644 --- a/src/SemanticTree.zig +++ b/src/SemanticTree.zig @@ -671,7 +671,7 @@ pub fn getNodeDetails( if (el.getAttributeSafe(comptime .wrap("href"))) |h| { const URL = lp.URL; - href = URL.resolve(arena, page.base(), h, .{ .encode = true }) catch h; + href = URL.resolve(arena, page.base(), h, .{ .encoding = page.charset }) catch h; } if (el.is(Element.Html.Input)) |input| { diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index b90029ac..a2da34d5 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -374,7 +374,8 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void { fn processRequest(self: *Client, req: Request) !void { if (self.network.cache) |*cache| { if (req.method == .GET) { - const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" }); + // cache is only used to read the meta data + const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache"); defer self.network.app.arena_pool.release(arena); var iter = req.headers.iterator(); diff --git a/src/browser/Page.zig b/src/browser/Page.zig index f12b606b..935f6f51 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -207,6 +207,9 @@ base_url: ?[:0]const u8 = null, // referer header cache. 
referer_header: ?[:0]const u8 = null, +// Document charset (canonical name from encoding_rs, static lifetime) +charset: []const u8 = "UTF-8", + // Arbitrary buffer. Need to temporarily lowercase a value? Use this. No lifetime // guarantee - it's valid until someone else uses it. buf: [BUF_SIZE]u8 = undefined, @@ -248,7 +251,7 @@ pub fn init(self: *Page, frame_id: u32, session: *Session, parent: ?*Page) !void log.debug(.page, "page.init", .{}); } - const call_arena = try session.getArena(.{ .debug = "call_arena" }); + const call_arena = try session.getArena(.medium, "call_arena"); errdefer session.releaseArena(call_arena); const factory = &session.factory; @@ -429,8 +432,8 @@ pub fn headersForRequest(self: *Page, headers: *HttpClient.Headers) !void { } } -pub fn getArena(self: *Page, comptime opts: Session.GetArenaOpts) !Allocator { - return self._session.getArena(opts); +pub fn getArena(self: *Page, size_or_bucket: anytype, debug: []const u8) !Allocator { + return self._session.getArena(size_or_bucket, debug); } pub fn releaseArena(self: *Page, allocator: Allocator) void { @@ -510,7 +513,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi log.warn(.js, "invalid blob", .{ .url = request_url }); return error.BlobNotFound; }; - const parse_arena = try self.getArena(.{ .debug = "Page.parseBlob" }); + const parse_arena = try self.getArena(.medium, "Page.parseBlob"); defer self.releaseArena(parse_arena); var parser = Parser.init(parse_arena, self.document.asNode(), self); parser.parse(blob._slice); @@ -619,7 +622,7 @@ pub fn scheduleNavigation(self: *Page, request_url: []const u8, opts: NavigateOp if (self.canScheduleNavigation(std.meta.activeTag(nt)) == false) { return; } - const arena = try self._session.getArena(.{ .debug = "scheduleNavigation" }); + const arena = try self._session.getArena(.small, "scheduleNavigation"); errdefer self._session.releaseArena(arena); return self.scheduleNavigationWithArena(arena, request_url, opts, nt); 
} @@ -658,7 +661,7 @@ fn scheduleNavigationWithArena(originator: *Page, arena: Allocator, request_url: arena, page_base, request_url, - .{ .always_dupe = true, .encode = true }, + .{ .always_dupe = true, .encoding = originator.charset }, ); break :blk .{ u, false }; }; @@ -962,9 +965,13 @@ fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void { switch (mime.content_type) { .text_html => { - self._parse_state = .{ .html = .{ - .mime = mime, - } }; + // Normalize and store the charset using encoding_rs canonical names + const charset_str = mime.charsetString(); + const info = h5e.encoding_for_label(charset_str.ptr, charset_str.len); + if (info.isValid()) { + self.charset = info.name(); + } + self._parse_state = .{ .html = .empty }; }, .application_json, .text_javascript, .text_css, .text_plain => { var arr: std.ArrayList(u8) = .empty; @@ -979,7 +986,7 @@ fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void { } switch (self._parse_state) { - .html => |*html| try html.buf.appendSlice(self.arena, data), + .html => |*html| try html.appendSlice(self.arena, data), .text => |*buf| { // we have to escape the data... 
var v = data; @@ -1022,18 +1029,19 @@ fn pageDoneCallback(ctx: *anyopaque) !void { }); }; - const parse_arena = try self.getArena(.{ .debug = "Page.parse" }); + const parse_arena = try self.getArena(.medium, "Page.parse"); defer self.releaseArena(parse_arena); var parser = Parser.init(parse_arena, self.document.asNode(), self); switch (self._parse_state) { - .html => |*html_state| { - const raw_html = html_state.buf.items; - if (html_state.needsEncodingConversion()) { - parser.parseWithEncoding(raw_html, html_state.mime.charsetString()); - } else { + .html => |*html_buf| { + const raw_html = html_buf.items; + + if (std.mem.eql(u8, self.charset, "UTF-8")) { parser.parse(raw_html); + } else { + parser.parseWithEncoding(raw_html, self.charset); } self._script_manager.staticScriptsDone(); self._parse_state = .complete; @@ -1188,7 +1196,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *IFrame) !void { self.call_arena, // ok to use, page.navigate dupes this self.base(), src, - .{ .encode = true }, + .{ .encoding = self.charset }, ); }; @@ -3164,21 +3172,11 @@ const ParseState = union(enum) { pre, complete, err: anyerror, - html: Html, + html: std.ArrayList(u8), text: std.ArrayList(u8), image: std.ArrayList(u8), raw: std.ArrayList(u8), raw_done: []const u8, - - const Html = struct { - mime: Mime, - buf: std.ArrayList(u8) = .empty, - - fn needsEncodingConversion(self: *const Html) bool { - const charset = self.mime.charsetString(); - return !std.ascii.eqlIgnoreCase(charset, "utf-8") and !std.ascii.eqlIgnoreCase(charset, "utf8"); - } - }; }; const LoadState = enum { @@ -3568,7 +3566,7 @@ pub fn submitForm(self: *Page, submitter_: ?*Element, form_: ?*Element.Html.Form // I don't think this is technically correct, but FormData handles it ok const form_data = try FormData.init(form, submitter_, self); - const arena = try self._session.getArena(.{ .debug = "submitForm" }); + const arena = try self._session.getArena(.medium, "submitForm"); errdefer 
self._session.releaseArena(arena); const encoding = form_element.getAttributeSafe(comptime .wrap("enctype")); @@ -3628,9 +3626,6 @@ fn asUint(comptime string: anytype) std.meta.Int( const testing = @import("../testing.zig"); test "WebApi: Page" { - const filter: testing.LogFilter = .init(&.{ .http, .js }); - defer filter.deinit(); - try testing.htmlRunner("page", .{}); } diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index 4ee753ea..fd3889e6 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -249,7 +249,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult { } pub fn waitForSelector(self: *Runner, selector: [:0]const u8, timeout_ms: u32) !*Node.Element { - const arena = try self.session.getArena(.{ .debug = "Runner.waitForSelector" }); + const arena = try self.session.getArena(.small, "Runner.waitForSelector"); defer self.session.releaseArena(arena); var timer = try std.time.Timer.start(); diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 984ecccc..95b7f839 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -188,7 +188,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e var handover = false; const page = self.page; - const arena = try page.getArena(.{ .debug = "addFromElement" }); + const arena = try page.getArena(.large, "SM.addFromElement"); errdefer if (!handover) { page.releaseArena(arena); }; @@ -369,7 +369,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const errdefer _ = self.imported_modules.remove(url); const page = self.page; - const arena = try page.getArena(.{ .debug = "preloadImport" }); + const arena = try page.getArena(.large, "SM.preloadImport"); errdefer page.releaseArena(arena); const script = try arena.create(Script); @@ -469,7 +469,7 @@ pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource { pub fn getAsyncImport(self: *ScriptManager, url: 
[:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void { const page = self.page; - const arena = try page.getArena(.{ .debug = "getAsyncImport" }); + const arena = try page.getArena(.large, "SM.getAsyncImport"); errdefer page.releaseArena(arena); const script = try arena.create(Script); diff --git a/src/browser/Session.zig b/src/browser/Session.zig index 8ec3e217..baea1590 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -110,10 +110,10 @@ pub fn init(self: *Session, browser: *Browser, notification: *Notification) !voi const allocator = browser.app.allocator; const arena_pool = browser.arena_pool; - const arena = try arena_pool.acquire(.{ .debug = "Session" }); + const arena = try arena_pool.acquire(.small, "Session"); errdefer arena_pool.release(arena); - const page_arena = try arena_pool.acquire(.{ .debug = "Session.page_arena" }); + const page_arena = try arena_pool.acquire(.large, "Session.page_arena"); errdefer arena_pool.release(page_arena); self.* = .{ @@ -186,12 +186,8 @@ pub fn removePage(self: *Session) void { } } -pub const GetArenaOpts = struct { - debug: []const u8, -}; - -pub fn getArena(self: *Session, opts: GetArenaOpts) !Allocator { - return self.arena_pool.acquire(.{ .debug = opts.debug }); +pub fn getArena(self: *Session, size_or_bucket: anytype, debug: []const u8) !Allocator { + return self.arena_pool.acquire(size_or_bucket, debug); } pub fn releaseArena(self: *Session, allocator: Allocator) void { diff --git a/src/browser/StyleManager.zig b/src/browser/StyleManager.zig index 161ebca0..404a11ed 100644 --- a/src/browser/StyleManager.zig +++ b/src/browser/StyleManager.zig @@ -66,7 +66,7 @@ dirty: bool = false, pub fn init(page: *Page) !StyleManager { return .{ .page = page, - .arena = try page.getArena(.{ .debug = "StyleManager" }), + .arena = try page.getArena(.medium, "StyleManager"), }; } diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 6f8cbebd..532f11a1 100644 --- 
a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -19,16 +19,19 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const ResolveOpts = struct { - encode: bool = false, +pub const ResolveOpts = struct { + /// null = don't encode, "UTF-8" = standard percent encoding, + /// other charset = encode query string using that charset with NCR fallback + encoding: ?[]const u8 = null, always_dupe: bool = false, }; // path is anytype, so that it can be used with both []const u8 and [:0]const u8 -pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, comptime opts: ResolveOpts) ![:0]const u8 { +pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, opts: ResolveOpts) ![:0]const u8 { const PT = @TypeOf(source_path); - var path: [:0]const u8 = if (comptime !isNullTerminated(PT) or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path; + const needs_dupe = comptime !isNullTerminated(PT); + var path: [:0]const u8 = if (needs_dupe or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path; if (base.len == 0) { return processResolved(allocator, path, opts); @@ -186,14 +189,12 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c return processResolved(allocator, out[0..out_i :0], opts); } -fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { - if (!comptime opts.encode) { - return url; - } - return ensureEncoded(allocator, url); +fn processResolved(allocator: Allocator, url: [:0]const u8, opts: ResolveOpts) ![:0]const u8 { + const encoding = opts.encoding orelse return url; + return ensureEncoded(allocator, url, encoding); } -pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { +pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8, encoding: []const u8) ![:0]const u8 { const scheme_end = std.mem.indexOf(u8, url, "://"); const authority_start = if (scheme_end) |end| end + 3 
else 0; const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; @@ -205,18 +206,18 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { const query_end = if (query_start) |_| (fragment_start orelse url.len) else path_end; const path_to_encode = url[path_start..path_end]; + // Path is always UTF-8 percent encoded per URL spec const encoded_path = try percentEncodeSegment(allocator, path_to_encode, .path); + // Query string uses document encoding const encoded_query = if (query_start) |qs| blk: { const query_to_encode = url[qs + 1 .. query_end]; - const encoded = try percentEncodeSegment(allocator, query_to_encode, .query); - break :blk encoded; + break :blk try encodeQueryString(allocator, query_to_encode, encoding); } else null; const encoded_fragment = if (fragment_start) |fs| blk: { const fragment_to_encode = url[fs + 1 ..]; - const encoded = try percentEncodeSegment(allocator, fragment_to_encode, .query); - break :blk encoded; + break :blk try percentEncodeSegment(allocator, fragment_to_encode, .query); } else null; if (encoded_path.ptr == path_to_encode.ptr and @@ -242,7 +243,7 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { return buf.items[0 .. buf.items.len - 1 :0]; } -const EncodeSet = enum { path, query, userinfo, fragment }; +const EncodeSet = enum { path, query, query_legacy, userinfo, fragment }; fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 { // Check if encoding is needed @@ -285,17 +286,65 @@ fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime enco return buf.items; } +const h5e = @import("parser/html5ever.zig"); + +/// Encode a query string using the specified encoding. +/// For UTF-8, this is standard percent encoding. +/// For legacy encodings, unmappable characters are replaced with NCRs (&#codepoint;). 
+fn encodeQueryString(allocator: Allocator, query: []const u8, encoding: []const u8) ![]const u8 { + // For UTF-8, use standard percent encoding + if (std.mem.eql(u8, encoding, "UTF-8")) { + return percentEncodeSegment(allocator, query, .query); + } + + // For legacy encodings, first encode to the target charset with NCR fallback + const enc_info = h5e.encoding_for_label(encoding.ptr, encoding.len); + if (!enc_info.isValid()) { + // Unknown encoding, fall back to UTF-8 + return percentEncodeSegment(allocator, query, .query); + } + + // Calculate max buffer size for encoded output + const max_encoded_len = h5e.encoding_max_encode_buffer_length(enc_info.handle.?, query.len); + if (max_encoded_len == 0) { + return percentEncodeSegment(allocator, query, .query); + } + + const encode_buf = try allocator.alloc(u8, max_encoded_len); + defer allocator.free(encode_buf); + + // Encode UTF-8 to legacy encoding with NCR fallback + const result = h5e.encoding_encode_with_ncr( + enc_info.handle.?, + query.ptr, + query.len, + encode_buf.ptr, + encode_buf.len, + ); + + if (!result.isSuccess()) { + // Encoding failed, fall back to UTF-8 + return percentEncodeSegment(allocator, query, .query); + } + + // Now percent-encode the result using query_legacy to preserve NCRs + const encoded_bytes = encode_buf[0..result.bytes_written]; + return percentEncodeSegment(allocator, encoded_bytes, .query_legacy); +} + fn shouldPercentEncode(c: u8, comptime encode_set: EncodeSet) bool { return switch (c) { // Unreserved characters (RFC 3986) 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => false, - // sub-delims allowed in path/query but some must be encoded in userinfo - '!', '$', '&', '\'', '(', ')', '*', '+', ',' => false, - ';', '=' => encode_set == .userinfo, + // sub-delims allowed in path/query but some must be encoded in userinfo/query_legacy + '!', '$', '\'', '(', ')', '*', '+', ',' => false, + // '&' and ';' must be encoded for legacy encoding (to preserve NCRs like &#nnnnn;) + 
'&', ';' => encode_set == .userinfo or encode_set == .query_legacy, + '=' => encode_set == .userinfo, // Separators: userinfo must encode these '/', ':', '@' => encode_set == .userinfo, // '?' is allowed in queries only - '?' => encode_set != .query, + '?' => encode_set != .query and encode_set != .query_legacy, // '#' is allowed in fragments only '#' => encode_set != .fragment, // Everything else needs encoding (including space) @@ -1130,7 +1179,7 @@ test "URL: ensureEncoded" { }; for (cases) |case| { - const result = try ensureEncoded(testing.arena_allocator, case.url); + const result = try ensureEncoded(testing.arena_allocator, case.url, "UTF-8"); try testing.expectString(case.expected, result); } } @@ -1296,7 +1345,7 @@ test "URL: resolve with encoding" { }; for (cases) |case| { - const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true }); + const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encoding = "UTF-8" }); try testing.expectString(case.expected, result); } } diff --git a/src/browser/interactive.zig b/src/browser/interactive.zig index a0b4528a..225633c7 100644 --- a/src/browser/interactive.zig +++ b/src/browser/interactive.zig @@ -182,7 +182,7 @@ pub fn collectInteractiveElements( .id = el.getAttributeSafe(comptime .wrap("id")), .class = el.getAttributeSafe(comptime .wrap("class")), .href = if (el.getAttributeSafe(comptime .wrap("href"))) |href| - URL.resolve(arena, page.base(), href, .{ .encode = true }) catch href + URL.resolve(arena, page.base(), href, .{ .encoding = page.charset }) catch href else null, .input_type = getInputType(el), diff --git a/src/browser/js/Env.zig b/src/browser/js/Env.zig index 2c1ebf38..03eadac3 100644 --- a/src/browser/js/Env.zig +++ b/src/browser/js/Env.zig @@ -261,7 +261,7 @@ pub const ContextParams = struct { }; pub fn createContext(self: *Env, page: *Page, params: ContextParams) !*Context { - const context_arena = try self.app.arena_pool.acquire(.{ .debug = 
params.debug_name }); + const context_arena = try self.app.arena_pool.acquire(.large, params.debug_name); errdefer self.app.arena_pool.release(context_arena); const isolate = self.isolate; diff --git a/src/browser/js/Local.zig b/src/browser/js/Local.zig index 4d91ed2e..170e5c0c 100644 --- a/src/browser/js/Local.zig +++ b/src/browser/js/Local.zig @@ -1479,7 +1479,7 @@ fn createFinalizerCallback( ) !*Session.FinalizerCallback { const session = self.ctx.session; - const arena = try session.getArena(.{ .debug = "FinalizerCallback" }); + const arena = try session.getArena(.tiny, "FinalizerCallback"); errdefer session.releaseArena(arena); const fc = try arena.create(Session.FinalizerCallback); diff --git a/src/browser/js/Origin.zig b/src/browser/js/Origin.zig index c6c6bf81..fce37ce0 100644 --- a/src/browser/js/Origin.zig +++ b/src/browser/js/Origin.zig @@ -45,7 +45,7 @@ key: []const u8, security_token: v8.Global, pub fn init(app: *App, isolate: js.Isolate, key: []const u8) !*Origin { - const arena = try app.arena_pool.acquire(.{ .debug = "Origin" }); + const arena = try app.arena_pool.acquire(.tiny, "Origin"); errdefer app.arena_pool.release(arena); var hs: js.HandleScope = undefined; diff --git a/src/browser/js/Snapshot.zig b/src/browser/js/Snapshot.zig index 5a04861a..0b6a7fd1 100644 --- a/src/browser/js/Snapshot.zig +++ b/src/browser/js/Snapshot.zig @@ -126,6 +126,7 @@ pub fn create() !Snapshot { var data_start: usize = 0; const isolate = v8.v8__SnapshotCreator__getIsolate(snapshot_creator).?; + defer v8.v8__Isolate__LowMemoryNotification(isolate); { // CreateBlob, which we'll call once everything is setup, MUST NOT diff --git a/src/browser/js/String.zig b/src/browser/js/String.zig index 2cbe6a17..8d29d838 100644 --- a/src/browser/js/String.zig +++ b/src/browser/js/String.zig @@ -44,11 +44,11 @@ fn _toSlice(self: String, comptime null_terminate: bool, allocator: Allocator) ! 
const handle = self.handle; const isolate = local.isolate.handle; - const len = v8.v8__String__Utf8Length(handle, isolate); - const buf = try (if (comptime null_terminate) allocator.allocSentinel(u8, @intCast(len), 0) else allocator.alloc(u8, @intCast(len))); + const l = v8.v8__String__Utf8Length(handle, isolate); + const buf = try (if (comptime null_terminate) allocator.allocSentinel(u8, @intCast(l), 0) else allocator.alloc(u8, @intCast(l))); const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); if (comptime IS_DEBUG) { - std.debug.assert(n == len); + std.debug.assert(n == l); } return buf; @@ -64,32 +64,32 @@ pub fn toSSOWithAlloc(self: String, allocator: Allocator) !SSO { const handle = self.handle; const isolate = self.local.isolate.handle; - const len: usize = @intCast(v8.v8__String__Utf8Length(handle, isolate)); + const l: usize = @intCast(v8.v8__String__Utf8Length(handle, isolate)); - if (len <= 12) { + if (l <= 12) { var content: [12]u8 = undefined; const n = v8.v8__String__WriteUtf8(handle, isolate, &content[0], content.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); if (comptime IS_DEBUG) { - std.debug.assert(n == len); + std.debug.assert(n == l); } // Weird that we do this _after_, but we have to..I've seen weird issues // in ReleaseMode where v8 won't write to content if it starts off zero // initiated - @memset(content[len..], 0); - return .{ .len = @intCast(len), .payload = .{ .content = content } }; + @memset(content[l..], 0); + return .{ .len = @intCast(l), .payload = .{ .content = content } }; } - const buf = try allocator.alloc(u8, len); + const buf = try allocator.alloc(u8, l); const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); if (comptime IS_DEBUG) { - std.debug.assert(n == len); + std.debug.assert(n == l); } var prefix: [4]u8 = @splat(0); @memcpy(&prefix, buf[0..4]); return .{ - .len = @intCast(len), 
+ .len = @intCast(l), .payload = .{ .heap = .{ .prefix = prefix, .ptr = buf.ptr, @@ -103,9 +103,13 @@ pub fn format(self: String, writer: *std.Io.Writer) !void { const isolate = local.isolate.handle; var small: [1024]u8 = undefined; - const len = v8.v8__String__Utf8Length(handle, isolate); - var buf = if (len < 1024) &small else local.call_arena.alloc(u8, @intCast(len)) catch return error.WriteFailed; + const l = v8.v8__String__Utf8Length(handle, isolate); + var buf = if (l < 1024) &small else local.call_arena.alloc(u8, @intCast(l)) catch return error.WriteFailed; const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8); return writer.writeAll(buf[0..n]); } + +pub fn len(self: String) usize { + return @intCast(v8.v8__String__Utf8Length(self.handle, self.local.isolate.handle)); +} diff --git a/src/browser/markdown.zig b/src/browser/markdown.zig index 8a2f1031..b513b057 100644 --- a/src/browser/markdown.zig +++ b/src/browser/markdown.zig @@ -278,7 +278,8 @@ const Context = struct { } try self.writer.writeAll("]("); if (el.getAttributeSafe(comptime .wrap("src"))) |src| { - const absolute_src = URL.resolve(self.page.call_arena, self.page.base(), src, .{ .encode = true }) catch src; + const page = self.page; + const absolute_src = URL.resolve(page.call_arena, page.base(), src, .{ .encoding = page.charset }) catch src; try self.writer.writeAll(absolute_src); } try self.writer.writeAll(")"); @@ -286,13 +287,14 @@ const Context = struct { return; }, .anchor => { + const page = self.page; const info = analyzeContent(el.asNode()); const label = getAnchorLabel(el); const href_raw = el.getAttributeSafe(comptime .wrap("href")); if (!info.has_visible and label == null and href_raw == null) return; - const href = if (href_raw) |h| URL.resolve(self.page.call_arena, self.page.base(), h, .{ .encode = true }) catch h else null; + const href = if (href_raw) |h| URL.resolve(page.call_arena, page.base(), h, .{ .encoding = 
page.charset }) catch h else null; if (info.has_block) { try self.renderChildren(el.asNode()); diff --git a/src/browser/parser/html5ever.zig b/src/browser/parser/html5ever.zig index f6f81583..829ac429 100644 --- a/src/browser/parser/html5ever.zig +++ b/src/browser/parser/html5ever.zig @@ -216,3 +216,89 @@ pub extern "c" fn xml5ever_parse_document( appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void, appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void, ) void; + +// General encoding api +pub const EncodingInfo = extern struct { + found: u8, + handle: ?*anyopaque, + name_len: usize, + name_ptr: [*]const u8, + + pub fn isValid(self: *const EncodingInfo) bool { + return self.found != 0; + } + + pub fn name(self: *const EncodingInfo) []const u8 { + if (self.name_len == 0) { + return ""; + } + return self.name_ptr[0..self.name_len]; + } +}; + +pub const DecodeResult = extern struct { + had_errors: u8, + bytes_read: usize, + bytes_written: usize, + + pub fn hadErrors(self: *const DecodeResult) bool { + return self.had_errors != 0; + } +}; + +pub extern "c" fn encoding_for_label( + label: [*]const u8, + label_len: usize, +) EncodingInfo; + +pub extern "c" fn encoding_max_utf8_buffer_length( + handle: *anyopaque, + input_len: usize, +) usize; + +pub extern "c" fn encoding_decode( + handle: *anyopaque, + input: ?[*]const u8, + input_len: usize, + output: [*]u8, + output_len: usize, + is_last: u8, +) DecodeResult; + +// Streaming decoder API +pub extern "c" fn encoding_decoder_new(handle: *anyopaque) ?*anyopaque; + +pub extern "c" fn encoding_decoder_decode( + decoder: *anyopaque, + input: ?[*]const u8, + input_len: usize, + output: [*]u8, + output_len: usize, + is_last: u8, +) DecodeResult; + +pub extern "c" fn encoding_decoder_free(decoder: *anyopaque) void; + +// Encoding API (UTF-8 to legacy encoding with NCR fallback) 
+pub const EncodeResult = extern struct { + status: u8, + bytes_read: usize, + bytes_written: usize, + + pub fn isSuccess(self: *const EncodeResult) bool { + return self.status == 0; + } +}; + +pub extern "c" fn encoding_encode_with_ncr( + handle: *anyopaque, + input: ?[*]const u8, + input_len: usize, + output: [*]u8, + output_capacity: usize, +) EncodeResult; + +pub extern "c" fn encoding_max_encode_buffer_length( + handle: *anyopaque, + input_len: usize, +) usize; diff --git a/src/browser/structured_data.zig b/src/browser/structured_data.zig index 9b6e7fbe..cad1d9d8 100644 --- a/src/browser/structured_data.zig +++ b/src/browser/structured_data.zig @@ -288,7 +288,7 @@ fn collectLink( ) !void { const rel = el.getAttributeSafe(comptime .wrap("rel")) orelse return; const raw_href = el.getAttributeSafe(comptime .wrap("href")) orelse return; - const href = URL.resolve(arena, page.base(), raw_href, .{ .encode = true }) catch raw_href; + const href = URL.resolve(arena, page.base(), raw_href, .{ .encoding = page.charset }) catch raw_href; if (std.ascii.eqlIgnoreCase(rel, "alternate")) { try alternate.append(arena, .{ diff --git a/src/browser/tests/document/document.html b/src/browser/tests/document/document.html index 74d8ff30..ede2b507 100644 --- a/src/browser/tests/document/document.html +++ b/src/browser/tests/document/document.html @@ -18,6 +18,10 @@ testing.expectEqual("visible", document.visibilityState); testing.expectEqual(false, document.prerendering); testing.expectEqual(undefined, Document.prerendering); + // characterSet should return canonical encoding name + testing.expectEqual("UTF-8", document.characterSet); + testing.expectEqual("UTF-8", document.charset); + testing.expectEqual("UTF-8", document.inputEncoding); + + + + + + diff --git a/src/browser/tests/encoding/text_encoder.html b/src/browser/tests/encoding/text_encoder.html index 540f60c1..99fd1959 100644 --- a/src/browser/tests/encoding/text_encoder.html +++ 
b/src/browser/tests/encoding/text_encoder.html @@ -5,6 +5,9 @@ diff --git a/src/browser/tests/page/encoding.html b/src/browser/tests/page/encoding.html index af532b82..b740a465 100644 --- a/src/browser/tests/page/encoding.html +++ b/src/browser/tests/page/encoding.html @@ -11,6 +11,10 @@ testing.onload(() => { // GBK-encoded "中文" should be decoded to UTF-8 testing.expectEqual('中文', iframe.contentDocument.getElementById('test').textContent); + // document.characterSet should return canonical encoding name + testing.expectEqual('GBK', iframe.contentDocument.characterSet); + testing.expectEqual('GBK', iframe.contentDocument.charset); + testing.expectEqual('GBK', iframe.contentDocument.inputEncoding); }); } @@ -73,3 +77,32 @@ }); } + + diff --git a/src/browser/tests/testing.js b/src/browser/tests/testing.js index 037d15cf..12d0f761 100644 --- a/src/browser/tests/testing.js +++ b/src/browser/tests/testing.js @@ -37,7 +37,13 @@ function expectError(expected, fn) { withError((err) => { - expectEqual(true, err.toString().includes(expected)); + if (!err.toString().includes(expected)) { + console.error(`Expecte error to contains: ${expected}, was: ${err.toString()}`); + expectEqual(true, false); + } else { + // to record a successful case + expectTrue(true); + } }, fn); } diff --git a/src/browser/webapi/Blob.zig b/src/browser/webapi/Blob.zig index bf0c1118..0598d7fc 100644 --- a/src/browser/webapi/Blob.zig +++ b/src/browser/webapi/Blob.zig @@ -77,7 +77,15 @@ pub fn initWithMimeValidation( validate_mime: bool, page: *Page, ) !*Blob { - const arena = try page.getArena(.{ .debug = "Blob" }); + const data_len = blk: { + const parts = maybe_blob_parts orelse break :blk 0; + var size: usize = 0; + for (parts) |p| { + size += p.len; + } + break :blk size; + }; + const arena = try page.getArena(256 + data_len, "Blob"); errdefer page.releaseArena(arena); const options: InitOptions = maybe_options orelse .{}; diff --git a/src/browser/webapi/DOMParser.zig 
b/src/browser/webapi/DOMParser.zig index 10a94bca..7bd5b600 100644 --- a/src/browser/webapi/DOMParser.zig +++ b/src/browser/webapi/DOMParser.zig @@ -50,7 +50,7 @@ pub fn parseFromString( @"image/svg+xml", }, mime_type) orelse return error.NotSupported; - const arena = try page.getArena(.{ .debug = "DOMParser.parseFromString" }); + const arena = try page.getArena(.medium, "DOMParser.parseFromString"); defer page.releaseArena(arena); return switch (target_mime) { diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index cf15c49b..cd2d5a4c 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -666,7 +666,7 @@ pub fn write(self: *Document, text: []const []const u8, page: *Page) !void { page._parse_mode = .document_write; defer page._parse_mode = previous_parse_mode; - const arena = try page.getArena(.{ .debug = "Document.write" }); + const arena = try page.getArena(.medium, "Document.write"); defer page.releaseArena(arena); var parser = Parser.init(arena, fragment_node, page); @@ -1068,10 +1068,15 @@ pub const JsApi = struct { pub const hasFocus = bridge.function(Document.hasFocus, .{}); pub const prerendering = bridge.property(false, .{ .template = false }); - pub const characterSet = bridge.property("UTF-8", .{ .template = false }); - pub const charset = bridge.property("UTF-8", .{ .template = false }); - pub const inputEncoding = bridge.property("UTF-8", .{ .template = false }); + pub const characterSet = bridge.accessor(getCharacterSet, null, .{}); + pub const charset = bridge.accessor(getCharacterSet, null, .{}); + pub const inputEncoding = bridge.accessor(getCharacterSet, null, .{}); pub const compatMode = bridge.property("CSS1Compat", .{ .template = false }); + + fn getCharacterSet(self: *const Document) []const u8 { + const doc_page = self._page orelse return "UTF-8"; + return doc_page.charset; + } pub const referrer = bridge.property("", .{ .template = false }); }; diff --git 
a/src/browser/webapi/Event.zig b/src/browser/webapi/Event.zig index b573bfc7..50895866 100644 --- a/src/browser/webapi/Event.zig +++ b/src/browser/webapi/Event.zig @@ -90,14 +90,14 @@ pub const Options = struct { }; pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*Event { - const arena = try page.getArena(.{ .debug = "Event" }); + const arena = try page.getArena(.tiny, "Event"); errdefer page.releaseArena(arena); const str = try String.init(arena, typ, .{}); return initWithTrusted(arena, str, opts_, false); } pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*Event { - const arena = try page.getArena(.{ .debug = "Event.trusted" }); + const arena = try page.getArena(.tiny, "Event.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts_, true); } diff --git a/src/browser/webapi/File.zig b/src/browser/webapi/File.zig index fb27359a..e4c70662 100644 --- a/src/browser/webapi/File.zig +++ b/src/browser/webapi/File.zig @@ -31,7 +31,7 @@ _proto: *Blob, // TODO: Implement File API. 
pub fn init(page: *Page) !*File { - const arena = try page.getArena(.{ .debug = "File" }); + const arena = try page.getArena(.tiny, "File"); errdefer page.releaseArena(arena); return page._factory.blob(arena, File{ ._proto = undefined }); } diff --git a/src/browser/webapi/FileReader.zig b/src/browser/webapi/FileReader.zig index 109fdc7b..33f0e209 100644 --- a/src/browser/webapi/FileReader.zig +++ b/src/browser/webapi/FileReader.zig @@ -63,7 +63,7 @@ const Result = union(enum) { }; pub fn init(page: *Page) !*FileReader { - const arena = try page.getArena(.{ .debug = "FileReader" }); + const arena = try page.getArena(.tiny, "FileReader"); errdefer page.releaseArena(arena); return page._factory.eventTargetWithAllocator(arena, FileReader{ diff --git a/src/browser/webapi/IntersectionObserver.zig b/src/browser/webapi/IntersectionObserver.zig index cbc9278f..990c45ee 100644 --- a/src/browser/webapi/IntersectionObserver.zig +++ b/src/browser/webapi/IntersectionObserver.zig @@ -71,7 +71,7 @@ pub const ObserverInit = struct { }; pub fn init(callback: js.Function.Temp, options: ?ObserverInit, page: *Page) !*IntersectionObserver { - const arena = try page.getArena(.{ .debug = "IntersectionObserver" }); + const arena = try page.getArena(.medium, "IntersectionObserver"); errdefer page.releaseArena(arena); const opts = options orelse ObserverInit{}; @@ -266,7 +266,7 @@ fn checkIntersection(self: *IntersectionObserver, target: *Element, page: *Page) (was_intersecting_opt != null and was_intersecting_opt.? 
!= is_now_intersecting); if (should_report) { - const arena = try page.getArena(.{ .debug = "IntersectionObserverEntry" }); + const arena = try page.getArena(.tiny, "IntersectionObserverEntry"); errdefer page.releaseArena(arena); const entry = try arena.create(IntersectionObserverEntry); diff --git a/src/browser/webapi/MutationObserver.zig b/src/browser/webapi/MutationObserver.zig index 5453e797..6a99fcb5 100644 --- a/src/browser/webapi/MutationObserver.zig +++ b/src/browser/webapi/MutationObserver.zig @@ -76,7 +76,7 @@ pub const ObserveOptions = struct { }; pub fn init(callback: js.Function.Temp, page: *Page) !*MutationObserver { - const arena = try page.getArena(.{ .debug = "MutationObserver" }); + const arena = try page.getArena(.medium, "MutationObserver"); errdefer page.releaseArena(arena); const self = try arena.create(MutationObserver); @@ -227,7 +227,7 @@ pub fn notifyAttributeChange( } } - const arena = try page.getArena(.{ .debug = "MutationRecord" }); + const arena = try page.getArena(.tiny, "MutationRecord"); const record = try arena.create(MutationRecord); record.* = .{ ._arena = arena, @@ -271,7 +271,7 @@ pub fn notifyCharacterDataChange( continue; } - const arena = try page.getArena(.{ .debug = "MutationRecord" }); + const arena = try page.getArena(.tiny, "MutationRecord"); const record = try arena.create(MutationRecord); record.* = .{ ._arena = arena, @@ -318,7 +318,7 @@ pub fn notifyChildListChange( continue; } - const arena = try page.getArena(.{ .debug = "MutationRecord" }); + const arena = try page.getArena(.tiny, "MutationRecord"); const record = try arena.create(MutationRecord); record.* = .{ ._arena = arena, diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig index 0e7c2ffe..5871abee 100644 --- a/src/browser/webapi/Node.zig +++ b/src/browser/webapi/Node.zig @@ -22,6 +22,7 @@ const String = @import("../../string.zig").String; const js = @import("../js/js.zig"); const Page = @import("../Page.zig"); +const URL = 
@import("../URL.zig"); const reflect = @import("../reflect.zig"); const EventTarget = @import("EventTarget.zig"); @@ -511,6 +512,18 @@ pub fn ownerPage(self: *const Node, default: *Page) *Page { return doc._page orelse default; } +pub const ResolveURLOpts = struct { + allocator: ?Allocator = null, +}; + +// Resolve a URL relative to this node's owning document. +// Uses the document's charset for query string encoding (with NCR fallback for unmappable chars). +pub fn resolveURL(self: *const Node, url: anytype, page: *Page, opts: ResolveURLOpts) ![:0]const u8 { + const owner_page = self.ownerPage(page); + const allocator = opts.allocator orelse page.call_arena; + return URL.resolve(allocator, owner_page.base(), url, .{ .encoding = owner_page.charset }); +} + pub fn isSameDocumentAs(self: *const Node, other: *const Node, page: *const Page) bool { // Get the root document for each node const self_doc = if (self._type == .document) self._type.document else self.ownerDocument(page); diff --git a/src/browser/webapi/Permissions.zig b/src/browser/webapi/Permissions.zig index 8a06b4f4..84ff810f 100644 --- a/src/browser/webapi/Permissions.zig +++ b/src/browser/webapi/Permissions.zig @@ -38,7 +38,7 @@ const QueryDescriptor = struct { }; // We always report 'prompt' (the default safe value — neither granted nor denied). 
pub fn query(_: *const Permissions, qd: QueryDescriptor, page: *Page) !js.Promise { - const arena = try page.getArena(.{ .debug = "PermissionStatus" }); + const arena = try page.getArena(.tiny, "PermissionStatus"); errdefer page.releaseArena(arena); const status = try arena.create(PermissionStatus); diff --git a/src/browser/webapi/Range.zig b/src/browser/webapi/Range.zig index 720fc5ff..dab3db89 100644 --- a/src/browser/webapi/Range.zig +++ b/src/browser/webapi/Range.zig @@ -33,7 +33,7 @@ const Range = @This(); _proto: *AbstractRange, pub fn init(page: *Page) !*Range { - const arena = try page.getArena(.{ .debug = "Range" }); + const arena = try page.getArena(.medium, "Range"); errdefer page.releaseArena(arena); return page._factory.abstractRange(arena, Range{ ._proto = undefined }, page); } @@ -312,7 +312,7 @@ pub fn intersectsNode(self: *const Range, node: *Node) bool { } pub fn cloneRange(self: *const Range, page: *Page) !*Range { - const arena = try page.getArena(.{ .debug = "Range.clone" }); + const arena = try page.getArena(.medium, "Range.clone"); errdefer page.releaseArena(arena); const clone = try page._factory.abstractRange(arena, Range{ ._proto = undefined }, page); diff --git a/src/browser/webapi/Window.zig b/src/browser/webapi/Window.zig index 418037fd..ef076663 100644 --- a/src/browser/webapi/Window.zig +++ b/src/browser/webapi/Window.zig @@ -407,7 +407,7 @@ pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]cons const target_page = self._page; const source_window = target_page.js.getIncumbent().window; - const arena = try target_page.getArena(.{ .debug = "Window.postMessage" }); + const arena = try target_page.getArena(.medium, "Window.postMessage"); errdefer target_page.releaseArena(arena); // Origin should be the source window's origin (where the message came from) @@ -645,7 +645,7 @@ fn scheduleCallback(self: *Window, cb: js.Function.Temp, delay_ms: u32, opts: Sc return error.TooManyTimeout; } - const arena = try 
page.getArena(.{ .debug = "Window.schedule" }); + const arena = try page.getArena(.tiny, "Window.schedule"); errdefer page.releaseArena(arena); const timer_id = self._timer_id +% 1; diff --git a/src/browser/webapi/animation/Animation.zig b/src/browser/webapi/animation/Animation.zig index 08eb21c2..4bddfd1d 100644 --- a/src/browser/webapi/animation/Animation.zig +++ b/src/browser/webapi/animation/Animation.zig @@ -52,7 +52,7 @@ _playState: PlayState = .idle, // // TODO add support for effect and timeline pub fn init(page: *Page) !*Animation { - const arena = try page.getArena(.{ .debug = "Animation" }); + const arena = try page.getArena(.tiny, "Animation"); errdefer page.releaseArena(arena); const self = try arena.create(Animation); diff --git a/src/browser/webapi/collections/ChildNodes.zig b/src/browser/webapi/collections/ChildNodes.zig index df3e7ee1..410c12b7 100644 --- a/src/browser/webapi/collections/ChildNodes.zig +++ b/src/browser/webapi/collections/ChildNodes.zig @@ -39,7 +39,7 @@ pub const ValueIterator = GenericIterator(Iterator, "1"); pub const EntryIterator = GenericIterator(Iterator, null); pub fn init(node: *Node, page: *Page) !*ChildNodes { - const arena = try page.getArena(.{ .debug = "ChildNodes" }); + const arena = try page.getArena(.small, "ChildNodes"); errdefer page.releaseArena(arena); const self = try arena.create(ChildNodes); diff --git a/src/browser/webapi/css/FontFace.zig b/src/browser/webapi/css/FontFace.zig index 075d9135..9ccb0c4c 100644 --- a/src/browser/webapi/css/FontFace.zig +++ b/src/browser/webapi/css/FontFace.zig @@ -33,7 +33,7 @@ _family: []const u8, pub fn init(family: []const u8, source: []const u8, page: *Page) !*FontFace { _ = source; - const arena = try page.getArena(.{ .debug = "FontFace" }); + const arena = try page.getArena(.tiny, "FontFace"); errdefer page.releaseArena(arena); const self = try arena.create(FontFace); diff --git a/src/browser/webapi/css/FontFaceSet.zig b/src/browser/webapi/css/FontFaceSet.zig index 
b20017ca..f43dc405 100644 --- a/src/browser/webapi/css/FontFaceSet.zig +++ b/src/browser/webapi/css/FontFaceSet.zig @@ -34,7 +34,7 @@ _proto: *EventTarget, _arena: Allocator, pub fn init(page: *Page) !*FontFaceSet { - const arena = try page.getArena(.{ .debug = "FontFaceSet" }); + const arena = try page.getArena(.tiny, "FontFaceSet"); errdefer page.releaseArena(arena); return page._factory.eventTargetWithAllocator(arena, FontFaceSet{ diff --git a/src/browser/webapi/element/Html.zig b/src/browser/webapi/element/Html.zig index d72b4fa8..7a33d25a 100644 --- a/src/browser/webapi/element/Html.zig +++ b/src/browser/webapi/element/Html.zig @@ -292,7 +292,7 @@ pub fn insertAdjacentHTML( }); const doc_node = doc.asNode(); - const arena = try page.getArena(.{ .debug = "HTML.insertAdjacentHTML" }); + const arena = try page.getArena(.medium, "HTML.insertAdjacentHTML"); defer page.releaseArena(arena); const Parser = @import("../../parser/Parser.zig"); diff --git a/src/browser/webapi/element/html/Anchor.zig b/src/browser/webapi/element/html/Anchor.zig index 33c8bded..e4207e84 100644 --- a/src/browser/webapi/element/html/Anchor.zig +++ b/src/browser/webapi/element/html/Anchor.zig @@ -39,12 +39,11 @@ pub fn asNode(self: *Anchor) *Node { } pub fn getHref(self: *Anchor, page: *Page) ![]const u8 { - const element = self.asElement(); - const href = element.getAttributeSafe(comptime .wrap("href")) orelse return ""; + const href = self.asElement().getAttributeSafe(comptime .wrap("href")) orelse return ""; if (href.len == 0) { return ""; } - return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true }); + return self.asNode().resolveURL(href, page, .{}); } pub fn setHref(self: *Anchor, value: []const u8, page: *Page) !void { @@ -203,7 +202,7 @@ fn getResolvedHref(self: *Anchor, page: *Page) !?[:0]const u8 { if (href.len == 0) { return null; } - return try URL.resolve(page.call_arena, page.base(), href, .{}); + return try self.asNode().resolveURL(href, page, .{}); } pub 
const JsApi = struct { diff --git a/src/browser/webapi/element/html/Form.zig b/src/browser/webapi/element/html/Form.zig index e8857e48..6628306b 100644 --- a/src/browser/webapi/element/html/Form.zig +++ b/src/browser/webapi/element/html/Form.zig @@ -97,7 +97,7 @@ pub fn getAction(self: *Form, page: *Page) ![]const u8 { if (action.len == 0) { return page.url; } - return URL.resolve(page.call_arena, page.base(), action, .{ .encode = true }); + return element.asNode().resolveURL(action, page, .{}); } pub fn setAction(self: *Form, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/IFrame.zig b/src/browser/webapi/element/html/IFrame.zig index e596f4ac..3b276dcd 100644 --- a/src/browser/webapi/element/html/IFrame.zig +++ b/src/browser/webapi/element/html/IFrame.zig @@ -48,9 +48,9 @@ pub fn getContentDocument(self: *const IFrame) ?*Document { return window._document; } -pub fn getSrc(self: *const IFrame, page: *Page) ![:0]const u8 { +pub fn getSrc(self: *IFrame, page: *Page) ![:0]const u8 { if (self._src.len == 0) return ""; - return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true }); + return self.asNode().resolveURL(self._src, page, .{}); } pub fn setSrc(self: *IFrame, src: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Image.zig b/src/browser/webapi/element/html/Image.zig index b6731144..e3b57cd9 100644 --- a/src/browser/webapi/element/html/Image.zig +++ b/src/browser/webapi/element/html/Image.zig @@ -40,9 +40,7 @@ pub fn getSrc(self: *const Image, page: *Page) ![]const u8 { if (src.len == 0) { return ""; } - - // Always resolve the src against the page URL - return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true }); + return element.asConstNode().resolveURL(src, page, .{}); } pub fn setSrc(self: *Image, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Link.zig b/src/browser/webapi/element/html/Link.zig index 
ed3839f2..5b6ce0c6 100644 --- a/src/browser/webapi/element/html/Link.zig +++ b/src/browser/webapi/element/html/Link.zig @@ -44,9 +44,7 @@ pub fn getHref(self: *Link, page: *Page) ![]const u8 { if (href.len == 0) { return ""; } - - // Always resolve the href against the page URL - return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true }); + return element.asNode().resolveURL(href, page, .{}); } pub fn setHref(self: *Link, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Media.zig b/src/browser/webapi/element/html/Media.zig index 71013e71..6d62013f 100644 --- a/src/browser/webapi/element/html/Media.zig +++ b/src/browser/webapi/element/html/Media.zig @@ -235,8 +235,7 @@ pub fn getSrc(self: *const Media, page: *Page) ![]const u8 { if (src.len == 0) { return ""; } - const URL = @import("../../URL.zig"); - return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true }); + return element.asConstNode().resolveURL(src, page, .{}); } pub fn setSrc(self: *Media, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Script.zig b/src/browser/webapi/element/html/Script.zig index d5e83b4f..77b6b7ef 100644 --- a/src/browser/webapi/element/html/Script.zig +++ b/src/browser/webapi/element/html/Script.zig @@ -45,9 +45,9 @@ pub fn asNode(self: *Script) *Node { return self.asElement().asNode(); } -pub fn getSrc(self: *const Script, page: *Page) ![]const u8 { +pub fn getSrc(self: *Script, page: *Page) ![]const u8 { if (self._src.len == 0) return ""; - return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true }); + return self.asNode().resolveURL(self._src, page, .{}); } pub fn setSrc(self: *Script, src: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/element/html/Video.zig b/src/browser/webapi/element/html/Video.zig index 63ccda4a..8fabb3ae 100644 --- a/src/browser/webapi/element/html/Video.zig +++ b/src/browser/webapi/element/html/Video.zig @@ -57,9 
+57,7 @@ pub fn getPoster(self: *const Video, page: *Page) ![]const u8 { if (poster.len == 0) { return ""; } - - const URL = @import("../../URL.zig"); - return URL.resolve(page.call_arena, page.base(), poster, .{ .encode = true }); + return element.asConstNode().resolveURL(poster, page, .{}); } pub fn setPoster(self: *Video, value: []const u8, page: *Page) !void { diff --git a/src/browser/webapi/encoding/TextDecoder.zig b/src/browser/webapi/encoding/TextDecoder.zig index c117df09..7668e48e 100644 --- a/src/browser/webapi/encoding/TextDecoder.zig +++ b/src/browser/webapi/encoding/TextDecoder.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -19,6 +19,7 @@ const std = @import("std"); const lp = @import("lightpanda"); const js = @import("../../js/js.zig"); +const html5ever = @import("../../parser/html5ever.zig"); const Page = @import("../../Page.zig"); const Session = @import("../../Session.zig"); @@ -30,13 +31,11 @@ _rc: lp.RC(u8) = .{}, _fatal: bool, _arena: Allocator, _ignore_bom: bool, -_stream: std.ArrayList(u8), - -const Label = enum { - utf8, - @"utf-8", - @"unicode-1-1-utf-8", -}; +_bom_seen: bool, +_decoder: ?*anyopaque, // Persistent streaming decoder +_encoding_handle: *anyopaque, +_encoding_name: []const u8, +_lowercase_name: []const u8, // Cached lowercase version of encoding name const InitOpts = struct { fatal: bool = false, @@ -44,25 +43,41 @@ const InitOpts = struct { }; pub fn init(label_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*TextDecoder { - if (label_) |label| { - _ = std.meta.stringToEnum(Label, label) orelse return error.RangeError; + const label = label_ orelse "utf-8"; + + const info = html5ever.encoding_for_label(label.ptr, label.len); + if (!info.isValid()) { + return error.RangeError; } - const arena = try page.getArena(.{ .debug = "TextDecoder" }); + // Check for "replacement" encoding - it's not usable for decoding 
per spec + const enc_name = info.name(); + if (std.mem.eql(u8, enc_name, "replacement")) { + return error.RangeError; + } + + const arena = try page.getArena(.large, "TextDecoder"); errdefer page.releaseArena(arena); const opts = opts_ orelse InitOpts{}; const self = try arena.create(TextDecoder); self.* = .{ ._arena = arena, - ._stream = .empty, ._fatal = opts.fatal, ._ignore_bom = opts.ignoreBOM, + ._encoding_handle = info.handle.?, + ._decoder = null, + ._bom_seen = false, + ._lowercase_name = "", // Will be lazily allocated + ._encoding_name = enc_name, // Points to static Rust memory }; return self; } pub fn deinit(self: *TextDecoder, session: *Session) void { + if (self._decoder) |decoder| { + html5ever.encoding_decoder_free(decoder); + } session.releaseArena(self._arena); } @@ -82,34 +97,110 @@ pub fn getFatal(self: *const TextDecoder) bool { return self._fatal; } +pub fn getEncoding(self: *TextDecoder) ![]const u8 { + // Spec requires lowercase encoding name + // Allocate buffer for lowercase name on first access + if (self._lowercase_name.len > 0) { + return self._lowercase_name; + } + self._lowercase_name = try std.ascii.allocLowerString(self._arena, self._encoding_name); + return self._lowercase_name; +} + const DecodeOpts = struct { stream: bool = false, }; + pub fn decode(self: *TextDecoder, input_: ?[]const u8, opts_: ?DecodeOpts) ![]const u8 { - var input = input_ orelse return ""; const opts: DecodeOpts = opts_ orelse .{}; + const input = input_ orelse ""; - if (self._stream.items.len > 0) { - try self._stream.appendSlice(self._arena, input); - input = self._stream.items; - } - - if (self._fatal and !std.unicode.utf8ValidateSlice(input)) { - if (opts.stream) { - if (self._stream.items.len == 0) { - try self._stream.appendSlice(self._arena, input); - } - return ""; + // For non-streaming calls, we don't need a persistent decoder + if (!opts.stream) { + // Reset decoder state if we had one + if (self._decoder) |decoder| { + 
html5ever.encoding_decoder_free(decoder); + self._decoder = null; + } + } else if (self._decoder == null) { + self._decoder = html5ever.encoding_decoder_new(self._encoding_handle); + if (self._decoder == null) { + return error.OutOfMemory; } - return error.InvalidUtf8; } - self._stream.clearRetainingCapacity(); - if (self._ignore_bom == false and std.mem.startsWith(u8, input, &.{ 0xEF, 0xBB, 0xBF })) { - return input[3..]; + return self._decode(input, self._decoder); +} + +fn _decode(self: *TextDecoder, input: []const u8, streaming_decoder: ?*anyopaque) ![]const u8 { + if (input.len == 0) { + return ""; } - return input; + // Calculate max output size + const max_out = html5ever.encoding_max_utf8_buffer_length( + self._encoding_handle, + input.len, + ); + + if (max_out == 0) { + return ""; + } + + // Allocate output buffer + const output = try self._arena.alloc(u8, max_out); + + // Decode using either streaming or one-shot decoder + const result = if (streaming_decoder) |decoder| + html5ever.encoding_decoder_decode( + decoder, + input.ptr, + input.len, + output.ptr, + output.len, + 0, // is_last = false for streaming + ) + else + html5ever.encoding_decode( + self._encoding_handle, + input.ptr, + input.len, + output.ptr, + output.len, + 1, // is_last = true for one-shot + ); + + // Handle errors in fatal mode + if (self._fatal and result.hadErrors()) { + if (streaming_decoder != null) { + // Reset decoder on error + if (self._decoder) |decoder| { + html5ever.encoding_decoder_free(decoder); + self._decoder = null; + } + } + self._bom_seen = false; + return error.TypeError; + } + + var decoded: []const u8 = output[0..result.bytes_written]; + + // Handle BOM stripping + if (!self._bom_seen and !self._ignore_bom) { + decoded = stripBom(decoded); + self._bom_seen = true; + } + + return decoded; +} + +fn stripBom(data: []const u8) []const u8 { + // UTF-8 BOM in decoded output appears as U+FEFF (EF BB BF in UTF-8) + const bom = "\u{FEFF}"; + if (std.mem.startsWith(u8, 
data, bom)) { + return data[bom.len..]; + } + return data; } pub const JsApi = struct { @@ -123,7 +214,7 @@ pub const JsApi = struct { pub const constructor = bridge.constructor(TextDecoder.init, .{}); pub const decode = bridge.function(TextDecoder.decode, .{}); - pub const encoding = bridge.property("utf-8", .{ .template = false }); + pub const encoding = bridge.accessor(TextDecoder.getEncoding, null, .{}); pub const fatal = bridge.accessor(TextDecoder.getFatal, null, .{}); pub const ignoreBOM = bridge.accessor(TextDecoder.getIgnoreBOM, null, .{}); }; diff --git a/src/browser/webapi/encoding/TextEncoder.zig b/src/browser/webapi/encoding/TextEncoder.zig index a6bff48e..112d2e32 100644 --- a/src/browser/webapi/encoding/TextEncoder.zig +++ b/src/browser/webapi/encoding/TextEncoder.zig @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2025 Lightpanda (Selecy SAS) +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) // // Francis Bouvier // Pierre Tachoire @@ -26,12 +26,23 @@ pub fn init() TextEncoder { return .{}; } -pub fn encode(_: *const TextEncoder, v: []const u8) !js.TypedArray(u8) { - if (!std.unicode.utf8ValidateSlice(v)) { +pub fn encode(_: *const TextEncoder, v_: ?js.Value) !js.TypedArray(u8) { + const v = v_ orelse return .{ .values = "" }; + + if (v.isUndefined()) { + return .{ .values = "" }; + } + + if (v.isNull()) { + return .{ .values = "null" }; + } + + const str = try v.toStringSlice(); + if (!std.unicode.utf8ValidateSlice(str)) { return error.InvalidUtf8; } - return .{ .values = v }; + return .{ .values = str }; } pub const JsApi = struct { diff --git a/src/browser/webapi/event/CloseEvent.zig b/src/browser/webapi/event/CloseEvent.zig index aa9f1d2b..dbe5f21a 100644 --- a/src/browser/webapi/event/CloseEvent.zig +++ b/src/browser/webapi/event/CloseEvent.zig @@ -39,14 +39,14 @@ const CloseEventOptions = struct { const Options = Event.inheritOptions(CloseEvent, CloseEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*CloseEvent { - const 
arena = try page.getArena(.{ .debug = "CloseEvent" }); + const arena = try page.getArena(.tiny, "CloseEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*CloseEvent { - const arena = try page.getArena(.{ .debug = "CloseEvent.trusted" }); + const arena = try page.getArena(.tiny, "CloseEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/CompositionEvent.zig b/src/browser/webapi/event/CompositionEvent.zig index 7f3fd1d2..04077994 100644 --- a/src/browser/webapi/event/CompositionEvent.zig +++ b/src/browser/webapi/event/CompositionEvent.zig @@ -35,7 +35,7 @@ const CompositionEventOptions = struct { const Options = Event.inheritOptions(CompositionEvent, CompositionEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*CompositionEvent { - const arena = try page.getArena(.{ .debug = "CompositionEvent" }); + const arena = try page.getArena(.tiny, "CompositionEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/CustomEvent.zig b/src/browser/webapi/event/CustomEvent.zig index 9013bb4a..51efa36c 100644 --- a/src/browser/webapi/event/CustomEvent.zig +++ b/src/browser/webapi/event/CustomEvent.zig @@ -38,7 +38,7 @@ const CustomEventOptions = struct { const Options = Event.inheritOptions(CustomEvent, CustomEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*CustomEvent { - const arena = try page.getArena(.{ .debug = "CustomEvent" }); + const arena = try page.getArena(.tiny, "CustomEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/ErrorEvent.zig b/src/browser/webapi/event/ErrorEvent.zig index 
aef63a0e..4bb68573 100644 --- a/src/browser/webapi/event/ErrorEvent.zig +++ b/src/browser/webapi/event/ErrorEvent.zig @@ -47,14 +47,14 @@ pub const ErrorEventOptions = struct { const Options = Event.inheritOptions(ErrorEvent, ErrorEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*ErrorEvent { - const arena = try page.getArena(.{ .debug = "ErrorEvent" }); + const arena = try page.getArena(.small, "ErrorEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts_, false, page); } pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*ErrorEvent { - const arena = try page.getArena(.{ .debug = "ErrorEvent.trusted" }); + const arena = try page.getArena(.small, "ErrorEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts_, true, page); } diff --git a/src/browser/webapi/event/FocusEvent.zig b/src/browser/webapi/event/FocusEvent.zig index 776605db..59e88e36 100644 --- a/src/browser/webapi/event/FocusEvent.zig +++ b/src/browser/webapi/event/FocusEvent.zig @@ -42,13 +42,13 @@ pub const Options = Event.inheritOptions( ); pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*FocusEvent { - const arena = try page.getArena(.{ .debug = "FocusEvent.trusted" }); + const arena = try page.getArena(.tiny, "FocusEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*FocusEvent { - const arena = try page.getArena(.{ .debug = "FocusEvent" }); + const arena = try page.getArena(.tiny, "FocusEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); diff --git a/src/browser/webapi/event/FormDataEvent.zig b/src/browser/webapi/event/FormDataEvent.zig index 93eadfa3..ce45a9d0 100644 --- 
a/src/browser/webapi/event/FormDataEvent.zig +++ b/src/browser/webapi/event/FormDataEvent.zig @@ -38,14 +38,14 @@ const Options = Event.inheritOptions(FormDataEvent, struct { }); pub fn init(typ: []const u8, maybe_options: Options, page: *Page) !*FormDataEvent { - const arena = try page.getArena(.{ .debug = "FormDataEvent" }); + const arena = try page.getArena(.tiny, "FormDataEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, maybe_options, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*FormDataEvent { - const arena = try page.getArena(.{ .debug = "FormDataEvent.trusted" }); + const arena = try page.getArena(.tiny, "FormDataEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/InputEvent.zig b/src/browser/webapi/event/InputEvent.zig index 3b01b900..3c00debd 100644 --- a/src/browser/webapi/event/InputEvent.zig +++ b/src/browser/webapi/event/InputEvent.zig @@ -46,13 +46,13 @@ const Options = Event.inheritOptions( ); pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*InputEvent { - const arena = try page.getArena(.{ .debug = "InputEvent.trusted" }); + const arena = try page.getArena(.tiny, "InputEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*InputEvent { - const arena = try page.getArena(.{ .debug = "InputEvent" }); + const arena = try page.getArena(.tiny, "InputEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); diff --git a/src/browser/webapi/event/KeyboardEvent.zig b/src/browser/webapi/event/KeyboardEvent.zig index ddc7548d..f8056cc3 100644 --- a/src/browser/webapi/event/KeyboardEvent.zig +++ 
b/src/browser/webapi/event/KeyboardEvent.zig @@ -186,13 +186,13 @@ const Options = Event.inheritOptions( ); pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*KeyboardEvent { - const arena = try page.getArena(.{ .debug = "KeyboardEvent.trusted" }); + const arena = try page.getArena(.tiny, "KeyboardEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*KeyboardEvent { - const arena = try page.getArena(.{ .debug = "KeyboardEvent" }); + const arena = try page.getArena(.tiny, "KeyboardEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); diff --git a/src/browser/webapi/event/MessageEvent.zig b/src/browser/webapi/event/MessageEvent.zig index dfd813d5..27fdfb23 100644 --- a/src/browser/webapi/event/MessageEvent.zig +++ b/src/browser/webapi/event/MessageEvent.zig @@ -50,14 +50,14 @@ pub const Data = union(enum) { const Options = Event.inheritOptions(MessageEvent, MessageEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*MessageEvent { - const arena = try page.getArena(.{ .debug = "MessageEvent" }); + const arena = try page.getArena(.small, "MessageEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts_, false, page); } pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*MessageEvent { - const arena = try page.getArena(.{ .debug = "MessageEvent.trusted" }); + const arena = try page.getArena(.small, "MessageEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts_, true, page); } diff --git a/src/browser/webapi/event/MouseEvent.zig b/src/browser/webapi/event/MouseEvent.zig index 999bd010..ff2b1118 100644 --- a/src/browser/webapi/event/MouseEvent.zig +++ 
b/src/browser/webapi/event/MouseEvent.zig @@ -82,14 +82,14 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*MouseEvent { - const arena = try page.getArena(.{ .debug = "MouseEvent" }); + const arena = try page.getArena(.tiny, "MouseEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*MouseEvent { - const arena = try page.getArena(.{ .debug = "MouseEvent.trusted" }); + const arena = try page.getArena(.tiny, "MouseEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig b/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig index 816fa1c8..d791cb39 100644 --- a/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig +++ b/src/browser/webapi/event/NavigationCurrentEntryChangeEvent.zig @@ -45,14 +45,14 @@ const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, opts: Options, page: *Page) !*NavigationCurrentEntryChangeEvent { - const arena = try page.getArena(.{ .debug = "NavigationCurrentEntryChangeEvent" }); + const arena = try page.getArena(.tiny, "NavigationCurrentEntryChangeEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts, false, page); } pub fn initTrusted(typ: String, opts: Options, page: *Page) !*NavigationCurrentEntryChangeEvent { - const arena = try page.getArena(.{ .debug = "NavigationCurrentEntryChangeEvent.trusted" }); + const arena = try page.getArena(.tiny, "NavigationCurrentEntryChangeEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, opts, true, page); } diff --git a/src/browser/webapi/event/PageTransitionEvent.zig 
b/src/browser/webapi/event/PageTransitionEvent.zig index e11be386..335a665a 100644 --- a/src/browser/webapi/event/PageTransitionEvent.zig +++ b/src/browser/webapi/event/PageTransitionEvent.zig @@ -38,14 +38,14 @@ const PageTransitionEventOptions = struct { const Options = Event.inheritOptions(PageTransitionEvent, PageTransitionEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PageTransitionEvent { - const arena = try page.getArena(.{ .debug = "PageTransitionEvent" }); + const arena = try page.getArena(.tiny, "PageTransitionEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*PageTransitionEvent { - const arena = try page.getArena(.{ .debug = "PageTransitionEvent.trusted" }); + const arena = try page.getArena(.tiny, "PageTransitionEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/PointerEvent.zig b/src/browser/webapi/event/PointerEvent.zig index c5440d45..4eb1e250 100644 --- a/src/browser/webapi/event/PointerEvent.zig +++ b/src/browser/webapi/event/PointerEvent.zig @@ -84,7 +84,7 @@ const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PointerEvent { - const arena = try page.getArena(.{ .debug = "UIEvent" }); + const arena = try page.getArena(.tiny, "PointerEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/PopStateEvent.zig b/src/browser/webapi/event/PopStateEvent.zig index cd430cf8..3b0fe4e4 100644 --- a/src/browser/webapi/event/PopStateEvent.zig +++ b/src/browser/webapi/event/PopStateEvent.zig @@ -39,14 +39,14 @@ const PopStateEventOptions = struct { const Options = Event.inheritOptions(PopStateEvent, PopStateEventOptions); pub fn init(typ: 
[]const u8, _opts: ?Options, page: *Page) !*PopStateEvent { - const arena = try page.getArena(.{ .debug = "PopStateEvent" }); + const arena = try page.getArena(.tiny, "PopStateEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*PopStateEvent { - const arena = try page.getArena(.{ .debug = "PopStateEvent.trusted" }); + const arena = try page.getArena(.tiny, "PopStateEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/ProgressEvent.zig b/src/browser/webapi/event/ProgressEvent.zig index 6498da48..895bff09 100644 --- a/src/browser/webapi/event/ProgressEvent.zig +++ b/src/browser/webapi/event/ProgressEvent.zig @@ -39,14 +39,14 @@ const ProgressEventOptions = struct { const Options = Event.inheritOptions(ProgressEvent, ProgressEventOptions); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*ProgressEvent { - const arena = try page.getArena(.{ .debug = "ProgressEvent" }); + const arena = try page.getArena(.tiny, "ProgressEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, _opts, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*ProgressEvent { - const arena = try page.getArena(.{ .debug = "ProgressEvent.trusted" }); + const arena = try page.getArena(.tiny, "ProgressEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/PromiseRejectionEvent.zig b/src/browser/webapi/event/PromiseRejectionEvent.zig index cc014b39..44af3904 100644 --- a/src/browser/webapi/event/PromiseRejectionEvent.zig +++ b/src/browser/webapi/event/PromiseRejectionEvent.zig @@ -37,7 +37,7 @@ const 
PromiseRejectionEventOptions = struct { const Options = Event.inheritOptions(PromiseRejectionEvent, PromiseRejectionEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*PromiseRejectionEvent { - const arena = try page.getArena(.{ .debug = "PromiseRejectionEvent" }); + const arena = try page.getArena(.tiny, "PromiseRejectionEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/SubmitEvent.zig b/src/browser/webapi/event/SubmitEvent.zig index f48365dc..3400cbcd 100644 --- a/src/browser/webapi/event/SubmitEvent.zig +++ b/src/browser/webapi/event/SubmitEvent.zig @@ -39,14 +39,14 @@ const SubmitEventOptions = struct { const Options = Event.inheritOptions(SubmitEvent, SubmitEventOptions); pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*SubmitEvent { - const arena = try page.getArena(.{ .debug = "SubmitEvent" }); + const arena = try page.getArena(.tiny, "SubmitEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); return initWithTrusted(arena, type_string, opts_, false, page); } pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*SubmitEvent { - const arena = try page.getArena(.{ .debug = "SubmitEvent.trusted" }); + const arena = try page.getArena(.tiny, "SubmitEvent.trusted"); errdefer page.releaseArena(arena); return initWithTrusted(arena, typ, _opts, true, page); } diff --git a/src/browser/webapi/event/TextEvent.zig b/src/browser/webapi/event/TextEvent.zig index 3ddb2636..dcc5e478 100644 --- a/src/browser/webapi/event/TextEvent.zig +++ b/src/browser/webapi/event/TextEvent.zig @@ -40,7 +40,7 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*TextEvent { - const arena = try page.getArena(.{ .debug = "TextEvent" }); + const arena = try page.getArena(.tiny, "TextEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, 
typ, .{}); diff --git a/src/browser/webapi/event/UIEvent.zig b/src/browser/webapi/event/UIEvent.zig index 6874d6d5..2b456738 100644 --- a/src/browser/webapi/event/UIEvent.zig +++ b/src/browser/webapi/event/UIEvent.zig @@ -51,7 +51,7 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*UIEvent { - const arena = try page.getArena(.{ .debug = "UIEvent" }); + const arena = try page.getArena(.tiny, "UIEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/event/WheelEvent.zig b/src/browser/webapi/event/WheelEvent.zig index 4711ac25..8f79ab8a 100644 --- a/src/browser/webapi/event/WheelEvent.zig +++ b/src/browser/webapi/event/WheelEvent.zig @@ -50,7 +50,7 @@ pub const Options = Event.inheritOptions( ); pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*WheelEvent { - const arena = try page.getArena(.{ .debug = "WheelEvent" }); + const arena = try page.getArena(.medium, "WheelEvent"); errdefer page.releaseArena(arena); const type_string = try String.init(arena, typ, .{}); diff --git a/src/browser/webapi/net/Response.zig b/src/browser/webapi/net/Response.zig index e4fbd46d..7ed7ba4d 100644 --- a/src/browser/webapi/net/Response.zig +++ b/src/browser/webapi/net/Response.zig @@ -57,7 +57,7 @@ const InitOpts = struct { }; pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response { - const arena = try page.getArena(.{ .debug = "Response" }); + const arena = try page.getArena(.large, "Response"); errdefer page.releaseArena(arena); const opts = opts_ orelse InitOpts{}; @@ -174,7 +174,7 @@ pub fn bytes(self: *const Response, page: *Page) !js.Promise { } pub fn clone(self: *const Response, page: *Page) !*Response { - const arena = try page.getArena(.{ .debug = "Response.clone" }); + const arena = try page.getArena((self._body orelse "").len + self._url.len + 256, "Response.clone"); errdefer page.releaseArena(arena); const body = 
if (self._body) |b| try arena.dupe(u8, b) else null; diff --git a/src/browser/webapi/net/WebSocket.zig b/src/browser/webapi/net/WebSocket.zig index 1244a61e..aef8809a 100644 --- a/src/browser/webapi/net/WebSocket.zig +++ b/src/browser/webapi/net/WebSocket.zig @@ -105,10 +105,10 @@ pub fn init(url: []const u8, protocols_: ?[]const u8, page: *Page) !*WebSocket { } } - const arena = try page.getArena(.{ .debug = "WebSocket" }); + const arena = try page.getArena(.medium, "WebSocket"); errdefer page.releaseArena(arena); - const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encode = true }); + const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encoding = page.charset }); const http_client = page._session.browser.http_client; const conn = http_client.network.newConnection() orelse { @@ -272,12 +272,10 @@ pub fn send(self: *WebSocket, data: SendData) !void { return error.InvalidStateError; } - // Get a dedicated arena for this message - const arena = try self._page._session.getArena(.{ .debug = "WebSocket message" }); - errdefer self._page._session.releaseArena(arena); - switch (data) { .blob => |blob| { + const arena = try self._page._session.getArena(blob._slice.len, "WebSocket.message"); + errdefer self._page._session.releaseArena(arena); try self.queueMessage(.{ .binary = .{ .arena = arena, .data = try arena.dupe(u8, blob._slice), @@ -285,15 +283,21 @@ pub fn send(self: *WebSocket, data: SendData) !void { }, .js_val => |js_val| { if (js_val.isString()) |str| { + const arena = try self._page._session.getArena(str.len(), "WebSocket.message"); + errdefer self._page._session.releaseArena(arena); try self.queueMessage(.{ .text = .{ .arena = arena, .data = try str.toSliceWithAlloc(arena), } }); } else { const binary = try js_val.toZig(BinaryData); + const buffer = binary.asBuffer(); + + const arena = try self._page._session.getArena(buffer.len, "WebSocket.message"); + errdefer 
self._page._session.releaseArena(arena); try self.queueMessage(.{ .binary = .{ .arena = arena, - .data = try arena.dupe(u8, binary.asBuffer()), + .data = try arena.dupe(u8, buffer), } }); } }, diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 62e05a17..92ba9997 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -89,7 +89,7 @@ const ResponseType = enum { }; pub fn init(page: *Page) !*XMLHttpRequest { - const arena = try page.getArena(.{ .debug = "XMLHttpRequest" }); + const arena = try page.getArena(.large, "XMLHttpRequest"); errdefer page.releaseArena(arena); const self = try page._factory.xhrEventTarget(arena, XMLHttpRequest{ ._page = page, @@ -210,7 +210,7 @@ pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void const page = self._page; self._method = try parseMethod(method_); - self._url = try URL.resolve(self._arena, page.base(), url, .{ .always_dupe = true, .encode = true }); + self._url = try URL.resolve(self._arena, page.base(), url, .{ .always_dupe = true, .encoding = page.charset }); try self.stateChanged(.opened, page); } diff --git a/src/browser/webapi/selector/Selector.zig b/src/browser/webapi/selector/Selector.zig index a3d5d894..838cecfd 100644 --- a/src/browser/webapi/selector/Selector.zig +++ b/src/browser/webapi/selector/Selector.zig @@ -45,7 +45,7 @@ pub fn querySelectorAll(root: *Node, input: []const u8, page: *Page) !*List { return error.SyntaxError; } - const arena = try page.getArena(.{ .debug = "querySelectorAll" }); + const arena = try page.getArena(.small, "querySelectorAll"); errdefer page.releaseArena(arena); var nodes: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; diff --git a/src/cdp/CDP.zig b/src/cdp/CDP.zig index 024954db..222098c8 100644 --- a/src/cdp/CDP.zig +++ b/src/cdp/CDP.zig @@ -487,10 +487,10 @@ pub const BrowserContext = struct { pub fn createIsolatedWorld(self: *BrowserContext, 
world_name: []const u8, grant_universal_access: bool) !*IsolatedWorld { const browser = &self.cdp.browser; - const arena = try browser.arena_pool.acquire(.{ .debug = "IsolatedWorld" }); + const arena = try browser.arena_pool.acquire(.small, "IsolatedWorld"); errdefer browser.arena_pool.release(arena); - const call_arena = try browser.arena_pool.acquire(.{ .debug = "IsolatedWorld.call_arena" }); + const call_arena = try browser.arena_pool.acquire(.tiny, "IsolatedWorld.call_arena"); errdefer browser.arena_pool.release(call_arena); const world = try arena.create(IsolatedWorld); diff --git a/src/cdp/domains/page.zig b/src/cdp/domains/page.zig index 267cada8..beb86c6b 100644 --- a/src/cdp/domains/page.zig +++ b/src/cdp/domains/page.zig @@ -286,7 +286,7 @@ fn navigate(cmd: *CDP.Command) !void { page = try session.replacePage(); } - const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url, "UTF-8"); try page.navigate(encoded_url, .{ .reason = .address_bar, .cdp_id = cmd.input.id, diff --git a/src/cdp/domains/target.zig b/src/cdp/domains/target.zig index bce7e00d..822659f7 100644 --- a/src/cdp/domains/target.zig +++ b/src/cdp/domains/target.zig @@ -220,7 +220,7 @@ fn createTarget(cmd: *CDP.Command) !void { } if (!std.mem.eql(u8, "about:blank", params.url)) { - const encoded_url = try URL.ensureEncoded(page.call_arena, params.url); + const encoded_url = try URL.ensureEncoded(page.call_arena, params.url, "UTF-8"); try page.navigate( encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null } }, diff --git a/src/html5ever/lib.rs b/src/html5ever/lib.rs index 6fab9763..9d14e784 100644 --- a/src/html5ever/lib.rs +++ b/src/html5ever/lib.rs @@ -151,6 +151,303 @@ pub extern "C" fn html5ever_parse_document_with_encoding( .one(StrTendril::from(decoded.as_ref())); } +// === Encoding API for TextDecoder === + +/// Result of encoding label lookup +#[repr(C)] +pub struct EncodingInfo { + /// 0 = 
not found, 1 = found + pub found: u8, + /// Opaque handle to the encoding (actually &'static Encoding) + pub handle: *const c_void, + /// Length of canonical name + pub name_len: usize, + /// Pointer to canonical encoding name (static, lowercase) + pub name_ptr: *const c_uchar, +} + +/// Look up an encoding by its label (case-insensitive, whitespace-trimmed) +#[no_mangle] +pub extern "C" fn encoding_for_label( + label: *const c_uchar, + label_len: usize, +) -> EncodingInfo { + if label.is_null() || label_len == 0 { + return EncodingInfo { + found: 0, + name_len: 0, + handle: std::ptr::null(), + name_ptr: std::ptr::null(), + }; + } + + let label_bytes = unsafe { std::slice::from_raw_parts(label, label_len) }; + + match Encoding::for_label(label_bytes) { + Some(encoding) => { + let name = encoding.name(); + EncodingInfo { + found: 1, + name_len: name.len(), + name_ptr: name.as_ptr(), + handle: encoding as *const _ as *const c_void, + } + } + None => EncodingInfo { + found: 0, + name_len: 0, + name_ptr: std::ptr::null(), + handle: std::ptr::null(), + }, + } +} + +/// Calculate maximum UTF-8 buffer size needed for decoding +#[no_mangle] +pub extern "C" fn encoding_max_utf8_buffer_length( + handle: *const c_void, + input_len: usize, +) -> usize { + if handle.is_null() { + return 0; + } + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + let decoder = encoding.new_decoder(); + decoder.max_utf8_buffer_length(input_len).unwrap_or(0) +} + +/// Result of decoding operation +#[repr(C)] +pub struct DecodeResult { + /// 0 = no errors, 1 = had malformed sequences (replaced with U+FFFD) + pub had_errors: u8, + /// Number of input bytes consumed + pub bytes_read: usize, + /// Number of UTF-8 bytes written to output buffer + pub bytes_written: usize, +} + +/// Decode bytes from source encoding to UTF-8 +/// For streaming, set is_last=0; for final/complete decode, set is_last=1 +#[no_mangle] +pub extern "C" fn encoding_decode( + handle: *const c_void, + 
input: *const c_uchar, + input_len: usize, + output: *mut c_uchar, + output_len: usize, + is_last: u8, +) -> DecodeResult { + if handle.is_null() || output.is_null() { + return DecodeResult { + had_errors: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + let input_bytes = if input.is_null() || input_len == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(input, input_len) } + }; + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_len) }; + + let mut decoder = encoding.new_decoder(); + let last = is_last != 0; + + let (result, bytes_read, bytes_written, had_errors) = + decoder.decode_to_utf8(input_bytes, output_slice, last); + + // If output buffer was too small, we still report what we could process + let _ = result; // CoderResult::InputEmpty or CoderResult::OutputFull + + DecodeResult { + had_errors: if had_errors { 1 } else { 0 }, + bytes_read, + bytes_written, + } +} + +// === Streaming Decoder API === + +use encoding_rs::Decoder; + +/// Create a streaming decoder that maintains state across calls +#[no_mangle] +pub extern "C" fn encoding_decoder_new(handle: *const c_void) -> *mut c_void { + if handle.is_null() { + return std::ptr::null_mut(); + } + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + let decoder = Box::new(encoding.new_decoder()); + Box::into_raw(decoder) as *mut c_void +} + +/// Decode using a streaming decoder (maintains state for incomplete sequences) +#[no_mangle] +pub extern "C" fn encoding_decoder_decode( + decoder_ptr: *mut c_void, + input: *const c_uchar, + input_len: usize, + output: *mut c_uchar, + output_len: usize, + is_last: u8, +) -> DecodeResult { + if decoder_ptr.is_null() || output.is_null() { + return DecodeResult { + had_errors: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + + let decoder: &mut Decoder = unsafe { &mut *(decoder_ptr as *mut Decoder) }; + let input_bytes = if 
input.is_null() || input_len == 0 { + &[] + } else { + unsafe { std::slice::from_raw_parts(input, input_len) } + }; + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_len) }; + + let last = is_last != 0; + let (result, bytes_read, bytes_written, had_errors) = + decoder.decode_to_utf8(input_bytes, output_slice, last); + + let _ = result; + + DecodeResult { + had_errors: if had_errors { 1 } else { 0 }, + bytes_read, + bytes_written, + } +} + +/// Free a streaming decoder +#[no_mangle] +pub extern "C" fn encoding_decoder_free(decoder_ptr: *mut c_void) { + if !decoder_ptr.is_null() { + unsafe { + drop(Box::from_raw(decoder_ptr as *mut Decoder)); + } + } +} + +// === Encoding API (UTF-8 to legacy encoding with NCR fallback) === + +/// Result of encoding operation +#[repr(C)] +pub struct EncodeResult { + /// 0 = success, 1 = output buffer too small + pub status: u8, + /// Number of input bytes consumed + pub bytes_read: usize, + /// Number of bytes written to output buffer + pub bytes_written: usize, +} + +/// Encode UTF-8 to a legacy encoding, replacing unencodable characters with +/// HTML decimal numeric character references (&#codepoint;). +/// +/// This is used for URL query string encoding per WHATWG URL spec. +/// encoding_rs's encode_from_utf8 already produces NCRs for unmappable chars. 
+#[no_mangle] +pub extern "C" fn encoding_encode_with_ncr( + handle: *const c_void, + input: *const c_uchar, + input_len: usize, + output: *mut c_uchar, + output_capacity: usize, +) -> EncodeResult { + if handle.is_null() || output.is_null() { + return EncodeResult { + status: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + + let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) }; + + let input_str = if input.is_null() || input_len == 0 { + "" + } else { + let bytes = unsafe { std::slice::from_raw_parts(input, input_len) }; + match std::str::from_utf8(bytes) { + Ok(s) => s, + Err(_) => { + return EncodeResult { + status: 1, + bytes_read: 0, + bytes_written: 0, + }; + } + } + }; + + // For UTF-8 encoding, just copy directly (no NCR needed) + if encoding == encoding_rs::UTF_8 { + if input_len > output_capacity { + return EncodeResult { + bytes_read: 0, + bytes_written: 0, + status: 1, + }; + } + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_capacity) }; + output_slice[..input_len].copy_from_slice(input_str.as_bytes()); + return EncodeResult { + bytes_read: input_len, + bytes_written: input_len, + status: 0, + }; + } + + let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_capacity) }; + let mut encoder = encoding.new_encoder(); + + // encode_from_utf8 automatically produces NCRs for unmappable characters + let (result, bytes_read, bytes_written, _had_unmappables) = + encoder.encode_from_utf8(input_str, output_slice, true); + + match result { + encoding_rs::CoderResult::InputEmpty => EncodeResult { + bytes_read, + bytes_written, + status: 0, + }, + encoding_rs::CoderResult::OutputFull => EncodeResult { + bytes_read, + bytes_written, + status: 1, + }, + } +} + +/// Calculate maximum output buffer size needed for encoding with NCR fallback. +/// Worst case: every character becomes &#codepoint; where codepoint is up to 7 digits. 
///
/// Returns 0 for a null `handle`. The result saturates at `usize::MAX` instead
/// of overflowing.
#[no_mangle]
pub extern "C" fn encoding_max_encode_buffer_length(
    handle: *const c_void,
    input_len: usize,
) -> usize {
    // Longest possible NCR is "&#1114111;" (U+10FFFF): 10 bytes.
    const MAX_NCR_LEN: usize = 10;

    if handle.is_null() {
        return 0;
    }
    let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };
    let encoder = encoding.new_encoder();

    // Every input character occupies at least one input byte and expands to at
    // most MAX_NCR_LEN output bytes when it is unmappable, so `input_len`
    // NCRs on top of the "no unmappables" size is a true upper bound. The
    // previous 2x margin was too small: a 2-byte UTF-8 character such as
    // U+03B1 becomes the 6-byte "&#945;".
    let ncr_budget = input_len.saturating_mul(MAX_NCR_LEN);
    encoder
        .max_buffer_length_from_utf8_if_no_unmappables(input_len)
        .map_or(usize::MAX, |base| base.saturating_add(ncr_budget))
}

#[no_mangle] pub extern "C" fn html5ever_parse_fragment( html: *mut c_uchar, diff --git a/src/lightpanda.zig b/src/lightpanda.zig index 4d6c23fb..b0356e93 100644 --- a/src/lightpanda.zig +++ b/src/lightpanda.zig @@ -107,7 +107,7 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void { // } // } - const encoded_url = try URL.ensureEncoded(page.call_arena, url); + const encoded_url = try URL.ensureEncoded(page.call_arena, url, "UTF-8"); _ = try page.navigate(encoded_url, .{ .reason = .address_bar, .kind = .{ .push = null }, diff --git a/src/network/IpFilter.zig b/src/network/IpFilter.zig new file mode 100644 index 00000000..73977188 --- /dev/null +++ b/src/network/IpFilter.zig @@ -0,0 +1,624 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

const std = @import("std");
const posix = std.posix;
const libcurl = @import("../sys/libcurl.zig");

const IpFilter = @This();

/// Binary representation for bitwise CIDR comparison.
pub const Ipv4Addr = [4]u8;
pub const Ipv6Addr = [16]u8;

/// An IPv4 CIDR range stored as a big-endian network value plus a bit mask.
/// `network` is stored as given (host bits are not cleared).
pub const CidrV4 = struct {
    network: u32,
    mask: u32,

    /// Build a range from an address and a prefix length.
    /// Prefix lengths of 32 or more yield a full mask, mirroring how
    /// `CidrV6.fromPrefix` treats lengths of 128 or more; previously values
    /// 33..63 hit an out-of-range cast to the `u5` shift amount.
    fn fromPrefix(addr: Ipv4Addr, prefix_len: u6) CidrV4 {
        const all_ones: u32 = std.math.maxInt(u32);
        const mask: u32 = if (prefix_len == 0)
            0
        else if (prefix_len >= 32)
            all_ones
        else
            ~(all_ones >> @as(u5, @intCast(prefix_len)));
        return .{
            .network = std.mem.readInt(u32, &addr, .big),
            .mask = mask,
        };
    }
};

/// An IPv6 CIDR range stored as two big-endian 64-bit halves plus masks.
pub const CidrV6 = struct {
    network_hi: u64,
    network_lo: u64,
    mask_hi: u64,
    mask_lo: u64,

    /// Build a range from an address and a prefix length.
    /// Prefix lengths of 128 or more yield a full 128-bit mask.
    fn fromPrefix(addr: Ipv6Addr, prefix_len: u8) CidrV6 {
        const all_ones: u64 = std.math.maxInt(u64);

        // Upper half: covers prefix bits 1..64.
        const mask_hi: u64 = if (prefix_len == 0)
            0
        else if (prefix_len >= 64)
            all_ones
        else
            ~(all_ones >> @as(u6, @intCast(prefix_len)));

        // Lower half: covers prefix bits 65..128.
        const mask_lo: u64 = if (prefix_len <= 64)
            0
        else if (prefix_len >= 128)
            all_ones
        else
            ~(all_ones >> @as(u6, @intCast(prefix_len - 64)));

        return .{
            .network_hi = std.mem.readInt(u64, addr[0..8], .big),
            .network_lo = std.mem.readInt(u64, addr[8..16], .big),
            .mask_hi = mask_hi,
            .mask_lo = mask_lo,
        };
    }
};

// IpFilter fields
block_private: bool,
cidrs: ?Cidrs,

// ── Comptime helpers ─────────────────────────────────────────────────────────

/// Comptime helper: parse dotted-decimal IPv4 to [4]u8.
/// Fails the build on a malformed literal (wrong octet count, empty octet,
/// non-digit character, or octet above 255) instead of producing a partly
/// undefined address.
fn parseIpv4Comptime(comptime s: []const u8) Ipv4Addr {
    return comptime blk: {
        const bad = "invalid IPv4 literal: " ++ s;
        var result: Ipv4Addr = undefined;
        var octet: u16 = 0; // wider than u8 so the > 255 check can run
        var digits: usize = 0;
        var octet_idx: usize = 0;
        for (s) |ch| {
            if (ch == '.') {
                if (digits == 0 or octet_idx >= 3) @compileError(bad);
                result[octet_idx] = @intCast(octet);
                octet_idx += 1;
                octet = 0;
                digits = 0;
            } else {
                if (ch < '0' or ch > '9') @compileError(bad);
                octet = octet * 10 + (ch - '0');
                if (octet > 255) @compileError(bad);
                digits += 1;
            }
        }
        if (digits == 0 or octet_idx != 3) @compileError(bad);
        result[octet_idx] = @intCast(octet);
        break :blk result;
    };
}

/// Comptime helper: build a CidrV4.
fn makeCidrV4(comptime addr: []const u8, comptime prefix: u6) CidrV4 {
    return CidrV4.fromPrefix(parseIpv4Comptime(addr), prefix);
}

/// Comptime helper: build a CidrV6 from a 16-byte literal array.
fn makeCidrV6(comptime bytes: Ipv6Addr, comptime prefix: u8) CidrV6 {
    return CidrV6.fromPrefix(bytes, prefix);
}

// ── Comptime CIDR range tables ───────────────────────────────────────────────

const PRIVATE_V4 = [_]CidrV4{
    makeCidrV4("127.0.0.0", 8), // localhost
    makeCidrV4("0.0.0.0", 8), // current network
    makeCidrV4("10.0.0.0", 8), // RFC1918
    makeCidrV4("172.16.0.0", 12), // RFC1918
    makeCidrV4("192.168.0.0", 16), // RFC1918
    makeCidrV4("169.254.0.0", 16), // link-local
};

const PRIVATE_V6 = [_]CidrV6{
    // ::/128 — IPv6 Unspecified
    makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 128),
    // ::1/128 — IPv6 localhost
    makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128),
    // fe80::/10 — link-local
    makeCidrV6(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10),
    // fc00::/7 — ULA
    makeCidrV6(.{ 0xfc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 7),
};

// ── Runtime IP parsing ───────────────────────────────────────────────────────

/// Parse dotted-decimal IPv4 string to 4-byte array. Returns null on parse failure.
/// Each of the four octets must consist solely of decimal digits and fit in a u8.
fn parseIpv4(str: []const u8) ?Ipv4Addr {
    var addr: Ipv4Addr = undefined;
    var it = std.mem.splitScalar(u8, str, '.');
    var i: usize = 0;
    while (it.next()) |part| : (i += 1) {
        if (i >= 4) return null;
        // std.fmt.parseInt tolerates a leading sign and '_' separators
        // ("+10", "1_0"); neither is valid in an IP address, so reject
        // anything that is not a plain digit before converting.
        for (part) |ch| {
            if (!std.ascii.isDigit(ch)) return null;
        }
        addr[i] = std.fmt.parseInt(u8, part, 10) catch return null;
    }
    if (i != 4) return null;
    return addr;
}

/// Parse IPv6 string to 16-byte array. Handles compressed notation.
/// Strips zone ID (e.g. "fe80::1%eth0" -> "fe80::1").
/// Returns null on parse failure.
fn parseIpv6(str: []const u8) ?Ipv6Addr {
    // Strip zone ID
    const zone_start = std.mem.indexOfScalar(u8, str, '%') orelse str.len;
    const clean = str[0..zone_start];
    const parsed = std.net.Address.parseIp6(clean, 0) catch return null;
    return parsed.in6.sa.addr;
}

// ── CIDR matching ────────────────────────────────────────────────────────────

/// Detect IPv4-mapped IPv6 address (::ffff:x.x.x.x).
/// Returns the embedded IPv4 address if detected, null otherwise.
fn isIpv4Mapped(addr: Ipv6Addr) ?Ipv4Addr {
    // IPv4-mapped prefix: 10 zero bytes + 2 0xFF bytes
    const prefix = [12]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
    if (!std.mem.eql(u8, addr[0..12], &prefix)) return null;
    // The last four bytes are the embedded IPv4 address.
    return addr[12..16].*;
}

/// Check if IPv4 address falls within a CIDR range.
fn matchesCidrV4(addr: Ipv4Addr, cidr: CidrV4) bool {
    const addr_int = std.mem.readInt(u32, &addr, .big);
    // Bits that differ from the network, restricted to the prefix bits.
    const differing = (addr_int ^ cidr.network) & cidr.mask;
    return differing == 0;
}

/// Check if IPv6 address falls within a CIDR range.
fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool {
    const addr_hi = std.mem.readInt(u64, addr[0..8], .big);
    const addr_lo = std.mem.readInt(u64, addr[8..16], .big);
    // Both 64-bit halves must agree with the network on every masked bit.
    const hi_diff = (addr_hi ^ cidr.network_hi) & cidr.mask_hi;
    const lo_diff = (addr_lo ^ cidr.network_lo) & cidr.mask_lo;
    return hi_diff == 0 and lo_diff == 0;
}

// ── Public API ───────────────────────────────────────────────────────────────

/// Parsed block lists (`v4`, `v6`) and allow lists (`allow_v4`, `allow_v6`).
pub const Cidrs = struct {
    v4: []CidrV4,
    v6: []CidrV6,
    allow_v4: []CidrV4,
    allow_v6: []CidrV6,

    /// Free all four slices with the allocator that produced them.
    pub fn deinit(self: Cidrs, allocator: std.mem.Allocator) void {
        allocator.free(self.v4);
        allocator.free(self.v6);
        allocator.free(self.allow_v4);
        allocator.free(self.allow_v6);
    }
};

/// Parse a comma-separated list of CIDR strings (e.g. "10.0.0.0/8,2001:db8::/32")
/// into a Cidrs struct. Entries prefixed with '-' are added to the allow list
/// (e.g. "-10.0.0.42/32" exempts that IP from blocking).
/// Entries are trimmed of spaces and tabs; empty entries are skipped.
/// The prefix length must consist solely of decimal digits.
/// Caller owns the returned Cidrs and must free them via Cidrs.deinit.
/// Returns error.InvalidCidr on any malformed entry.
pub fn parseCidrList(
    allocator: std.mem.Allocator,
    cidr_str: []const u8,
) !Cidrs {
    var v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer v4_list.deinit(allocator);
    var v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer v6_list.deinit(allocator);
    var allow_v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer allow_v4_list.deinit(allocator);
    var allow_v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer allow_v6_list.deinit(allocator);

    var it = std.mem.splitScalar(u8, cidr_str, ',');
    while (it.next()) |entry| {
        const trimmed = std.mem.trim(u8, entry, " \t");
        if (trimmed.len == 0) continue;

        const is_allow = trimmed[0] == '-';
        const cidr_part = if (is_allow) trimmed[1..] else trimmed;

        const slash = std.mem.indexOfScalar(u8, cidr_part, '/') orelse return error.InvalidCidr;
        const addr_str = cidr_part[0..slash];
        const prefix_str = cidr_part[slash + 1 ..];

        // std.fmt.parseInt tolerates a leading sign and '_' separators
        // ("/+8", "/1_6"); require plain digits so such entries are rejected.
        for (prefix_str) |ch| {
            if (!std.ascii.isDigit(ch)) return error.InvalidCidr;
        }
        const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr;

        if (parseIpv4(addr_str)) |v4| {
            if (prefix > 32) return error.InvalidCidr;
            const cidr = CidrV4.fromPrefix(v4, @intCast(prefix));
            const target = if (is_allow) &allow_v4_list else &v4_list;
            try target.append(allocator, cidr);
        } else if (parseIpv6(addr_str)) |v6| {
            if (prefix > 128) return error.InvalidCidr;
            const cidr = CidrV6.fromPrefix(v6, prefix);
            const target = if (is_allow) &allow_v6_list else &v6_list;
            try target.append(allocator, cidr);
        } else {
            return error.InvalidCidr;
        }
    }

    // Each slice handed out so far must be freed if a later conversion fails.
    const v4 = try v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(v4);
    const v6 = try v6_list.toOwnedSlice(allocator);
    errdefer allocator.free(v6);
    const allow_v4 = try allow_v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(allow_v4);
    const allow_v6 = try allow_v6_list.toOwnedSlice(allocator);
    return .{ .v4 = v4, .v6 = v6, .allow_v4 = allow_v4, .allow_v6 = allow_v6 };
}

// Create an IpFilter. Set block_private to block outbound requests to RFC1918,
// localhost, link-local, and ULA ranges. Pass parsed CIDRs for additional
// custom block/allow ranges; the filter takes ownership of the Cidrs and will
// free them on deinit.
pub fn init(
    block_private: bool,
    cidrs: ?Cidrs,
) IpFilter {
    return .{ .block_private = block_private, .cidrs = cidrs };
}

/// Release the custom CIDR lists, if any were supplied to `init`.
pub fn deinit(self: IpFilter, allocator: std.mem.Allocator) void {
    const owned = self.cidrs orelse return;
    owned.deinit(allocator);
}

/// Decide whether an IPv4 address is blocked.
/// Order of evaluation: custom allow list (never blocked), custom block list,
/// then the built-in private ranges when `block_private` is set.
fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool {
    if (self.cidrs) |custom| {
        for (custom.allow_v4) |range| {
            if (matchesCidrV4(addr, range)) return false;
        }
        for (custom.v4) |range| {
            if (matchesCidrV4(addr, range)) return true;
        }
    }

    if (!self.block_private) return false;

    for (PRIVATE_V4) |range| {
        if (matchesCidrV4(addr, range)) return true;
    }
    return false;
}

/// Decide whether an IPv6 address is blocked.
/// Order of evaluation: custom allow list (never blocked), custom block list,
/// then the built-in private ranges when `block_private` is set.
fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool {
    if (self.cidrs) |custom| {
        for (custom.allow_v6) |range| {
            if (matchesCidrV6(addr, range)) return false;
        }
        for (custom.v6) |range| {
            if (matchesCidrV6(addr, range)) return true;
        }
    }

    if (!self.block_private) return false;

    for (PRIVATE_V6) |range| {
        if (matchesCidrV6(addr, range)) return true;
    }
    return false;
}

/// Check if an address from curl's opensocket callback should be blocked.
/// Extracts the IP directly from the sockaddr structure; no string parsing needed.
/// Fail-closed: unknown address family -> true (blocked).
+pub fn isBlockedSockaddr(self: *const IpFilter, sa: *const libcurl.CurlSockAddr) bool { + switch (sa.family) { + posix.AF.INET => { + const sin: *const posix.sockaddr.in = @ptrCast(&sa.addr); + // sin.addr is in network byte order (big-endian); convert to host bytes + const bytes: [4]u8 = @bitCast(sin.addr); + return self.isBlockedV4(bytes); + }, + posix.AF.INET6 => { + const sin6: *const posix.sockaddr.in6 = @ptrCast(&sa.addr); + const addr: Ipv6Addr = sin6.addr; + if (isIpv4Mapped(addr)) |v4| return self.isBlockedV4(v4); + return self.isBlockedV6(addr); + }, + else => return true, // unknown family -> fail-closed + } +} + +const testing = @import("../testing.zig"); +test "IpFilter: IPv4 CIDR matching: private group boundaries" { + const filter = IpFilter.init(true, null); + defer filter.deinit(testing.allocator); + + try testing.expect(filter.testBlocked("0.0.0.0")); + + // Loopback + try testing.expect(filter.testBlocked("127.0.0.1")); + try testing.expect(filter.testBlocked("127.255.255.255")); + try testing.expect(!filter.testBlocked("128.0.0.1")); + + // RFC1918 10.0.0.0/8 + try testing.expect(filter.testBlocked("10.0.0.1")); + try testing.expect(filter.testBlocked("10.255.255.255")); + try testing.expect(!filter.testBlocked("11.0.0.0")); + + // RFC1918 172.16.0.0/12 — critical boundary + try testing.expect(!filter.testBlocked("172.15.255.255")); // MUST NOT block + try testing.expect(filter.testBlocked("172.16.0.0")); // MUST block + try testing.expect(filter.testBlocked("172.31.255.255")); // MUST block + try testing.expect(!filter.testBlocked("172.32.0.0")); // MUST NOT block + + // RFC1918 192.168.0.0/16 + try testing.expect(filter.testBlocked("192.168.0.1")); + try testing.expect(!filter.testBlocked("192.169.0.0")); + + // Link-local + try testing.expect(filter.testBlocked("169.254.1.1")); + try testing.expect(!filter.testBlocked("169.255.0.0")); + + // Public IP — must NOT be blocked + try testing.expect(!filter.testBlocked("8.8.8.8")); + try 
testing.expect(!filter.testBlocked("1.1.1.1")); + try testing.expect(!filter.testBlocked("93.184.216.34")); // example.com +} + +test "IpFilter: IPv6 CIDR matching: private group" { + const filter = IpFilter.init(true, null); + defer filter.deinit(testing.allocator); + + try testing.expect(filter.testBlocked("::")); // unspecified + try testing.expect(filter.testBlocked("::1")); // localhost + try testing.expect(filter.testBlocked("fe80::1")); // link-local + try testing.expect(filter.testBlocked("fc00::1")); // ULA + try testing.expect(filter.testBlocked("fd00::1")); // ULA (fd is fc00::/7) + try testing.expect(!filter.testBlocked("2001:db8::1")); // documentation range — public + try testing.expect(!filter.testBlocked("2606:4700::1111")); // Cloudflare +} + +test "IpFilter: IPv4-mapped IPv6 bypass prevention" { + const filter = IpFilter.init(true, null); + defer filter.deinit(testing.allocator); + + // ::ffff:127.0.0.1 must be blocked (maps to loopback) + try testing.expect(filter.testBlocked("::ffff:127.0.0.1")); + // ::ffff:10.0.0.1 must be blocked (maps to RFC1918) + try testing.expect(filter.testBlocked("::ffff:10.0.0.1")); + // ::ffff:8.8.8.8 must NOT be blocked (maps to public) + try testing.expect(!filter.testBlocked("::ffff:8.8.8.8")); +} + +test "IpFilter: fail-closed: unknown address family blocked by isBlockedSockaddr" { + const filter = IpFilter.init(false, null); + defer filter.deinit(testing.allocator); + + // Construct a sockaddr with an unknown address family + var sa: libcurl.CurlSockAddr = .{ + .family = 255, // not AF_INET or AF_INET6 + .socktype = posix.SOCK.STREAM, + .protocol = 0, + .addrlen = 0, + .addr = undefined, + }; + try testing.expect(filter.isBlockedSockaddr(&sa)); +} + +test "IpFilter: custom CIDR ranges" { + const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24"); + const filter = IpFilter.init(false, cidrs); + defer filter.deinit(testing.allocator); + + try testing.expect(filter.testBlocked("203.0.113.1")); // in 
custom range + try testing.expect(filter.testBlocked("203.0.113.255")); // in custom range + try testing.expect(!filter.testBlocked("203.0.114.0")); // outside custom range + try testing.expect(!filter.testBlocked("8.8.8.8")); // not in range +} + +test "IpFilter: private group blocks cloud metadata IP via link-local" { + // 169.254.169.254 is in link-local (169.254.0.0/16) which is in the private group. + // Users who want targeted cloud-metadata-only blocking can use --block-cidrs. + const filter_private = IpFilter.init(true, null); + defer filter_private.deinit(testing.allocator); + const filter_none = IpFilter.init(false, null); + defer filter_none.deinit(testing.allocator); + + try testing.expect(filter_private.testBlocked("169.254.169.254")); // blocked via link-local + try testing.expect(!filter_none.testBlocked("169.254.169.254")); // not blocked when disabled +} + +test "IpFilter: parseCidrList: mixed IPv4 and IPv6" { + const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24"); + + try testing.expectEqual(2, cidrs.v4.len); + try testing.expectEqual(1, cidrs.v6.len); + + // spot-check: 203.0.113.0/24 and 192.168.1.0/24 + const f = IpFilter.init(false, cidrs); + defer f.deinit(testing.allocator); + try testing.expect(f.testBlocked("203.0.113.1")); + try testing.expect(!f.testBlocked("203.0.114.0")); + try testing.expect(f.testBlocked("192.168.1.1")); + try testing.expect(f.testBlocked("2001:db8::1")); + try testing.expect(!f.testBlocked("2001:db9::1")); +} + +test "IpFilter: allow list exempts from private blocking" { + const cidrs = try parseCidrList(testing.allocator, "-10.0.0.42/32,-fc00::1/128"); + const filter = IpFilter.init(true, cidrs); + defer filter.deinit(testing.allocator); + + // Allowed IPs pass through despite being in private ranges + try testing.expect(!filter.testBlocked("10.0.0.42")); + try testing.expect(!filter.testBlocked("fc00::1")); + + // Other private IPs still blocked + try 
testing.expect(filter.testBlocked("10.0.0.43")); + try testing.expect(filter.testBlocked("10.0.0.41")); + try testing.expect(filter.testBlocked("192.168.1.1")); + try testing.expect(filter.testBlocked("fc00::2")); +} + +test "IpFilter: allow list exempts from custom CIDR blocking" { + const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24,-203.0.113.100/32"); + const filter = IpFilter.init(false, cidrs); + defer filter.deinit(testing.allocator); + + try testing.expect(!filter.testBlocked("203.0.113.100")); // allowed + try testing.expect(filter.testBlocked("203.0.113.99")); // blocked + try testing.expect(filter.testBlocked("203.0.113.101")); // blocked +} + +test "IpFilter: parseCidrList: allow entries with '-' prefix" { + const cidrs = try parseCidrList(testing.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128"); + + try testing.expectEqual(1, cidrs.v4.len); + try testing.expectEqual(0, cidrs.v6.len); + try testing.expectEqual(1, cidrs.allow_v4.len); + try testing.expectEqual(1, cidrs.allow_v6.len); + + const f = IpFilter.init(false, cidrs); + defer f.deinit(testing.allocator); + try testing.expect(!f.testBlocked("10.0.0.42")); // allowed + try testing.expect(f.testBlocked("10.0.0.43")); // blocked + try testing.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by custom, but allow-listed) +} + +test "IpFilter: parseCidrList: invalid input returns error" { + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "not-a-cidr")); + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "10.0.0.0/33")); // prefix too large + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "10.0.0.0")); // missing prefix + try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, "10.0.0.0/abc")); // non-numeric prefix +} + +test "IpFilter: matchesCidrV4: exact match /32" { + const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 100 }, 32); + try testing.expect(matchesCidrV4(.{ 192, 
168, 1, 100 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 101 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 99 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /0 matches everything" { + const cidr = CidrV4.fromPrefix(.{ 0, 0, 0, 0 }, 0); + try testing.expect(matchesCidrV4(.{ 0, 0, 0, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 255, 255, 255, 255 }, cidr)); + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 1 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /8 boundary" { + const cidr = CidrV4.fromPrefix(.{ 10, 0, 0, 0 }, 8); + try testing.expect(matchesCidrV4(.{ 10, 0, 0, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 10, 255, 255, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 11, 0, 0, 0 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 9, 255, 255, 255 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /12 boundary (172.16.0.0/12)" { + const cidr = CidrV4.fromPrefix(.{ 172, 16, 0, 0 }, 12); + // In range + try testing.expect(matchesCidrV4(.{ 172, 16, 0, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 172, 31, 255, 255 }, cidr)); + try testing.expect(matchesCidrV4(.{ 172, 20, 100, 50 }, cidr)); + // Out of range + try testing.expect(!matchesCidrV4(.{ 172, 15, 255, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 172, 32, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV4: /24 network" { + const cidr = CidrV4.fromPrefix(.{ 203, 0, 113, 0 }, 24); + try testing.expect(matchesCidrV4(.{ 203, 0, 113, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 203, 0, 113, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 203, 0, 112, 255 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 203, 0, 114, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV4: non-byte-aligned /25" { + const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 0 }, 25); + // 192.168.1.0 - 192.168.1.127 should match + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 0 }, cidr)); + try testing.expect(matchesCidrV4(.{ 192, 168, 1, 127 }, cidr)); + // 
192.168.1.128+ should not match + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 128 }, cidr)); + try testing.expect(!matchesCidrV4(.{ 192, 168, 1, 255 }, cidr)); +} + +test "IpFilter: matchesCidrV6: /128 exact match" { + const addr: Ipv6Addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + const cidr = CidrV6.fromPrefix(addr, 128); + try testing.expect(matchesCidrV6(addr, cidr)); + + var different = addr; + different[15] = 2; + try testing.expect(!matchesCidrV6(different, cidr)); +} + +test "IpFilter: matchesCidrV6: /0 matches everything" { + const cidr = CidrV6.fromPrefix(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0); + try testing.expect(matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); +} + +test "IpFilter: matchesCidrV6: /64 boundary" { + // 2001:db8::/64 + const cidr = CidrV6.fromPrefix(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 64); + // In range - any suffix in lower 64 bits + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); + // Out of range - different prefix + try testing.expect(!matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV6: /48 network" { + // 2001:db8:abcd::/48 + const cidr = CidrV6.fromPrefix(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 48); + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); + try testing.expect(!matchesCidrV6(.{ 0x20, 0x01, 
0x0d, 0xb8, 0xab, 0xce, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV6: /10 link-local (fe80::/10)" { + const cidr = CidrV6.fromPrefix(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10); + // fe80:: through febf:: should match (first 10 bits: 1111111010) + try testing.expect(matchesCidrV6(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0xfe, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, cidr)); + // fec0:: should NOT match (11th bit differs) + try testing.expect(!matchesCidrV6(.{ 0xfe, 0xc0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, cidr)); +} + +test "IpFilter: matchesCidrV6: prefix > 64 bits (/96)" { + // ::ffff:0:0/96 (IPv4-mapped prefix) + const cidr = CidrV6.fromPrefix(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 0, 0, 0 }, 96); + try testing.expect(matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 192, 168, 1, 1 }, cidr)); + try testing.expect(matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 10, 0, 0, 1 }, cidr)); + try testing.expect(!matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xfe, 192, 168, 1, 1 }, cidr)); +} + +/// Test-only convenience: parse an IP string and check against the filter. +/// Test inputs must be valid IPs; unreachable on parse failure. 
+fn testBlocked(self: *const IpFilter, ip: []const u8) bool { + if (parseIpv4(ip)) |v4| return self.isBlockedV4(v4); + if (parseIpv6(ip)) |v6| { + if (isIpv4Mapped(v6)) |v4| return self.isBlockedV4(v4); + return self.isBlockedV6(v6); + } + unreachable; +} diff --git a/src/network/Network.zig b/src/network/Network.zig index 1fb8c8fb..359646ce 100644 --- a/src/network/Network.zig +++ b/src/network/Network.zig @@ -28,6 +28,7 @@ const Config = @import("../Config.zig"); const libcurl = @import("../sys/libcurl.zig"); const http = @import("http.zig"); +const IpFilter = @import("IpFilter.zig"); const RobotStore = @import("Robots.zig").RobotStore; const WebBotAuth = @import("WebBotAuth.zig"); @@ -85,6 +86,9 @@ callbacks: [MAX_TICK_CALLBACKS]TickCallback = undefined, callbacks_len: usize = 0, callbacks_mutex: std.Thread.Mutex = .{}, +/// Optional IP filter for blocking requests to private/internal networks (--block-private-networks). +ip_filter: ?*IpFilter = null, + const TickCallback = struct { ctx: *anyopaque, fun: *const fn (*anyopaque) void, @@ -230,13 +234,31 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { ca_blob = try loadCerts(allocator); } + // IP filter for blocking requests to private/internal networks. 
+ const block_private = config.blockPrivateNetworks(); + const cidrs: ?IpFilter.Cidrs = blk: { + const s = config.blockCidrs() orelse break :blk null; + break :blk try IpFilter.parseCidrList(allocator, s); + }; + const has_cidrs = if (cidrs) |c| c.v4.len > 0 or c.v6.len > 0 or c.allow_v4.len > 0 or c.allow_v6.len > 0 else false; + const ip_filter: ?*IpFilter = blk: { + if (!block_private and !has_cidrs) break :blk null; + const f = try allocator.create(IpFilter); + f.* = IpFilter.init(block_private, cidrs); + break :blk f; + }; + errdefer if (ip_filter) |f| { + f.deinit(allocator); + allocator.destroy(f); + }; + const count: usize = config.httpMaxConcurrent(); const connections = try allocator.alloc(http.Connection, count); errdefer allocator.free(connections); var available: std.DoublyLinkedList = .{}; for (0..count) |i| { - connections[i] = try http.Connection.init(ca_blob, config); + connections[i] = try http.Connection.init(ca_blob, config, ip_filter); available.append(&connections[i].node); } @@ -280,6 +302,8 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network { .ws_pool = .init(allocator), .ws_max = config.wsMaxConcurrent(), + + .ip_filter = ip_filter, }; } @@ -316,6 +340,11 @@ pub fn deinit(self: *Network) void { if (self.cache) |*cache| cache.deinit(); + if (self.ip_filter) |f| { + f.deinit(self.allocator); + self.allocator.destroy(f); + } + globalDeinit(); } @@ -612,7 +641,7 @@ pub fn releaseConnection(self: *Network, conn: *http.Connection) void { self.ws_count -= 1; }, else => { - conn.reset(self.config, self.ca_blob) catch |err| { + conn.reset(self.config, self.ca_blob, self.ip_filter) catch |err| { lp.assert(false, "couldn't reset curl easy", .{ .err = err }); }; self.conn_mutex.lock(); @@ -637,7 +666,7 @@ pub fn newConnection(self: *Network) ?*http.Connection { }; // don't do this under lock - conn.* = http.Connection.init(self.ca_blob, self.config) catch { + conn.* = http.Connection.init(self.ca_blob, self.config, 
self.ip_filter) catch { self.ws_mutex.lock(); defer self.ws_mutex.unlock(); self.ws_pool.destroy(conn); diff --git a/src/network/http.zig b/src/network/http.zig index ec49b60f..f542be3c 100644 --- a/src/network/http.zig +++ b/src/network/http.zig @@ -17,9 +17,11 @@ // along with this program. If not, see . const std = @import("std"); +const posix = std.posix; const Config = @import("../Config.zig"); const libcurl = @import("../sys/libcurl.zig"); +const IpFilter = @import("IpFilter.zig"); const log = @import("lightpanda").log; const assert = @import("lightpanda").assert; @@ -229,6 +231,35 @@ pub const ResponseHead = struct { } }; +/// Opensocket callback: blocks connections to private/internal IP ranges +/// before TCP SYN, regardless of request origin (JS, HTML resources, redirects, etc.). +/// Called by curl after DNS resolution, before the socket is created. +/// Returns CURL_SOCKET_BAD to block; otherwise creates and returns a real socket fd. +/// clientp is a *const IpFilter passed via CURLOPT_OPENSOCKETDATA. 
+fn opensocketCallback( + purpose: libcurl.CurlSockType, + address: *libcurl.CurlSockAddr, + clientp: ?*anyopaque, +) libcurl.CurlSocket { + const filter: *const IpFilter = @ptrCast(@alignCast(clientp orelse return libcurl.CURL_SOCKET_BAD)); + if (filter.isBlockedSockaddr(address)) { + if (address.family == posix.AF.INET or address.family == posix.AF.INET6) { + const ip = std.net.Address.initPosix(@ptrCast(&address.addr)); + log.warn(.http, "blocked by IP filter", .{ .ip = ip }); + } else { + log.warn(.http, "blocked by IP filter", .{ .family = address.family }); + } + return libcurl.CURL_SOCKET_BAD; + } + _ = purpose; // purpose is informational; we always open the same socket type + const fd = posix.socket( + @intCast(address.family), + @intCast(address.socktype), + @intCast(address.protocol), + ) catch return libcurl.CURL_SOCKET_BAD; + return fd; +} + pub const Connection = struct { _easy: *libcurl.Curl, transport: Transport, @@ -240,13 +271,17 @@ pub const Connection = struct { websocket: *@import("../browser/webapi/net/WebSocket.zig"), }; - pub fn init(ca_blob: ?libcurl.CurlBlob, config: *const Config) !Connection { + pub fn init( + ca_blob: ?libcurl.CurlBlob, + config: *const Config, + ip_filter: ?*const IpFilter, + ) !Connection { const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy; var self = Connection{ ._easy = easy, .transport = .none }; errdefer self.deinit(); - try self.reset(config, ca_blob); + try self.reset(config, ca_blob, ip_filter); return self; } @@ -371,6 +406,7 @@ pub const Connection = struct { self: *Connection, config: *const Config, ca_blob: ?libcurl.CurlBlob, + ip_filter: ?*const IpFilter, ) !void { libcurl.curl_easy_reset(self._easy); self.transport = .none; @@ -421,6 +457,15 @@ pub const Connection = struct { // try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback); } + + // default write callback to prevent libcurl from writing to stdout + try self.setWriteCallback(discardBody); + + // IP 
filter: block private/internal network addresses + if (ip_filter) |filter| { + try libcurl.curl_easy_setopt(self._easy, .opensocket_function, opensocketCallback); + try libcurl.curl_easy_setopt(self._easy, .opensocket_data, @constCast(filter)); + } } fn discardBody(_: [*]const u8, count: usize, len: usize, _: ?*anyopaque) usize { @@ -603,3 +648,53 @@ fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8, } return 0; } + +// ── Unit tests for opensocketCallback ──────────────────────────────────────── + +fn makeSockAddrV4(ip: [4]u8) libcurl.CurlSockAddr { + var sa: posix.sockaddr.in = .{ + .port = 0, + .addr = @bitCast(ip), + }; + var curl_sa: libcurl.CurlSockAddr = .{ + .family = posix.AF.INET, + .socktype = posix.SOCK.STREAM, + .protocol = 0, + .addrlen = @sizeOf(posix.sockaddr.in), + .addr = undefined, + }; + @memcpy(std.mem.asBytes(&curl_sa.addr)[0..@sizeOf(posix.sockaddr.in)], std.mem.asBytes(&sa)); + return curl_sa; +} + +test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" { + const filter = IpFilter.init(true, null); + var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); + const result = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); + try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, result); +} + +test "opensocketCallback: public IPv4 opens a real socket" { + // 8.8.8.8 — not in any blocked range; callback should create a real socket + const filter = IpFilter.init(true, null); + var sa = makeSockAddrV4(.{ 8, 8, 8, 8 }); + const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); + // A real fd is always >= 0 + try std.testing.expect(fd >= 0); + posix.close(fd); +} + +test "opensocketCallback: null clientp returns CURL_SOCKET_BAD (fail-closed)" { + var sa = makeSockAddrV4(.{ 8, 8, 8, 8 }); + const result = opensocketCallback(.ipcxn, &sa, null); + try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, result); +} + +test "opensocketCallback: block_private=false allows private IP" { + // When 
block_private is false the filter blocks nothing + const filter = IpFilter.init(false, null); + var sa = makeSockAddrV4(.{ 127, 0, 0, 1 }); + const fd = opensocketCallback(.ipcxn, &sa, @ptrCast(@constCast(&filter))); + try std.testing.expect(fd >= 0); + posix.close(fd); +} diff --git a/src/sys/libcurl.zig b/src/sys/libcurl.zig index 31587823..b621a3a3 100644 --- a/src/sys/libcurl.zig +++ b/src/sys/libcurl.zig @@ -43,6 +43,23 @@ pub const curl_writefunc_error: usize = c.CURL_WRITEFUNC_ERROR; pub const curl_readfunc_pause: usize = c.CURL_READFUNC_PAUSE; pub const CurlReadFunction = fn ([*]u8, usize, usize, *anyopaque) usize; +pub const CurlSockType = enum(c.curlsocktype) { + ipcxn = c.CURLSOCKTYPE_IPCXN, + accept = c.CURLSOCKTYPE_ACCEPT, +}; + +/// Mirror of curl's struct curl_sockaddr. The addr field is a struct sockaddr +/// inline (not a pointer), so addrlen tells you how many bytes of addr are valid. +pub const CurlSockAddr = extern struct { + family: c_int, + socktype: c_int, + protocol: c_int, + addrlen: c_uint, + addr: std.posix.sockaddr, +}; + +pub const CURL_SOCKET_BAD: c.curl_socket_t = c.CURL_SOCKET_BAD; + pub const FreeCallback = fn (ptr: ?*anyopaque) void; pub const StrdupCallback = fn (str: [*:0]const u8) ?[*:0]u8; pub const MallocCallback = fn (size: usize) ?*anyopaque; @@ -137,8 +154,17 @@ comptime { return 0; } }.cb; + const opensocket_cb_check: c.curl_opensocket_callback = struct { + fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t { + _ = clientp; + _ = purpose; + _ = address; + return CURL_SOCKET_BAD; + } + }.cb; _ = debug_cb_check; _ = write_cb_check; + _ = opensocket_cb_check; if (@sizeOf(CurlWaitFd) != @sizeOf(c.curl_waitfd)) { @compileError("CurlWaitFd size mismatch"); @@ -152,6 +178,17 @@ comptime { if (c.CURL_WAIT_POLLIN != 1 or c.CURL_WAIT_POLLPRI != 2 or c.CURL_WAIT_POLLOUT != 4) { @compileError("CURL_WAIT_* flag values don't match CurlWaitEvents packed struct bit layout"); } + 
if (@sizeOf(CurlSockAddr) != @sizeOf(c.curl_sockaddr)) { + @compileError("CurlSockAddr size mismatch with curl_sockaddr"); + } + if (@offsetOf(CurlSockAddr, "family") != @offsetOf(c.curl_sockaddr, "family") or + @offsetOf(CurlSockAddr, "socktype") != @offsetOf(c.curl_sockaddr, "socktype") or + @offsetOf(CurlSockAddr, "protocol") != @offsetOf(c.curl_sockaddr, "protocol") or + @offsetOf(CurlSockAddr, "addrlen") != @offsetOf(c.curl_sockaddr, "addrlen") or + @offsetOf(CurlSockAddr, "addr") != @offsetOf(c.curl_sockaddr, "addr")) + { + @compileError("CurlSockAddr layout mismatch with curl_sockaddr"); + } } pub const CurlOption = enum(c.CURLoption) { @@ -190,6 +227,8 @@ pub const CurlOption = enum(c.CURLoption) { read_function = c.CURLOPT_READFUNCTION, connect_only = c.CURLOPT_CONNECT_ONLY, upload = c.CURLOPT_UPLOAD, + opensocket_function = c.CURLOPT_OPENSOCKETFUNCTION, + opensocket_data = c.CURLOPT_OPENSOCKETDATA, }; pub const CurlMOption = enum(c.CURLMoption) { @@ -620,6 +659,7 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype .header_data, .read_data, .write_data, + .opensocket_data, => blk: { const ptr: ?*anyopaque = switch (@typeInfo(@TypeOf(value))) { .null => null, @@ -643,6 +683,20 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype break :blk c.curl_easy_setopt(easy, opt, cb); }, + .opensocket_function => blk: { + const cb: c.curl_opensocket_callback = switch (@typeInfo(@TypeOf(value))) { + .null => null, + .@"fn" => struct { + fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t { + const addr: *CurlSockAddr = @ptrCast(address orelse return CURL_SOCKET_BAD); + return value(@enumFromInt(purpose), addr, clientp); + } + }.cb, + else => @compileError("expected Zig function or null for " ++ @tagName(option) ++ ", got " ++ @typeName(@TypeOf(value))), + }; + break :blk c.curl_easy_setopt(easy, opt, cb); + }, + .header_function => blk: { const cb: 
c.curl_write_callback = switch (@typeInfo(@TypeOf(value))) { .null => null,