Merge branch 'main' into fix/markdown-link-formatting

This commit is contained in:
Adrià Arrufat
2026-04-10 16:26:14 +02:00
89 changed files with 2001 additions and 335 deletions

View File

@@ -1,5 +1,5 @@
name: "Browsercore install"
description: "Install deps for the project browsercore"
name: "Deps install"
description: "Install deps for the browser"
inputs:
arch:

42
.github/actions/v8-snapshot/action.yml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: "V8 snapshot"
description: "Generate v8 snapshot"
inputs:
arch:
description: 'CPU arch used to select the v8 lib'
required: false
default: 'x86_64'
os:
description: 'OS used to select the v8 lib'
required: false
default: 'linux'
runs:
using: "composite"
steps:
# Use the commit hash of bridge.zig and Snapshot.zig as cache key for
# snapshot.
- name: V8 snapshot cache key
id: snapshot_cache_key
run: echo "hash=v8-snapshot-${{ inputs.os }}_${{ inputs.arch }}-$(git log -n 1 --pretty=format:%H --
src/browser/js/bridge.zig
src/browser/js/Snapshot.zig
)" >> "$GITHUB_OUTPUT"
shell: bash
# Fetch the cache for snapshot
- name: Cache V8 snapshot
id: cache-v8-snapshot
uses: actions/cache@v5
env:
cache-name: cache-v8-snapshot
with:
path: src/snapshot.bin
key: ${{ steps.snapshot_cache_key.outputs.hash }}
# Generate snapshot on cache miss.
- name: v8 snapshot
shell: bash
if: hashFiles('src/snapshot.bin') == ''
run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin

View File

@@ -39,7 +39,7 @@ jobs:
name: zig build release
runs-on: ubuntu-latest
timeout-minutes: 15
timeout-minutes: 20
# Don't run the CI with draft PR.
if: github.event.pull_request.draft == false
@@ -50,9 +50,7 @@ jobs:
fetch-depth: 0
- uses: ./.github/actions/install
- name: v8 snapshot
run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin
- uses: ./.github/actions/v8-snapshot
- name: zig build release
run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=x86_64

View File

@@ -36,8 +36,10 @@ jobs:
os: ${{env.OS}}
arch: ${{env.ARCH}}
- name: v8 snapshot
run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin
- uses: ./.github/actions/v8-snapshot
with:
os: ${{env.OS}}
arch: ${{env.ARCH}}
- name: zig build release
run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=generic
@@ -170,6 +172,21 @@ jobs:
cd ./wptdiff
CGO_ENABLED=0 go build
- run: |
./wptdiff/wptdiff --completion |tee completion.log
- name: Send completion to slack
uses: slackapi/slack-github-action@v3.0.1
with:
errors: true
method: files.uploadV2
token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
payload: |
channel_id: ${{ vars.WPT_SLACK_CHANNEL_ID }}
initial_comment: "Last WPT completion"
file: "./completion.log"
filename: "wpt-completion-${{ github.sha }}.txt"
- run: |
./wptdiff/wptdiff |tee diff.log

View File

@@ -69,7 +69,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
app.telemetry = try Telemetry.init(app, config.mode);
errdefer app.telemetry.deinit(allocator);
app.arena_pool = ArenaPool.init(allocator, 512, 1024 * 16);
app.arena_pool = ArenaPool.init(allocator, .{});
errdefer app.arena_pool.deinit();
return app;

View File

@@ -27,33 +27,52 @@ const ArenaPool = @This();
const IS_DEBUG = builtin.mode == .Debug;
allocator: Allocator,
retain_bytes: usize,
free_list_len: u16 = 0,
free_list: ?*Entry = null,
free_list_max: u16,
entry_pool: std.heap.MemoryPool(Entry),
mutex: std.Thread.Mutex = .{},
// Debug mode: track acquire/release counts per debug name to detect leaks and double-frees
_leak_track: if (IS_DEBUG) std.StringHashMapUnmanaged(isize) else void = if (IS_DEBUG) .empty else {},
pub const BucketSize = enum { tiny, small, medium, large };
// Per-size-class state of the pool: a singly linked free list of idle arenas
// together with the limits that apply to arenas of this class.
const Bucket = struct {
// Head of the list of idle entries; acquire pops from here, release pushes here.
free_list: ?*Entry = null,
// Number of entries currently linked on free_list.
free_list_len: u16 = 0,
// Once free_list_len reaches this value, release destroys the arena instead
// of queueing it.
free_list_max: u16,
// Passed as `retain_with_limit` when an arena is reset on release. Size-based
// acquire also compares the requested size against this value to pick a bucket.
retain_bytes: usize,
};
// A pooled arena plus its bookkeeping. Entries are created from the pool's
// entry_pool and callers only ever see `arena.allocator()`.
const Entry = struct {
// Next idle entry on the owning bucket's free list.
next: ?*Entry,
// The arena handed out to callers; release recovers the Entry from a pointer
// to this field via @fieldParentPtr.
arena: ArenaAllocator,
// Bucket selected when the entry was created; release reads it to find the
// retain limit and the free list to push onto.
bucket: *Bucket,
// Debug builds only: the label passed to the acquire call that handed out
// this entry. A zero-sized `void` in other build modes.
debug: if (IS_DEBUG) []const u8 else void = if (IS_DEBUG) "" else {},
};
pub const DebugInfo = struct {
debug: []const u8 = "",
// Per-bucket tuning passed to ArenaPool.init. Every bucket has a default, so
// `.{}` yields the stock configuration and callers can override single buckets.
pub const Config = struct {
// Default: keep up to 512 idle arenas, 1 KiB retain limit.
tiny: Config.Bucket = .{ .max = 512, .retain = 1024 },
// Default: keep up to 128 idle arenas, 4 KiB retain limit.
small: Config.Bucket = .{ .max = 128, .retain = 4 * 1024 },
// Default: keep up to 64 idle arenas, 16 KiB retain limit.
medium: Config.Bucket = .{ .max = 64, .retain = 16 * 1024 },
// Default: keep up to 32 idle arenas, 128 KiB retain limit.
large: Config.Bucket = .{ .max = 32, .retain = 128 * 1024 },
const Bucket = struct {
// Copied into the runtime bucket's free_list_max by init.
max: u16,
// Copied into the runtime bucket's retain_bytes by init.
retain: usize,
};
};
pub fn init(allocator: Allocator, free_list_max: u16, retain_bytes: usize) ArenaPool {
tiny: Bucket,
small: Bucket,
medium: Bucket,
large: Bucket,
allocator: Allocator,
mutex: std.Thread.Mutex = .{},
entry_pool: std.heap.MemoryPool(Entry),
_leak_track: if (IS_DEBUG) std.StringHashMapUnmanaged(isize) else void = if (IS_DEBUG) .empty else {},
pub fn init(allocator: Allocator, config: Config) ArenaPool {
return .{
.allocator = allocator,
.free_list_max = free_list_max,
.retain_bytes = retain_bytes,
.entry_pool = .init(allocator),
._leak_track = if (IS_DEBUG) .empty else {},
.tiny = .{ .free_list_max = config.tiny.max, .retain_bytes = config.tiny.retain },
.small = .{ .free_list_max = config.small.max, .retain_bytes = config.small.retain },
.medium = .{ .free_list_max = config.medium.max, .retain_bytes = config.medium.retain },
.large = .{ .free_list_max = config.large.max, .retain_bytes = config.large.retain },
};
}
@@ -73,24 +92,49 @@ pub fn deinit(self: *ArenaPool) void {
self._leak_track.deinit(self.allocator);
}
var entry = self.free_list;
while (entry) |e| {
entry = e.next;
e.arena.deinit();
// Free all arenas in all buckets
inline for (&[_]*Bucket{ &self.tiny, &self.small, &self.medium, &self.large }) |bucket| {
var entry = bucket.free_list;
while (entry) |e| {
entry = e.next;
e.arena.deinit();
}
}
self.entry_pool.deinit();
}
pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator {
// Acquire an arena from the pool.
// - Pass a BucketSize (.tiny, .small, .medium, .large) for explicit bucket selection
// - Pass a usize for automatic bucket selection based on expected size
pub fn acquire(self: *ArenaPool, size_or_bucket: anytype, debug: []const u8) !Allocator {
const bucket = blk: {
const T = @TypeOf(size_or_bucket);
if (T == BucketSize or T == @TypeOf(.enum_literal)) {
break :blk switch (@as(BucketSize, size_or_bucket)) {
.tiny => &self.tiny,
.small => &self.small,
.medium => &self.medium,
.large => &self.large,
};
}
if (T == usize or T == comptime_int) {
if (size_or_bucket <= self.tiny.retain_bytes) break :blk &self.tiny;
if (size_or_bucket <= self.small.retain_bytes) break :blk &self.small;
if (size_or_bucket <= self.medium.retain_bytes) break :blk &self.medium;
break :blk &self.large;
}
@compileError("acquire expects BucketSize or usize, got " ++ @typeName(T));
};
self.mutex.lock();
defer self.mutex.unlock();
if (self.free_list) |entry| {
self.free_list = entry.next;
self.free_list_len -= 1;
if (bucket.free_list) |entry| {
bucket.free_list = entry.next;
bucket.free_list_len -= 1;
if (IS_DEBUG) {
entry.debug = dbg.debug;
const gop = try self._leak_track.getOrPut(self.allocator, dbg.debug);
entry.debug = debug;
const gop = try self._leak_track.getOrPut(self.allocator, debug);
if (!gop.found_existing) {
gop.value_ptr.* = 0;
}
@@ -102,12 +146,13 @@ pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator {
const entry = try self.entry_pool.create();
entry.* = .{
.next = null,
.bucket = bucket,
.debug = if (IS_DEBUG) debug else {},
.arena = ArenaAllocator.init(self.allocator),
.debug = if (IS_DEBUG) dbg.debug else {},
};
if (IS_DEBUG) {
const gop = try self._leak_track.getOrPut(self.allocator, dbg.debug);
const gop = try self._leak_track.getOrPut(self.allocator, debug);
if (!gop.found_existing) {
gop.value_ptr.* = 0;
}
@@ -116,12 +161,14 @@ pub fn acquire(self: *ArenaPool, dbg: DebugInfo) !Allocator {
return entry.arena.allocator();
}
// Universal release - determines bucket from the Entry automatically
pub fn release(self: *ArenaPool, allocator: Allocator) void {
const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr));
const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr));
const entry: *Entry = @fieldParentPtr("arena", arena);
const bucket = entry.bucket;
// Reset the arena before acquiring the lock to minimize lock hold time
_ = arena.reset(.{ .retain_with_limit = self.retain_bytes });
_ = arena.reset(.{ .retain_with_limit = bucket.retain_bytes });
self.mutex.lock();
defer self.mutex.unlock();
@@ -139,105 +186,113 @@ pub fn release(self: *ArenaPool, allocator: Allocator) void {
}
}
const free_list_len = self.free_list_len;
if (free_list_len == self.free_list_max) {
if (bucket.free_list_len >= bucket.free_list_max) {
arena.deinit();
self.entry_pool.destroy(entry);
return;
}
entry.next = self.free_list;
self.free_list_len = free_list_len + 1;
self.free_list = entry;
entry.next = bucket.free_list;
bucket.free_list = entry;
bucket.free_list_len += 1;
}
pub fn reset(_: *const ArenaPool, allocator: Allocator, retain: usize) void {
const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr));
const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr));
_ = arena.reset(.{ .retain_with_limit = retain });
}
pub fn resetRetain(_: *const ArenaPool, allocator: Allocator) void {
const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(allocator.ptr));
const arena: *ArenaAllocator = @ptrCast(@alignCast(allocator.ptr));
_ = arena.reset(.retain_capacity);
}
const testing = std.testing;
test "arena pool - basic acquire and use" {
var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16);
test "ArenaPool: basic acquire and release" {
var pool = ArenaPool.init(testing.allocator, .{});
defer pool.deinit();
const alloc = try pool.acquire(.{ .debug = "test" });
const buf = try alloc.alloc(u8, 64);
@memset(buf, 0xAB);
try testing.expectEqual(@as(u8, 0xAB), buf[0]);
const tiny = try pool.acquire(.tiny, "test-tiny");
const medium = try pool.acquire(.medium, "test-medium");
const large = try pool.acquire(.large, "test-large");
pool.release(alloc);
// All three must be distinct arenas
try testing.expect(tiny.ptr != medium.ptr);
try testing.expect(medium.ptr != large.ptr);
_ = try tiny.alloc(u8, 64);
_ = try medium.alloc(u8, 1024);
_ = try large.alloc(u8, 4096);
// Universal release works for all buckets
pool.release(tiny);
pool.release(medium);
pool.release(large);
try testing.expectEqual(1, pool.tiny.free_list_len);
try testing.expectEqual(1, pool.medium.free_list_len);
try testing.expectEqual(1, pool.large.free_list_len);
}
test "arena pool - reuse entry after release" {
var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16);
test "ArenaPool: reuse from correct bucket" {
var pool = ArenaPool.init(testing.allocator, .{});
defer pool.deinit();
const alloc1 = try pool.acquire(.{ .debug = "test" });
try testing.expectEqual(@as(u16, 0), pool.free_list_len);
const tiny1 = try pool.acquire(.tiny, "test");
pool.release(tiny1);
try testing.expectEqual(1, pool.tiny.free_list_len);
pool.release(alloc1);
try testing.expectEqual(@as(u16, 1), pool.free_list_len);
// Next acquire with .tiny should reuse from tiny bucket
const tiny2 = try pool.acquire(.tiny, "test");
try testing.expectEqual(0, pool.tiny.free_list_len);
try testing.expectEqual(tiny1.ptr, tiny2.ptr);
// The same entry should be returned from the free list.
const alloc2 = try pool.acquire(.{ .debug = "test" });
try testing.expectEqual(@as(u16, 0), pool.free_list_len);
try testing.expectEqual(alloc1.ptr, alloc2.ptr);
// acquire with .medium should NOT get the tiny arena
const medium = try pool.acquire(.medium, "test-medium");
try testing.expect(medium.ptr != tiny2.ptr);
pool.release(alloc2);
pool.release(tiny2);
pool.release(medium);
}
test "arena pool - multiple concurrent arenas" {
var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16);
test "ArenaPool: respects per-bucket max limits" {
var pool = ArenaPool.init(testing.allocator, .{
.tiny = .{ .max = 1, .retain = 1024 },
.medium = .{ .max = 2, .retain = 1024 },
.large = .{ .max = 1, .retain = 1024 },
});
defer pool.deinit();
const a1 = try pool.acquire(.{ .debug = "test1" });
const a2 = try pool.acquire(.{ .debug = "test2" });
const a3 = try pool.acquire(.{ .debug = "test3" });
// Acquire 3 tiny arenas
const t1 = try pool.acquire(.tiny, "t1");
const t2 = try pool.acquire(.tiny, "t2");
const t3 = try pool.acquire(.tiny, "t3");
// All three must be distinct arenas.
try testing.expect(a1.ptr != a2.ptr);
try testing.expect(a2.ptr != a3.ptr);
try testing.expect(a1.ptr != a3.ptr);
// Release all 3, but only 1 should be kept (tiny_max = 1)
pool.release(t1);
try testing.expectEqual(1, pool.tiny.free_list_len);
pool.release(t2);
try testing.expectEqual(1, pool.tiny.free_list_len); // still 1, t2 discarded
pool.release(t3);
try testing.expectEqual(1, pool.tiny.free_list_len); // still 1, t3 discarded
_ = try a1.alloc(u8, 16);
_ = try a2.alloc(u8, 32);
_ = try a3.alloc(u8, 48);
// Acquire 3 medium arenas
const m1 = try pool.acquire(.medium, "m1");
const m2 = try pool.acquire(.medium, "m2");
const m3 = try pool.acquire(.medium, "m3");
pool.release(a1);
pool.release(a2);
pool.release(a3);
try testing.expectEqual(@as(u16, 3), pool.free_list_len);
// Release all 3, but only 2 should be kept (medium_max = 2)
pool.release(m1);
pool.release(m2);
pool.release(m3);
try testing.expectEqual(2, pool.medium.free_list_len);
}
test "arena pool - free list respects max limit" {
// Cap the free list at 1 so the second release discards its arena.
var pool = ArenaPool.init(testing.allocator, 1, 1024 * 16);
test "ArenaPool: reset clears memory without releasing" {
var pool = ArenaPool.init(testing.allocator, .{});
defer pool.deinit();
const a1 = try pool.acquire(.{ .debug = "test1" });
const a2 = try pool.acquire(.{ .debug = "test2" });
pool.release(a1);
try testing.expectEqual(@as(u16, 1), pool.free_list_len);
// The free list is full; a2's arena should be destroyed, not queued.
pool.release(a2);
try testing.expectEqual(@as(u16, 1), pool.free_list_len);
}
test "arena pool - reset clears memory without releasing" {
var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16);
defer pool.deinit();
const alloc = try pool.acquire(.{ .debug = "test" });
const alloc = try pool.acquire(.medium, "test");
const buf = try alloc.alloc(u8, 128);
@memset(buf, 0xFF);
@@ -246,7 +301,7 @@ test "arena pool - reset clears memory without releasing" {
pool.reset(alloc, 0);
// The free list must stay empty; the allocator was not released.
try testing.expectEqual(@as(u16, 0), pool.free_list_len);
try testing.expectEqual(0, pool.medium.free_list_len);
// Allocating again through the same arena must still work.
const buf2 = try alloc.alloc(u8, 64);
@@ -256,18 +311,60 @@ test "arena pool - reset clears memory without releasing" {
pool.release(alloc);
}
test "arena pool - deinit with entries in free list" {
test "ArenaPool: deinit with entries in free list" {
// Verifies that deinit properly cleans up free-listed arenas (no leaks
// detected by the test allocator).
var pool = ArenaPool.init(testing.allocator, 512, 1024 * 16);
var pool = ArenaPool.init(testing.allocator, .{});
const a1 = try pool.acquire(.{ .debug = "test1" });
const a2 = try pool.acquire(.{ .debug = "test2" });
const a1 = try pool.acquire(.tiny, "test1");
const a2 = try pool.acquire(.medium, "test2");
_ = try a1.alloc(u8, 256);
_ = try a2.alloc(u8, 512);
pool.release(a1);
pool.release(a2);
try testing.expectEqual(@as(u16, 2), pool.free_list_len);
try testing.expectEqual(1, pool.tiny.free_list_len);
try testing.expectEqual(1, pool.medium.free_list_len);
pool.deinit();
}
test "ArenaPool: small bucket" {
    // Only the small bucket is overridden: it keeps at most two idle arenas.
    var pool = ArenaPool.init(testing.allocator, .{
        .small = .{ .max = 2, .retain = 4 * 1024 },
    });
    defer pool.deinit();

    const first = try pool.acquire(.small, "s1");
    const second = try pool.acquire(.small, "s2");
    const third = try pool.acquire(.small, "s3");

    // Give back one more arena than the bucket is allowed to keep.
    inline for (.{ first, second, third }) |arena| {
        pool.release(arena);
    }

    // The last release found the free list full, so only two are queued.
    try testing.expectEqual(2, pool.small.free_list_len);
}
test "ArenaPool: size-based acquire" {
    var pool = ArenaPool.init(testing.allocator, .{});
    defer pool.deinit();

    // With the default config the expected size selects the bucket:
    // <= 1KB -> tiny, <= 4KB -> small, <= 16KB -> medium, larger -> large.
    const tiny_arena = try pool.acquire(500, "fits-tiny");
    const small_arena = try pool.acquire(2000, "fits-small");
    const medium_arena = try pool.acquire(8000, "fits-medium");
    const large_arena = try pool.acquire(20000, "fits-large");

    inline for (.{ tiny_arena, small_arena, medium_arena, large_arena }) |arena| {
        pool.release(arena);
    }

    // Exactly one idle arena per bucket means each request landed in its own
    // size class.
    inline for (.{ &pool.tiny, &pool.small, &pool.medium, &pool.large }) |bucket| {
        try testing.expectEqual(1, bucket.free_list_len);
    }
}

View File

@@ -212,6 +212,20 @@ pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig {
};
}
/// Returns the `block_private_networks` flag from the common options of the
/// active mode (set by `--block-private-networks`).
/// Only the serve, fetch and mcp modes are handled; any other mode is treated
/// as unreachable.
pub fn blockPrivateNetworks(self: *const Config) bool {
    switch (self.mode) {
        inline .serve, .fetch, .mcp => |opts| return opts.common.block_private_networks,
        else => unreachable,
    }
}
/// Returns the raw `--block-cidrs` value from the common options of the
/// active mode, or null when the option was not given.
/// Only the serve, fetch and mcp modes are handled; any other mode is treated
/// as unreachable.
pub fn blockCidrs(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp => |opts| return opts.common.block_cidrs,
        else => unreachable,
    }
}
pub fn maxConnections(self: *const Config) u16 {
return switch (self.mode) {
.serve => |opts| opts.cdp_max_connections,
@@ -300,6 +314,9 @@ pub const Common = struct {
web_bot_auth_key_file: ?[]const u8 = null,
web_bot_auth_keyid: ?[]const u8 = null,
web_bot_auth_domain: ?[]const u8 = null,
block_private_networks: bool = false,
block_cidrs: ?[]const u8 = null,
};
/// Pre-formatted HTTP headers for reuse across Http and Client.
@@ -362,6 +379,21 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\ we make requests towards.
\\ Defaults to false.
\\
\\--block-private-networks
\\ Blocks HTTP requests to private/internal IP addresses
\\ after DNS resolution. Useful for sandboxing, multi-tenant
\\ deployments, and preventing access to internal infrastructure
\\ regardless of what triggers the request (JavaScript, HTML
\\ resources, redirects, etc.).
\\ Defaults to false.
\\
\\--block-cidrs
\\ Additional CIDR ranges to block, comma-separated.
\\ Prefix with '-' to allow (exempt from blocking).
\\ e.g. --block-cidrs 169.254.169.254/32,fd00:ec2::254/128
\\ e.g. --block-cidrs 10.0.0.0/8,-10.0.0.42/32
\\ Can be used standalone or combined with --block-private-networks.
\\
\\--http-proxy The HTTP proxy to use for all HTTP requests.
\\ A username:password can be included for basic authentication.
\\ Defaults to none.
@@ -1145,5 +1177,19 @@ fn parseCommonArg(
return true;
}
if (std.mem.eql(u8, "--block-private-networks", opt)) {
common.block_private_networks = true;
return true;
}
if (std.mem.eql(u8, "--block-cidrs", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--block-cidrs" });
return error.InvalidArgument;
};
common.block_cidrs = try allocator.dupe(u8, str);
return true;
}
return false;
}

View File

@@ -671,7 +671,7 @@ pub fn getNodeDetails(
if (el.getAttributeSafe(comptime .wrap("href"))) |h| {
const URL = lp.URL;
href = URL.resolve(arena, page.base(), h, .{ .encode = true }) catch h;
href = URL.resolve(arena, page.base(), h, .{ .encoding = page.charset }) catch h;
}
if (el.is(Element.Html.Input)) |input| {

View File

@@ -374,7 +374,8 @@ fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
fn processRequest(self: *Client, req: Request) !void {
if (self.network.cache) |*cache| {
if (req.method == .GET) {
const arena = try self.network.app.arena_pool.acquire(.{ .debug = "HttpClient.processRequest.cache" });
// cache is only used to read the meta data
const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache");
defer self.network.app.arena_pool.release(arena);
var iter = req.headers.iterator();

View File

@@ -207,6 +207,9 @@ base_url: ?[:0]const u8 = null,
// referer header cache.
referer_header: ?[:0]const u8 = null,
// Document charset (canonical name from encoding_rs, static lifetime)
charset: []const u8 = "UTF-8",
// Arbitrary buffer. Need to temporarily lowercase a value? Use this. No lifetime
// guarantee - it's valid until someone else uses it.
buf: [BUF_SIZE]u8 = undefined,
@@ -248,7 +251,7 @@ pub fn init(self: *Page, frame_id: u32, session: *Session, parent: ?*Page) !void
log.debug(.page, "page.init", .{});
}
const call_arena = try session.getArena(.{ .debug = "call_arena" });
const call_arena = try session.getArena(.medium, "call_arena");
errdefer session.releaseArena(call_arena);
const factory = &session.factory;
@@ -429,8 +432,8 @@ pub fn headersForRequest(self: *Page, headers: *HttpClient.Headers) !void {
}
}
pub fn getArena(self: *Page, comptime opts: Session.GetArenaOpts) !Allocator {
return self._session.getArena(opts);
pub fn getArena(self: *Page, size_or_bucket: anytype, debug: []const u8) !Allocator {
return self._session.getArena(size_or_bucket, debug);
}
pub fn releaseArena(self: *Page, allocator: Allocator) void {
@@ -510,7 +513,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
log.warn(.js, "invalid blob", .{ .url = request_url });
return error.BlobNotFound;
};
const parse_arena = try self.getArena(.{ .debug = "Page.parseBlob" });
const parse_arena = try self.getArena(.medium, "Page.parseBlob");
defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self);
parser.parse(blob._slice);
@@ -619,7 +622,7 @@ pub fn scheduleNavigation(self: *Page, request_url: []const u8, opts: NavigateOp
if (self.canScheduleNavigation(std.meta.activeTag(nt)) == false) {
return;
}
const arena = try self._session.getArena(.{ .debug = "scheduleNavigation" });
const arena = try self._session.getArena(.small, "scheduleNavigation");
errdefer self._session.releaseArena(arena);
return self.scheduleNavigationWithArena(arena, request_url, opts, nt);
}
@@ -658,7 +661,7 @@ fn scheduleNavigationWithArena(originator: *Page, arena: Allocator, request_url:
arena,
page_base,
request_url,
.{ .always_dupe = true, .encode = true },
.{ .always_dupe = true, .encoding = originator.charset },
);
break :blk .{ u, false };
};
@@ -962,9 +965,13 @@ fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void {
switch (mime.content_type) {
.text_html => {
self._parse_state = .{ .html = .{
.mime = mime,
} };
// Normalize and store the charset using encoding_rs canonical names
const charset_str = mime.charsetString();
const info = h5e.encoding_for_label(charset_str.ptr, charset_str.len);
if (info.isValid()) {
self.charset = info.name();
}
self._parse_state = .{ .html = .empty };
},
.application_json, .text_javascript, .text_css, .text_plain => {
var arr: std.ArrayList(u8) = .empty;
@@ -979,7 +986,7 @@ fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void {
}
switch (self._parse_state) {
.html => |*html| try html.buf.appendSlice(self.arena, data),
.html => |*html| try html.appendSlice(self.arena, data),
.text => |*buf| {
// we have to escape the data...
var v = data;
@@ -1022,18 +1029,19 @@ fn pageDoneCallback(ctx: *anyopaque) !void {
});
};
const parse_arena = try self.getArena(.{ .debug = "Page.parse" });
const parse_arena = try self.getArena(.medium, "Page.parse");
defer self.releaseArena(parse_arena);
var parser = Parser.init(parse_arena, self.document.asNode(), self);
switch (self._parse_state) {
.html => |*html_state| {
const raw_html = html_state.buf.items;
if (html_state.needsEncodingConversion()) {
parser.parseWithEncoding(raw_html, html_state.mime.charsetString());
} else {
.html => |*html_buf| {
const raw_html = html_buf.items;
if (std.mem.eql(u8, self.charset, "UTF-8")) {
parser.parse(raw_html);
} else {
parser.parseWithEncoding(raw_html, self.charset);
}
self._script_manager.staticScriptsDone();
self._parse_state = .complete;
@@ -1188,7 +1196,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *IFrame) !void {
self.call_arena, // ok to use, page.navigate dupes this
self.base(),
src,
.{ .encode = true },
.{ .encoding = self.charset },
);
};
@@ -3164,21 +3172,11 @@ const ParseState = union(enum) {
pre,
complete,
err: anyerror,
html: Html,
html: std.ArrayList(u8),
text: std.ArrayList(u8),
image: std.ArrayList(u8),
raw: std.ArrayList(u8),
raw_done: []const u8,
const Html = struct {
mime: Mime,
buf: std.ArrayList(u8) = .empty,
fn needsEncodingConversion(self: *const Html) bool {
const charset = self.mime.charsetString();
return !std.ascii.eqlIgnoreCase(charset, "utf-8") and !std.ascii.eqlIgnoreCase(charset, "utf8");
}
};
};
const LoadState = enum {
@@ -3568,7 +3566,7 @@ pub fn submitForm(self: *Page, submitter_: ?*Element, form_: ?*Element.Html.Form
// I don't think this is technically correct, but FormData handles it ok
const form_data = try FormData.init(form, submitter_, self);
const arena = try self._session.getArena(.{ .debug = "submitForm" });
const arena = try self._session.getArena(.medium, "submitForm");
errdefer self._session.releaseArena(arena);
const encoding = form_element.getAttributeSafe(comptime .wrap("enctype"));
@@ -3628,9 +3626,6 @@ fn asUint(comptime string: anytype) std.meta.Int(
const testing = @import("../testing.zig");
test "WebApi: Page" {
const filter: testing.LogFilter = .init(&.{ .http, .js });
defer filter.deinit();
try testing.htmlRunner("page", .{});
}

View File

@@ -249,7 +249,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
}
pub fn waitForSelector(self: *Runner, selector: [:0]const u8, timeout_ms: u32) !*Node.Element {
const arena = try self.session.getArena(.{ .debug = "Runner.waitForSelector" });
const arena = try self.session.getArena(.small, "Runner.waitForSelector");
defer self.session.releaseArena(arena);
var timer = try std.time.Timer.start();

View File

@@ -188,7 +188,7 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
var handover = false;
const page = self.page;
const arena = try page.getArena(.{ .debug = "addFromElement" });
const arena = try page.getArena(.large, "SM.addFromElement");
errdefer if (!handover) {
page.releaseArena(arena);
};
@@ -369,7 +369,7 @@ pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const
errdefer _ = self.imported_modules.remove(url);
const page = self.page;
const arena = try page.getArena(.{ .debug = "preloadImport" });
const arena = try page.getArena(.large, "SM.preloadImport");
errdefer page.releaseArena(arena);
const script = try arena.create(Script);
@@ -469,7 +469,7 @@ pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource {
pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void {
const page = self.page;
const arena = try page.getArena(.{ .debug = "getAsyncImport" });
const arena = try page.getArena(.large, "SM.getAsyncImport");
errdefer page.releaseArena(arena);
const script = try arena.create(Script);

View File

@@ -110,10 +110,10 @@ pub fn init(self: *Session, browser: *Browser, notification: *Notification) !voi
const allocator = browser.app.allocator;
const arena_pool = browser.arena_pool;
const arena = try arena_pool.acquire(.{ .debug = "Session" });
const arena = try arena_pool.acquire(.small, "Session");
errdefer arena_pool.release(arena);
const page_arena = try arena_pool.acquire(.{ .debug = "Session.page_arena" });
const page_arena = try arena_pool.acquire(.large, "Session.page_arena");
errdefer arena_pool.release(page_arena);
self.* = .{
@@ -186,12 +186,8 @@ pub fn removePage(self: *Session) void {
}
}
pub const GetArenaOpts = struct {
debug: []const u8,
};
pub fn getArena(self: *Session, opts: GetArenaOpts) !Allocator {
return self.arena_pool.acquire(.{ .debug = opts.debug });
pub fn getArena(self: *Session, size_or_bucket: anytype, debug: []const u8) !Allocator {
return self.arena_pool.acquire(size_or_bucket, debug);
}
pub fn releaseArena(self: *Session, allocator: Allocator) void {

View File

@@ -66,7 +66,7 @@ dirty: bool = false,
pub fn init(page: *Page) !StyleManager {
return .{
.page = page,
.arena = try page.getArena(.{ .debug = "StyleManager" }),
.arena = try page.getArena(.medium, "StyleManager"),
};
}

View File

@@ -19,16 +19,19 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const ResolveOpts = struct {
encode: bool = false,
/// Options accepted by `resolve`.
pub const ResolveOpts = struct {
/// null = don't encode, "UTF-8" = standard percent encoding,
/// other charset = encode query string using that charset with NCR fallback
encoding: ?[]const u8 = null,
/// When true, `resolve` copies `source_path` with the allocator even if it
/// is already null-terminated (non null-terminated input is always copied).
always_dupe: bool = false,
};
// path is anytype, so that it can be used with both []const u8 and [:0]const u8
pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, comptime opts: ResolveOpts) ![:0]const u8 {
pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, opts: ResolveOpts) ![:0]const u8 {
const PT = @TypeOf(source_path);
var path: [:0]const u8 = if (comptime !isNullTerminated(PT) or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path;
const needs_dupe = comptime !isNullTerminated(PT);
var path: [:0]const u8 = if (needs_dupe or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path;
if (base.len == 0) {
return processResolved(allocator, path, opts);
@@ -186,14 +189,12 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, c
return processResolved(allocator, out[0..out_i :0], opts);
}
fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 {
if (!comptime opts.encode) {
return url;
}
return ensureEncoded(allocator, url);
fn processResolved(allocator: Allocator, url: [:0]const u8, opts: ResolveOpts) ![:0]const u8 {
const encoding = opts.encoding orelse return url;
return ensureEncoded(allocator, url, encoding);
}
pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8, encoding: []const u8) ![:0]const u8 {
const scheme_end = std.mem.indexOf(u8, url, "://");
const authority_start = if (scheme_end) |end| end + 3 else 0;
const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url;
@@ -205,18 +206,18 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
const query_end = if (query_start) |_| (fragment_start orelse url.len) else path_end;
const path_to_encode = url[path_start..path_end];
// Path is always UTF-8 percent encoded per URL spec
const encoded_path = try percentEncodeSegment(allocator, path_to_encode, .path);
// Query string uses document encoding
const encoded_query = if (query_start) |qs| blk: {
const query_to_encode = url[qs + 1 .. query_end];
const encoded = try percentEncodeSegment(allocator, query_to_encode, .query);
break :blk encoded;
break :blk try encodeQueryString(allocator, query_to_encode, encoding);
} else null;
const encoded_fragment = if (fragment_start) |fs| blk: {
const fragment_to_encode = url[fs + 1 ..];
const encoded = try percentEncodeSegment(allocator, fragment_to_encode, .query);
break :blk encoded;
break :blk try percentEncodeSegment(allocator, fragment_to_encode, .query);
} else null;
if (encoded_path.ptr == path_to_encode.ptr and
@@ -242,7 +243,7 @@ pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
return buf.items[0 .. buf.items.len - 1 :0];
}
const EncodeSet = enum { path, query, userinfo, fragment };
const EncodeSet = enum { path, query, query_legacy, userinfo, fragment };
fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 {
// Check if encoding is needed
@@ -285,17 +286,65 @@ fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime enco
return buf.items;
}
const h5e = @import("parser/html5ever.zig");
/// Encode a query string using the specified encoding.
///
/// For UTF-8, this is standard percent encoding.
/// For legacy encodings, the query is first transcoded to the target charset,
/// with unmappable characters replaced by NCRs (&#codepoint;), and the
/// resulting bytes are then percent encoded with the `.query_legacy` set
/// (which also escapes '&' and ';').
/// Falls back to UTF-8 percent encoding when the label is unknown, the
/// required buffer size is reported as 0, or transcoding fails.
///
/// The returned slice is either `query` itself (nothing had to change) or
/// memory allocated from `allocator`. It never aliases the temporary
/// transcoding buffer, which is freed before returning.
fn encodeQueryString(allocator: Allocator, query: []const u8, encoding: []const u8) ![]const u8 {
    // For UTF-8, use standard percent encoding
    if (std.mem.eql(u8, encoding, "UTF-8")) {
        return percentEncodeSegment(allocator, query, .query);
    }

    // For legacy encodings, first encode to the target charset with NCR fallback
    const enc_info = h5e.encoding_for_label(encoding.ptr, encoding.len);
    if (!enc_info.isValid()) {
        // Unknown encoding, fall back to UTF-8
        return percentEncodeSegment(allocator, query, .query);
    }

    // Calculate max buffer size for encoded output
    const max_encoded_len = h5e.encoding_max_encode_buffer_length(enc_info.handle.?, query.len);
    if (max_encoded_len == 0) {
        return percentEncodeSegment(allocator, query, .query);
    }

    const encode_buf = try allocator.alloc(u8, max_encoded_len);
    defer allocator.free(encode_buf);

    // Encode UTF-8 to legacy encoding with NCR fallback
    const result = h5e.encoding_encode_with_ncr(
        enc_info.handle.?,
        query.ptr,
        query.len,
        encode_buf.ptr,
        encode_buf.len,
    );
    if (!result.isSuccess()) {
        // Encoding failed, fall back to UTF-8
        return percentEncodeSegment(allocator, query, .query);
    }

    // Now percent-encode the result using query_legacy to preserve NCRs
    const encoded_bytes = encode_buf[0..result.bytes_written];
    const escaped = try percentEncodeSegment(allocator, encoded_bytes, .query_legacy);
    if (escaped.ptr != encoded_bytes.ptr) {
        // A fresh buffer was produced; it is independent of encode_buf.
        return escaped;
    }

    // percentEncodeSegment handed its input back because nothing needed
    // escaping (ensureEncoded relies on the same pointer identity). That slice
    // lives in encode_buf, which the defer above frees, so it must not escape
    // this function.
    if (std.mem.eql(u8, encoded_bytes, query)) {
        // Transcoding changed nothing either: return the caller's slice so
        // "unchanged" can still be detected by pointer comparison.
        return query;
    }
    return allocator.dupe(u8, encoded_bytes);
}
fn shouldPercentEncode(c: u8, comptime encode_set: EncodeSet) bool {
return switch (c) {
// Unreserved characters (RFC 3986)
'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => false,
// sub-delims allowed in path/query but some must be encoded in userinfo
'!', '$', '&', '\'', '(', ')', '*', '+', ',' => false,
';', '=' => encode_set == .userinfo,
// sub-delims allowed in path/query but some must be encoded in userinfo/query_legacy
'!', '$', '\'', '(', ')', '*', '+', ',' => false,
// '&' and ';' must be encoded for legacy encoding (to preserve NCRs like &#nnnnn;)
'&', ';' => encode_set == .userinfo or encode_set == .query_legacy,
'=' => encode_set == .userinfo,
// Separators: userinfo must encode these
'/', ':', '@' => encode_set == .userinfo,
// '?' is allowed in queries only
'?' => encode_set != .query,
'?' => encode_set != .query and encode_set != .query_legacy,
// '#' is allowed in fragments only
'#' => encode_set != .fragment,
// Everything else needs encoding (including space)
@@ -1130,7 +1179,7 @@ test "URL: ensureEncoded" {
};
for (cases) |case| {
const result = try ensureEncoded(testing.arena_allocator, case.url);
const result = try ensureEncoded(testing.arena_allocator, case.url, "UTF-8");
try testing.expectString(case.expected, result);
}
}
@@ -1296,7 +1345,7 @@ test "URL: resolve with encoding" {
};
for (cases) |case| {
const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true });
const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encoding = "UTF-8" });
try testing.expectString(case.expected, result);
}
}

View File

@@ -182,7 +182,7 @@ pub fn collectInteractiveElements(
.id = el.getAttributeSafe(comptime .wrap("id")),
.class = el.getAttributeSafe(comptime .wrap("class")),
.href = if (el.getAttributeSafe(comptime .wrap("href"))) |href|
URL.resolve(arena, page.base(), href, .{ .encode = true }) catch href
URL.resolve(arena, page.base(), href, .{ .encoding = page.charset }) catch href
else
null,
.input_type = getInputType(el),

View File

@@ -261,7 +261,7 @@ pub const ContextParams = struct {
};
pub fn createContext(self: *Env, page: *Page, params: ContextParams) !*Context {
const context_arena = try self.app.arena_pool.acquire(.{ .debug = params.debug_name });
const context_arena = try self.app.arena_pool.acquire(.large, params.debug_name);
errdefer self.app.arena_pool.release(context_arena);
const isolate = self.isolate;

View File

@@ -1479,7 +1479,7 @@ fn createFinalizerCallback(
) !*Session.FinalizerCallback {
const session = self.ctx.session;
const arena = try session.getArena(.{ .debug = "FinalizerCallback" });
const arena = try session.getArena(.tiny, "FinalizerCallback");
errdefer session.releaseArena(arena);
const fc = try arena.create(Session.FinalizerCallback);

View File

@@ -45,7 +45,7 @@ key: []const u8,
security_token: v8.Global,
pub fn init(app: *App, isolate: js.Isolate, key: []const u8) !*Origin {
const arena = try app.arena_pool.acquire(.{ .debug = "Origin" });
const arena = try app.arena_pool.acquire(.tiny, "Origin");
errdefer app.arena_pool.release(arena);
var hs: js.HandleScope = undefined;

View File

@@ -126,6 +126,7 @@ pub fn create() !Snapshot {
var data_start: usize = 0;
const isolate = v8.v8__SnapshotCreator__getIsolate(snapshot_creator).?;
defer v8.v8__Isolate__LowMemoryNotification(isolate);
{
// CreateBlob, which we'll call once everything is setup, MUST NOT

View File

@@ -44,11 +44,11 @@ fn _toSlice(self: String, comptime null_terminate: bool, allocator: Allocator) !
const handle = self.handle;
const isolate = local.isolate.handle;
const len = v8.v8__String__Utf8Length(handle, isolate);
const buf = try (if (comptime null_terminate) allocator.allocSentinel(u8, @intCast(len), 0) else allocator.alloc(u8, @intCast(len)));
const l = v8.v8__String__Utf8Length(handle, isolate);
const buf = try (if (comptime null_terminate) allocator.allocSentinel(u8, @intCast(l), 0) else allocator.alloc(u8, @intCast(l)));
const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8);
if (comptime IS_DEBUG) {
std.debug.assert(n == len);
std.debug.assert(n == l);
}
return buf;
@@ -64,32 +64,32 @@ pub fn toSSOWithAlloc(self: String, allocator: Allocator) !SSO {
const handle = self.handle;
const isolate = self.local.isolate.handle;
const len: usize = @intCast(v8.v8__String__Utf8Length(handle, isolate));
const l: usize = @intCast(v8.v8__String__Utf8Length(handle, isolate));
if (len <= 12) {
if (l <= 12) {
var content: [12]u8 = undefined;
const n = v8.v8__String__WriteUtf8(handle, isolate, &content[0], content.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8);
if (comptime IS_DEBUG) {
std.debug.assert(n == len);
std.debug.assert(n == l);
}
// Weird that we do this _after_, but we have to..I've seen weird issues
// in ReleaseMode where v8 won't write to content if it starts off zero
// initiated
@memset(content[len..], 0);
return .{ .len = @intCast(len), .payload = .{ .content = content } };
@memset(content[l..], 0);
return .{ .len = @intCast(l), .payload = .{ .content = content } };
}
const buf = try allocator.alloc(u8, len);
const buf = try allocator.alloc(u8, l);
const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8);
if (comptime IS_DEBUG) {
std.debug.assert(n == len);
std.debug.assert(n == l);
}
var prefix: [4]u8 = @splat(0);
@memcpy(&prefix, buf[0..4]);
return .{
.len = @intCast(len),
.len = @intCast(l),
.payload = .{ .heap = .{
.prefix = prefix,
.ptr = buf.ptr,
@@ -103,9 +103,13 @@ pub fn format(self: String, writer: *std.Io.Writer) !void {
const isolate = local.isolate.handle;
var small: [1024]u8 = undefined;
const len = v8.v8__String__Utf8Length(handle, isolate);
var buf = if (len < 1024) &small else local.call_arena.alloc(u8, @intCast(len)) catch return error.WriteFailed;
const l = v8.v8__String__Utf8Length(handle, isolate);
var buf = if (l < 1024) &small else local.call_arena.alloc(u8, @intCast(l)) catch return error.WriteFailed;
const n = v8.v8__String__WriteUtf8(handle, isolate, buf.ptr, buf.len, v8.NO_NULL_TERMINATION | v8.REPLACE_INVALID_UTF8);
return writer.writeAll(buf[0..n]);
}
/// Returns the length reported by `v8__String__Utf8Length` for this string,
/// converted to `usize`.
pub fn len(self: String) usize {
    const isolate = self.local.isolate.handle;
    const utf8_len = v8.v8__String__Utf8Length(self.handle, isolate);
    return @intCast(utf8_len);
}

View File

@@ -278,7 +278,8 @@ const Context = struct {
}
try self.writer.writeAll("](");
if (el.getAttributeSafe(comptime .wrap("src"))) |src| {
const absolute_src = URL.resolve(self.page.call_arena, self.page.base(), src, .{ .encode = true }) catch src;
const page = self.page;
const absolute_src = URL.resolve(page.call_arena, page.base(), src, .{ .encoding = page.charset }) catch src;
try self.writer.writeAll(absolute_src);
}
try self.writer.writeAll(")");
@@ -286,13 +287,14 @@ const Context = struct {
return;
},
.anchor => {
const page = self.page;
const info = analyzeContent(el.asNode());
const label = getAnchorLabel(el);
const href_raw = el.getAttributeSafe(comptime .wrap("href"));
if (!info.has_visible and label == null and href_raw == null) return;
const href = if (href_raw) |h| URL.resolve(self.page.call_arena, self.page.base(), h, .{ .encode = true }) catch h else null;
const href = if (href_raw) |h| URL.resolve(page.call_arena, page.base(), h, .{ .encoding = page.charset }) catch h else null;
if (info.has_block) {
try self.renderChildren(el.asNode());

View File

@@ -216,3 +216,89 @@ pub extern "c" fn xml5ever_parse_document(
appendBeforeSiblingCallback: *const fn (ctx: *anyopaque, sibling_ref: *anyopaque, NodeOrText) callconv(.c) void,
appendBasedOnParentNodeCallback: *const fn (ctx: *anyopaque, element_ref: *anyopaque, prev_element_ref: *anyopaque, NodeOrText) callconv(.c) void,
) void;
// General encoding api
/// Result of an encoding label lookup, returned by value across the C ABI.
/// Field order is part of the ABI and must match the native side.
pub const EncodingInfo = extern struct {
    /// Non-zero when the lookup matched an encoding.
    found: u8,
    /// Opaque encoding handle to pass to the other `encoding_*` functions.
    handle: ?*anyopaque,
    /// Length in bytes of the name at `name_ptr`.
    name_len: usize,
    /// Encoding name bytes; only read when `name_len` is non-zero.
    name_ptr: [*]const u8,

    /// True when the lookup found an encoding.
    pub fn isValid(self: *const EncodingInfo) bool {
        return self.found != 0;
    }

    /// The encoding name as a slice, or "" when no name was provided.
    pub fn name(self: *const EncodingInfo) []const u8 {
        const n = self.name_len;
        return if (n == 0) "" else self.name_ptr[0..n];
    }
};
/// Outcome of a decode call, returned by value across the C ABI.
/// Field order is part of the ABI and must match the native side.
pub const DecodeResult = extern struct {
    /// Non-zero when the decode reported errors.
    had_errors: u8,
    bytes_read: usize,
    bytes_written: usize,

    /// True when `had_errors` is set.
    pub fn hadErrors(self: *const DecodeResult) bool {
        const flag = self.had_errors;
        return flag != 0;
    }
};
/// Looks up an encoding by label; `label` points to `label_len` bytes.
/// Check `EncodingInfo.isValid()` on the result before using its `handle`.
pub extern "c" fn encoding_for_label(
label: [*]const u8,
label_len: usize,
) EncodingInfo;
/// NOTE(review): presumably the worst-case UTF-8 output size when decoding
/// `input_len` bytes with the encoding `handle` — confirm on the native side.
pub extern "c" fn encoding_max_utf8_buffer_length(
handle: *anyopaque,
input_len: usize,
) usize;
/// Decodes `input_len` bytes at `input` into `output` (capacity `output_len`)
/// using the encoding `handle`.
/// NOTE(review): the exact meaning of `is_last` and of a null `input` is
/// defined by the native implementation — confirm there.
pub extern "c" fn encoding_decode(
handle: *anyopaque,
input: ?[*]const u8,
input_len: usize,
output: [*]u8,
output_len: usize,
is_last: u8,
) DecodeResult;
// Streaming decoder API
/// Creates a decoder object for the encoding `handle`. The result is optional,
/// so callers must handle null.
pub extern "c" fn encoding_decoder_new(handle: *anyopaque) ?*anyopaque;
/// Same parameter shape as `encoding_decode`, but operates on a decoder object
/// instead of an encoding handle.
/// NOTE(review): presumably keeps partial-sequence state between calls until
/// `is_last` is non-zero — confirm on the native side.
pub extern "c" fn encoding_decoder_decode(
decoder: *anyopaque,
input: ?[*]const u8,
input_len: usize,
output: [*]u8,
output_len: usize,
is_last: u8,
) DecodeResult;
/// Frees a decoder object; presumably one obtained from `encoding_decoder_new`.
pub extern "c" fn encoding_decoder_free(decoder: *anyopaque) void;
// Encoding API (UTF-8 to legacy encoding with NCR fallback)
/// Outcome of an encode call, returned by value across the C ABI.
/// Field order is part of the ABI and must match the native side.
pub const EncodeResult = extern struct {
    /// Zero on success; any other value is treated as failure.
    status: u8,
    bytes_read: usize,
    bytes_written: usize,

    /// True when `status` is zero.
    pub fn isSuccess(self: *const EncodeResult) bool {
        const code = self.status;
        return code == 0;
    }
};
/// Encodes `input_len` bytes of UTF-8 at `input` into the encoding `handle`,
/// writing at most `output_capacity` bytes to `output`. Check
/// `EncodeResult.isSuccess()` and use `bytes_written` for the output length.
pub extern "c" fn encoding_encode_with_ncr(
handle: *anyopaque,
input: ?[*]const u8,
input_len: usize,
output: [*]u8,
output_capacity: usize,
) EncodeResult;
/// Buffer size to allocate before calling `encoding_encode_with_ncr` on
/// `input_len` bytes. The URL query encoder treats a result of 0 as a signal
/// to fall back to UTF-8.
pub extern "c" fn encoding_max_encode_buffer_length(
handle: *anyopaque,
input_len: usize,
) usize;

View File

@@ -288,7 +288,7 @@ fn collectLink(
) !void {
const rel = el.getAttributeSafe(comptime .wrap("rel")) orelse return;
const raw_href = el.getAttributeSafe(comptime .wrap("href")) orelse return;
const href = URL.resolve(arena, page.base(), raw_href, .{ .encode = true }) catch raw_href;
const href = URL.resolve(arena, page.base(), raw_href, .{ .encoding = page.charset }) catch raw_href;
if (std.ascii.eqlIgnoreCase(rel, "alternate")) {
try alternate.append(arena, .{

View File

@@ -18,6 +18,10 @@
testing.expectEqual("visible", document.visibilityState);
testing.expectEqual(false, document.prerendering);
testing.expectEqual(undefined, Document.prerendering);
// characterSet should return canonical encoding name
testing.expectEqual("UTF-8", document.characterSet);
testing.expectEqual("UTF-8", document.charset);
testing.expectEqual("UTF-8", document.inputEncoding);
</script>
<script id=headAndbody>

View File

@@ -11,7 +11,6 @@
testing.expectEqual('', d1.decode());
testing.expectEqual('香料', d1.decode(new Uint8Array([233, 166, 153, 230, 150, 153])));
testing.expectEqual('香料', d1.decode(new Uint8Array([0xEF, 0xBB, 0xBF, 233, 166, 153, 230, 150, 153])));
testing.expectEqual('<27>4', d1.decode(new Uint8Array([249, 52])));
{
const buffer = new ArrayBuffer(6);
@@ -38,7 +37,7 @@
}
let d2 = new TextDecoder('utf8', {fatal: true})
testing.expectError('Error: InvalidUtf8', () => {
testing.expectError('TypeError', () => {
let data = new Uint8Array([241, 241, 159, 172]);
d2.decode(data);
});
@@ -46,8 +45,8 @@
<script id=stream>
let d3 = new TextDecoder();
testing.expectEqual('', d2.decode(new Uint8Array([226, 153]), { stream: true }));
testing.expectEqual('♥', d2.decode(new Uint8Array([165]), { stream: true }));
testing.expectEqual('', d3.decode(new Uint8Array([226, 153]), { stream: true }));
testing.expectEqual('♥', d3.decode(new Uint8Array([165]), { stream: true }));
</script>
<script id=slice>
@@ -60,5 +59,69 @@
arr1[4] = 84;
arr1[5] = 85;
arr1[6] = 86;
testing.expectEqual('RST', d3.decode(new Uint8Array(buf1, 2, 3)));
let d4 = new TextDecoder();
testing.expectEqual('RST', d4.decode(new Uint8Array(buf1, 2, 3)));
</script>
<script id=legacy_encodings>
// GBK (Chinese)
let gbk = new TextDecoder('gbk');
testing.expectEqual('gbk', gbk.encoding);
testing.expectEqual('中文', gbk.decode(new Uint8Array([0xD6, 0xD0, 0xCE, 0xC4])));
// Shift_JIS (Japanese)
let sjis = new TextDecoder('shift_jis');
testing.expectEqual('shift_jis', sjis.encoding);
testing.expectEqual('日本語', sjis.decode(new Uint8Array([0x93, 0xFA, 0x96, 0x7B, 0x8C, 0xEA])));
// EUC-JP (Japanese)
let eucjp = new TextDecoder('euc-jp');
testing.expectEqual('euc-jp', eucjp.encoding);
// ISO-8859-1 (Latin-1)
let latin1 = new TextDecoder('iso-8859-1');
testing.expectEqual('windows-1252', latin1.encoding); // ISO-8859-1 maps to windows-1252 per spec
testing.expectEqual('café', latin1.decode(new Uint8Array([0x63, 0x61, 0x66, 0xe9])));
// Big5 (Traditional Chinese)
let big5 = new TextDecoder('big5');
testing.expectEqual('big5', big5.encoding);
// UTF-16LE
let utf16le = new TextDecoder('utf-16le');
testing.expectEqual('utf-16le', utf16le.encoding);
testing.expectEqual('AB', utf16le.decode(new Uint8Array([0x41, 0x00, 0x42, 0x00])));
// UTF-16BE
let utf16be = new TextDecoder('utf-16be');
testing.expectEqual('utf-16be', utf16be.encoding);
testing.expectEqual('AB', utf16be.decode(new Uint8Array([0x00, 0x41, 0x00, 0x42])));
</script>
<script id=invalid_label>
// Test invalid encoding label
try {
new TextDecoder('invalid-encoding');
testing.fail();
} catch (e) {
testing.expectEqual(true, e.toString().includes('RangeError'));
}
// Test 'replacement' encoding is rejected
try {
new TextDecoder('replacement');
testing.fail();
} catch (e) {
testing.expectEqual(true, e.toString().includes('RangeError'));
}
</script>
<script id=label_variations>
// Case insensitive
let upper = new TextDecoder('UTF-8');
testing.expectEqual('utf-8', upper.encoding);
// Leading/trailing whitespace
let ws = new TextDecoder(' utf-8 ');
testing.expectEqual('utf-8', ws.encoding);
</script>

View File

@@ -5,6 +5,9 @@
<script id=TextEncoder>
var encoder = new TextEncoder();
testing.expectEqual('utf-8', encoder.encoding);
testing.expectEqual([], Array.from(encoder.encode()));
testing.expectEqual([110, 117, 108, 108], Array.from(encoder.encode(null)));
testing.expectEqual([], Array.from(encoder.encode(undefined)));
testing.expectEqual([226, 130, 172], Array.from(encoder.encode('€')));
testing.expectEqual([111,118,101,114,32,57,48,48,48], encoder.encode("over 9000"));
</script>

View File

@@ -11,6 +11,10 @@
testing.onload(() => {
// GBK-encoded "中文" should be decoded to UTF-8
testing.expectEqual('中文', iframe.contentDocument.getElementById('test').textContent);
// document.characterSet should return canonical encoding name
testing.expectEqual('GBK', iframe.contentDocument.characterSet);
testing.expectEqual('GBK', iframe.contentDocument.charset);
testing.expectEqual('GBK', iframe.contentDocument.inputEncoding);
});
}
</script>
@@ -73,3 +77,32 @@
});
}
</script>
<script id="anchor_href_encoding_with_ncr">
{
// Test that anchor.href encodes unmappable characters as NCRs in non-UTF-8 documents.
// When a character can't be represented in the document's encoding, it should become &#nnnnn;
// Per WHATWG URL Standard, query strings use document encoding with NCR fallback.
const iframe = document.createElement('iframe');
document.body.appendChild(iframe);
iframe.src = 'encoding/gbk.html';
testing.onload(() => {
testing.expectEqual('GBK', iframe.contentDocument.characterSet);
// Test 1: U+3D34 (㴴) - a Han character NOT in GBK, should become NCR &#15668;
const anchor = iframe.contentDocument.createElement('a');
iframe.contentDocument.body.appendChild(anchor);
anchor.href = 'http://example.com/?q=\u3D34';
// The NCR &#15668; percent-encoded is %26%2315668%3B
testing.expectEqual('http://example.com/?q=%26%2315668%3B', anchor.href);
// Test 2: U+4E2D (中) - IS in GBK, should encode to GBK bytes D6D0 then percent-encode
const anchor2 = iframe.contentDocument.createElement('a');
iframe.contentDocument.body.appendChild(anchor2);
anchor2.href = 'http://example.com/?q=\u4E2D';
// GBK encoding of 中 is D6 D0, percent-encoded as %D6%D0
testing.expectEqual('http://example.com/?q=%D6%D0', anchor2.href);
});
}
</script>

View File

@@ -37,7 +37,13 @@
function expectError(expected, fn) {
withError((err) => {
expectEqual(true, err.toString().includes(expected));
if (!err.toString().includes(expected)) {
console.error(`Expecte error to contains: ${expected}, was: ${err.toString()}`);
expectEqual(true, false);
} else {
// to record a successful case
expectTrue(true);
}
}, fn);
}

View File

@@ -77,7 +77,15 @@ pub fn initWithMimeValidation(
validate_mime: bool,
page: *Page,
) !*Blob {
const arena = try page.getArena(.{ .debug = "Blob" });
const data_len = blk: {
const parts = maybe_blob_parts orelse break :blk 0;
var size: usize = 0;
for (parts) |p| {
size += p.len;
}
break :blk size;
};
const arena = try page.getArena(256 + data_len, "Blob");
errdefer page.releaseArena(arena);
const options: InitOptions = maybe_options orelse .{};

View File

@@ -50,7 +50,7 @@ pub fn parseFromString(
@"image/svg+xml",
}, mime_type) orelse return error.NotSupported;
const arena = try page.getArena(.{ .debug = "DOMParser.parseFromString" });
const arena = try page.getArena(.medium, "DOMParser.parseFromString");
defer page.releaseArena(arena);
return switch (target_mime) {

View File

@@ -666,7 +666,7 @@ pub fn write(self: *Document, text: []const []const u8, page: *Page) !void {
page._parse_mode = .document_write;
defer page._parse_mode = previous_parse_mode;
const arena = try page.getArena(.{ .debug = "Document.write" });
const arena = try page.getArena(.medium, "Document.write");
defer page.releaseArena(arena);
var parser = Parser.init(arena, fragment_node, page);
@@ -1068,10 +1068,15 @@ pub const JsApi = struct {
pub const hasFocus = bridge.function(Document.hasFocus, .{});
pub const prerendering = bridge.property(false, .{ .template = false });
pub const characterSet = bridge.property("UTF-8", .{ .template = false });
pub const charset = bridge.property("UTF-8", .{ .template = false });
pub const inputEncoding = bridge.property("UTF-8", .{ .template = false });
pub const characterSet = bridge.accessor(getCharacterSet, null, .{});
pub const charset = bridge.accessor(getCharacterSet, null, .{});
pub const inputEncoding = bridge.accessor(getCharacterSet, null, .{});
pub const compatMode = bridge.property("CSS1Compat", .{ .template = false });
// Shared getter for characterSet / charset / inputEncoding: the owning
// page's charset, or "UTF-8" when the document has no page.
fn getCharacterSet(self: *const Document) []const u8 {
    return if (self._page) |owner| owner.charset else "UTF-8";
}
pub const referrer = bridge.property("", .{ .template = false });
};

View File

@@ -90,14 +90,14 @@ pub const Options = struct {
};
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*Event {
const arena = try page.getArena(.{ .debug = "Event" });
const arena = try page.getArena(.tiny, "Event");
errdefer page.releaseArena(arena);
const str = try String.init(arena, typ, .{});
return initWithTrusted(arena, str, opts_, false);
}
pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*Event {
const arena = try page.getArena(.{ .debug = "Event.trusted" });
const arena = try page.getArena(.tiny, "Event.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, opts_, true);
}

View File

@@ -31,7 +31,7 @@ _proto: *Blob,
// TODO: Implement File API.
pub fn init(page: *Page) !*File {
const arena = try page.getArena(.{ .debug = "File" });
const arena = try page.getArena(.tiny, "File");
errdefer page.releaseArena(arena);
return page._factory.blob(arena, File{ ._proto = undefined });
}

View File

@@ -63,7 +63,7 @@ const Result = union(enum) {
};
pub fn init(page: *Page) !*FileReader {
const arena = try page.getArena(.{ .debug = "FileReader" });
const arena = try page.getArena(.tiny, "FileReader");
errdefer page.releaseArena(arena);
return page._factory.eventTargetWithAllocator(arena, FileReader{

View File

@@ -71,7 +71,7 @@ pub const ObserverInit = struct {
};
pub fn init(callback: js.Function.Temp, options: ?ObserverInit, page: *Page) !*IntersectionObserver {
const arena = try page.getArena(.{ .debug = "IntersectionObserver" });
const arena = try page.getArena(.medium, "IntersectionObserver");
errdefer page.releaseArena(arena);
const opts = options orelse ObserverInit{};
@@ -266,7 +266,7 @@ fn checkIntersection(self: *IntersectionObserver, target: *Element, page: *Page)
(was_intersecting_opt != null and was_intersecting_opt.? != is_now_intersecting);
if (should_report) {
const arena = try page.getArena(.{ .debug = "IntersectionObserverEntry" });
const arena = try page.getArena(.tiny, "IntersectionObserverEntry");
errdefer page.releaseArena(arena);
const entry = try arena.create(IntersectionObserverEntry);

View File

@@ -76,7 +76,7 @@ pub const ObserveOptions = struct {
};
pub fn init(callback: js.Function.Temp, page: *Page) !*MutationObserver {
const arena = try page.getArena(.{ .debug = "MutationObserver" });
const arena = try page.getArena(.medium, "MutationObserver");
errdefer page.releaseArena(arena);
const self = try arena.create(MutationObserver);
@@ -227,7 +227,7 @@ pub fn notifyAttributeChange(
}
}
const arena = try page.getArena(.{ .debug = "MutationRecord" });
const arena = try page.getArena(.tiny, "MutationRecord");
const record = try arena.create(MutationRecord);
record.* = .{
._arena = arena,
@@ -271,7 +271,7 @@ pub fn notifyCharacterDataChange(
continue;
}
const arena = try page.getArena(.{ .debug = "MutationRecord" });
const arena = try page.getArena(.tiny, "MutationRecord");
const record = try arena.create(MutationRecord);
record.* = .{
._arena = arena,
@@ -318,7 +318,7 @@ pub fn notifyChildListChange(
continue;
}
const arena = try page.getArena(.{ .debug = "MutationRecord" });
const arena = try page.getArena(.tiny, "MutationRecord");
const record = try arena.create(MutationRecord);
record.* = .{
._arena = arena,

View File

@@ -22,6 +22,7 @@ const String = @import("../../string.zig").String;
const js = @import("../js/js.zig");
const Page = @import("../Page.zig");
const URL = @import("../URL.zig");
const reflect = @import("../reflect.zig");
const EventTarget = @import("EventTarget.zig");
@@ -511,6 +512,18 @@ pub fn ownerPage(self: *const Node, default: *Page) *Page {
return doc._page orelse default;
}
/// Options for `resolveURL`.
pub const ResolveURLOpts = struct {
// Allocator for the resolved URL; when null, `resolveURL` uses the
// `call_arena` of the page passed to it.
allocator: ?Allocator = null,
};
// Resolves `url` against the base of the page that owns this node (falling
// back to `page` as the default passed to `ownerPage`).
// The owning page's charset is passed as the encoding, so the query string is
// encoded with the document's charset (NCR fallback for unmappable chars).
// The result is allocated with `opts.allocator`, or `page.call_arena` if unset.
pub fn resolveURL(self: *const Node, url: anytype, page: *Page, opts: ResolveURLOpts) ![:0]const u8 {
    const doc_page = self.ownerPage(page);
    return URL.resolve(
        opts.allocator orelse page.call_arena,
        doc_page.base(),
        url,
        .{ .encoding = doc_page.charset },
    );
}
pub fn isSameDocumentAs(self: *const Node, other: *const Node, page: *const Page) bool {
// Get the root document for each node
const self_doc = if (self._type == .document) self._type.document else self.ownerDocument(page);

View File

@@ -38,7 +38,7 @@ const QueryDescriptor = struct {
};
// We always report 'prompt' (the default safe value — neither granted nor denied).
pub fn query(_: *const Permissions, qd: QueryDescriptor, page: *Page) !js.Promise {
const arena = try page.getArena(.{ .debug = "PermissionStatus" });
const arena = try page.getArena(.tiny, "PermissionStatus");
errdefer page.releaseArena(arena);
const status = try arena.create(PermissionStatus);

View File

@@ -33,7 +33,7 @@ const Range = @This();
_proto: *AbstractRange,
pub fn init(page: *Page) !*Range {
const arena = try page.getArena(.{ .debug = "Range" });
const arena = try page.getArena(.medium, "Range");
errdefer page.releaseArena(arena);
return page._factory.abstractRange(arena, Range{ ._proto = undefined }, page);
}
@@ -312,7 +312,7 @@ pub fn intersectsNode(self: *const Range, node: *Node) bool {
}
pub fn cloneRange(self: *const Range, page: *Page) !*Range {
const arena = try page.getArena(.{ .debug = "Range.clone" });
const arena = try page.getArena(.medium, "Range.clone");
errdefer page.releaseArena(arena);
const clone = try page._factory.abstractRange(arena, Range{ ._proto = undefined }, page);

View File

@@ -407,7 +407,7 @@ pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]cons
const target_page = self._page;
const source_window = target_page.js.getIncumbent().window;
const arena = try target_page.getArena(.{ .debug = "Window.postMessage" });
const arena = try target_page.getArena(.medium, "Window.postMessage");
errdefer target_page.releaseArena(arena);
// Origin should be the source window's origin (where the message came from)
@@ -645,7 +645,7 @@ fn scheduleCallback(self: *Window, cb: js.Function.Temp, delay_ms: u32, opts: Sc
return error.TooManyTimeout;
}
const arena = try page.getArena(.{ .debug = "Window.schedule" });
const arena = try page.getArena(.tiny, "Window.schedule");
errdefer page.releaseArena(arena);
const timer_id = self._timer_id +% 1;

View File

@@ -52,7 +52,7 @@ _playState: PlayState = .idle,
//
// TODO add support for effect and timeline
pub fn init(page: *Page) !*Animation {
const arena = try page.getArena(.{ .debug = "Animation" });
const arena = try page.getArena(.tiny, "Animation");
errdefer page.releaseArena(arena);
const self = try arena.create(Animation);

View File

@@ -39,7 +39,7 @@ pub const ValueIterator = GenericIterator(Iterator, "1");
pub const EntryIterator = GenericIterator(Iterator, null);
pub fn init(node: *Node, page: *Page) !*ChildNodes {
const arena = try page.getArena(.{ .debug = "ChildNodes" });
const arena = try page.getArena(.small, "ChildNodes");
errdefer page.releaseArena(arena);
const self = try arena.create(ChildNodes);

View File

@@ -33,7 +33,7 @@ _family: []const u8,
pub fn init(family: []const u8, source: []const u8, page: *Page) !*FontFace {
_ = source;
const arena = try page.getArena(.{ .debug = "FontFace" });
const arena = try page.getArena(.tiny, "FontFace");
errdefer page.releaseArena(arena);
const self = try arena.create(FontFace);

View File

@@ -34,7 +34,7 @@ _proto: *EventTarget,
_arena: Allocator,
pub fn init(page: *Page) !*FontFaceSet {
const arena = try page.getArena(.{ .debug = "FontFaceSet" });
const arena = try page.getArena(.tiny, "FontFaceSet");
errdefer page.releaseArena(arena);
return page._factory.eventTargetWithAllocator(arena, FontFaceSet{

View File

@@ -292,7 +292,7 @@ pub fn insertAdjacentHTML(
});
const doc_node = doc.asNode();
const arena = try page.getArena(.{ .debug = "HTML.insertAdjacentHTML" });
const arena = try page.getArena(.medium, "HTML.insertAdjacentHTML");
defer page.releaseArena(arena);
const Parser = @import("../../parser/Parser.zig");

View File

@@ -39,12 +39,11 @@ pub fn asNode(self: *Anchor) *Node {
}
pub fn getHref(self: *Anchor, page: *Page) ![]const u8 {
const element = self.asElement();
const href = element.getAttributeSafe(comptime .wrap("href")) orelse return "";
const href = self.asElement().getAttributeSafe(comptime .wrap("href")) orelse return "";
if (href.len == 0) {
return "";
}
return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
return self.asNode().resolveURL(href, page, .{});
}
pub fn setHref(self: *Anchor, value: []const u8, page: *Page) !void {
@@ -203,7 +202,7 @@ fn getResolvedHref(self: *Anchor, page: *Page) !?[:0]const u8 {
if (href.len == 0) {
return null;
}
return try URL.resolve(page.call_arena, page.base(), href, .{});
return try self.asNode().resolveURL(href, page, .{});
}
pub const JsApi = struct {

View File

@@ -97,7 +97,7 @@ pub fn getAction(self: *Form, page: *Page) ![]const u8 {
if (action.len == 0) {
return page.url;
}
return URL.resolve(page.call_arena, page.base(), action, .{ .encode = true });
return element.asNode().resolveURL(action, page, .{});
}
pub fn setAction(self: *Form, value: []const u8, page: *Page) !void {

View File

@@ -48,9 +48,9 @@ pub fn getContentDocument(self: *const IFrame) ?*Document {
return window._document;
}
pub fn getSrc(self: *const IFrame, page: *Page) ![:0]const u8 {
pub fn getSrc(self: *IFrame, page: *Page) ![:0]const u8 {
if (self._src.len == 0) return "";
return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
return self.asNode().resolveURL(self._src, page, .{});
}
pub fn setSrc(self: *IFrame, src: []const u8, page: *Page) !void {

View File

@@ -40,9 +40,7 @@ pub fn getSrc(self: *const Image, page: *Page) ![]const u8 {
if (src.len == 0) {
return "";
}
// Always resolve the src against the page URL
return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
return element.asConstNode().resolveURL(src, page, .{});
}
pub fn setSrc(self: *Image, value: []const u8, page: *Page) !void {

View File

@@ -44,9 +44,7 @@ pub fn getHref(self: *Link, page: *Page) ![]const u8 {
if (href.len == 0) {
return "";
}
// Always resolve the href against the page URL
return URL.resolve(page.call_arena, page.base(), href, .{ .encode = true });
return element.asNode().resolveURL(href, page, .{});
}
pub fn setHref(self: *Link, value: []const u8, page: *Page) !void {

View File

@@ -235,8 +235,7 @@ pub fn getSrc(self: *const Media, page: *Page) ![]const u8 {
if (src.len == 0) {
return "";
}
const URL = @import("../../URL.zig");
return URL.resolve(page.call_arena, page.base(), src, .{ .encode = true });
return element.asConstNode().resolveURL(src, page, .{});
}
pub fn setSrc(self: *Media, value: []const u8, page: *Page) !void {

View File

@@ -45,9 +45,9 @@ pub fn asNode(self: *Script) *Node {
return self.asElement().asNode();
}
pub fn getSrc(self: *const Script, page: *Page) ![]const u8 {
pub fn getSrc(self: *Script, page: *Page) ![]const u8 {
if (self._src.len == 0) return "";
return try URL.resolve(page.call_arena, page.base(), self._src, .{ .encode = true });
return self.asNode().resolveURL(self._src, page, .{});
}
pub fn setSrc(self: *Script, src: []const u8, page: *Page) !void {

View File

@@ -57,9 +57,7 @@ pub fn getPoster(self: *const Video, page: *Page) ![]const u8 {
if (poster.len == 0) {
return "";
}
const URL = @import("../../URL.zig");
return URL.resolve(page.call_arena, page.base(), poster, .{ .encode = true });
return element.asConstNode().resolveURL(poster, page, .{});
}
pub fn setPoster(self: *Video, value: []const u8, page: *Page) !void {

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
@@ -19,6 +19,7 @@
const std = @import("std");
const lp = @import("lightpanda");
const js = @import("../../js/js.zig");
const html5ever = @import("../../parser/html5ever.zig");
const Page = @import("../../Page.zig");
const Session = @import("../../Session.zig");
@@ -30,13 +31,11 @@ _rc: lp.RC(u8) = .{},
_fatal: bool,
_arena: Allocator,
_ignore_bom: bool,
_stream: std.ArrayList(u8),
const Label = enum {
utf8,
@"utf-8",
@"unicode-1-1-utf-8",
};
_bom_seen: bool,
_decoder: ?*anyopaque, // Persistent streaming decoder
_encoding_handle: *anyopaque,
_encoding_name: []const u8,
_lowercase_name: []const u8, // Cached lowercase version of encoding name
const InitOpts = struct {
fatal: bool = false,
@@ -44,25 +43,41 @@ const InitOpts = struct {
};
pub fn init(label_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*TextDecoder {
if (label_) |label| {
_ = std.meta.stringToEnum(Label, label) orelse return error.RangeError;
const label = label_ orelse "utf-8";
const info = html5ever.encoding_for_label(label.ptr, label.len);
if (!info.isValid()) {
return error.RangeError;
}
const arena = try page.getArena(.{ .debug = "TextDecoder" });
// Check for "replacement" encoding - it's not usable for decoding per spec
const enc_name = info.name();
if (std.mem.eql(u8, enc_name, "replacement")) {
return error.RangeError;
}
const arena = try page.getArena(.large, "TextDecoder");
errdefer page.releaseArena(arena);
const opts = opts_ orelse InitOpts{};
const self = try arena.create(TextDecoder);
self.* = .{
._arena = arena,
._stream = .empty,
._fatal = opts.fatal,
._ignore_bom = opts.ignoreBOM,
._encoding_handle = info.handle.?,
._decoder = null,
._bom_seen = false,
._lowercase_name = "", // Will be lazily allocated
._encoding_name = enc_name, // Points to static Rust memory
};
return self;
}
/// Tears down the decoder: frees the native streaming decoder if one is
/// currently held, then returns the arena to the session.
pub fn deinit(self: *TextDecoder, session: *Session) void {
// `_decoder` starts out null and is only populated by a streaming decode;
// the handle comes from html5ever.encoding_decoder_new, so it has to be
// released through the matching free function rather than the arena.
if (self._decoder) |decoder| {
html5ever.encoding_decoder_free(decoder);
}
// `self` was created inside `_arena`, so the arena is released last.
session.releaseArena(self._arena);
}
@@ -82,34 +97,110 @@ pub fn getFatal(self: *const TextDecoder) bool {
return self._fatal;
}
/// Returns the encoding name in lowercase, as the spec requires.
///
/// The lowercase copy is built in the decoder's arena the first time it is
/// requested and reused on every later call.
pub fn getEncoding(self: *TextDecoder) ![]const u8 {
    if (self._lowercase_name.len == 0) {
        // Not cached yet: derive it from the canonical encoding name.
        self._lowercase_name = try std.ascii.allocLowerString(self._arena, self._encoding_name);
    }
    return self._lowercase_name;
}
const DecodeOpts = struct {
stream: bool = false,
};
pub fn decode(self: *TextDecoder, input_: ?[]const u8, opts_: ?DecodeOpts) ![]const u8 {
var input = input_ orelse return "";
const opts: DecodeOpts = opts_ orelse .{};
const input = input_ orelse "";
if (self._stream.items.len > 0) {
try self._stream.appendSlice(self._arena, input);
input = self._stream.items;
}
if (self._fatal and !std.unicode.utf8ValidateSlice(input)) {
if (opts.stream) {
if (self._stream.items.len == 0) {
try self._stream.appendSlice(self._arena, input);
}
return "";
// For non-streaming calls, we don't need a persistent decoder
if (!opts.stream) {
// Reset decoder state if we had one
if (self._decoder) |decoder| {
html5ever.encoding_decoder_free(decoder);
self._decoder = null;
}
} else if (self._decoder == null) {
self._decoder = html5ever.encoding_decoder_new(self._encoding_handle);
if (self._decoder == null) {
return error.OutOfMemory;
}
return error.InvalidUtf8;
}
self._stream.clearRetainingCapacity();
if (self._ignore_bom == false and std.mem.startsWith(u8, input, &.{ 0xEF, 0xBB, 0xBF })) {
return input[3..];
return self._decode(input, self._decoder);
}
fn _decode(self: *TextDecoder, input: []const u8, streaming_decoder: ?*anyopaque) ![]const u8 {
if (input.len == 0) {
return "";
}
return input;
// Calculate max output size
const max_out = html5ever.encoding_max_utf8_buffer_length(
self._encoding_handle,
input.len,
);
if (max_out == 0) {
return "";
}
// Allocate output buffer
const output = try self._arena.alloc(u8, max_out);
// Decode using either streaming or one-shot decoder
const result = if (streaming_decoder) |decoder|
html5ever.encoding_decoder_decode(
decoder,
input.ptr,
input.len,
output.ptr,
output.len,
0, // is_last = false for streaming
)
else
html5ever.encoding_decode(
self._encoding_handle,
input.ptr,
input.len,
output.ptr,
output.len,
1, // is_last = true for one-shot
);
// Handle errors in fatal mode
if (self._fatal and result.hadErrors()) {
if (streaming_decoder != null) {
// Reset decoder on error
if (self._decoder) |decoder| {
html5ever.encoding_decoder_free(decoder);
self._decoder = null;
}
}
self._bom_seen = false;
return error.TypeError;
}
var decoded: []const u8 = output[0..result.bytes_written];
// Handle BOM stripping
if (!self._bom_seen and !self._ignore_bom) {
decoded = stripBom(decoded);
self._bom_seen = true;
}
return decoded;
}
/// Drops one leading U+FEFF (encoded as EF BB BF in the UTF-8 output) from
/// `data`. Returns `data` unchanged when it does not start with that sequence.
fn stripBom(data: []const u8) []const u8 {
    const bom = "\u{FEFF}";
    return if (std.mem.startsWith(u8, data, bom)) data[bom.len..] else data;
}
pub const JsApi = struct {
@@ -123,7 +214,7 @@ pub const JsApi = struct {
pub const constructor = bridge.constructor(TextDecoder.init, .{});
pub const decode = bridge.function(TextDecoder.decode, .{});
pub const encoding = bridge.property("utf-8", .{ .template = false });
pub const encoding = bridge.accessor(TextDecoder.getEncoding, null, .{});
pub const fatal = bridge.accessor(TextDecoder.getFatal, null, .{});
pub const ignoreBOM = bridge.accessor(TextDecoder.getIgnoreBOM, null, .{});
};

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2023-2025 Lightpanda (Selecy SAS)
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
@@ -26,12 +26,23 @@ pub fn init() TextEncoder {
return .{};
}
pub fn encode(_: *const TextEncoder, v: []const u8) !js.TypedArray(u8) {
if (!std.unicode.utf8ValidateSlice(v)) {
pub fn encode(_: *const TextEncoder, v_: ?js.Value) !js.TypedArray(u8) {
const v = v_ orelse return .{ .values = "" };
if (v.isUndefined()) {
return .{ .values = "" };
}
if (v.isNull()) {
return .{ .values = "null" };
}
const str = try v.toStringSlice();
if (!std.unicode.utf8ValidateSlice(str)) {
return error.InvalidUtf8;
}
return .{ .values = v };
return .{ .values = str };
}
pub const JsApi = struct {

View File

@@ -39,14 +39,14 @@ const CloseEventOptions = struct {
const Options = Event.inheritOptions(CloseEvent, CloseEventOptions);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*CloseEvent {
const arena = try page.getArena(.{ .debug = "CloseEvent" });
const arena = try page.getArena(.tiny, "CloseEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*CloseEvent {
const arena = try page.getArena(.{ .debug = "CloseEvent.trusted" });
const arena = try page.getArena(.tiny, "CloseEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -35,7 +35,7 @@ const CompositionEventOptions = struct {
const Options = Event.inheritOptions(CompositionEvent, CompositionEventOptions);
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*CompositionEvent {
const arena = try page.getArena(.{ .debug = "CompositionEvent" });
const arena = try page.getArena(.tiny, "CompositionEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -38,7 +38,7 @@ const CustomEventOptions = struct {
const Options = Event.inheritOptions(CustomEvent, CustomEventOptions);
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*CustomEvent {
const arena = try page.getArena(.{ .debug = "CustomEvent" });
const arena = try page.getArena(.tiny, "CustomEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -47,14 +47,14 @@ pub const ErrorEventOptions = struct {
const Options = Event.inheritOptions(ErrorEvent, ErrorEventOptions);
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*ErrorEvent {
const arena = try page.getArena(.{ .debug = "ErrorEvent" });
const arena = try page.getArena(.small, "ErrorEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, opts_, false, page);
}
pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*ErrorEvent {
const arena = try page.getArena(.{ .debug = "ErrorEvent.trusted" });
const arena = try page.getArena(.small, "ErrorEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, opts_, true, page);
}

View File

@@ -42,13 +42,13 @@ pub const Options = Event.inheritOptions(
);
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*FocusEvent {
const arena = try page.getArena(.{ .debug = "FocusEvent.trusted" });
const arena = try page.getArena(.tiny, "FocusEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*FocusEvent {
const arena = try page.getArena(.{ .debug = "FocusEvent" });
const arena = try page.getArena(.tiny, "FocusEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);

View File

@@ -38,14 +38,14 @@ const Options = Event.inheritOptions(FormDataEvent, struct {
});
pub fn init(typ: []const u8, maybe_options: Options, page: *Page) !*FormDataEvent {
const arena = try page.getArena(.{ .debug = "FormDataEvent" });
const arena = try page.getArena(.tiny, "FormDataEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, maybe_options, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*FormDataEvent {
const arena = try page.getArena(.{ .debug = "FormDataEvent.trusted" });
const arena = try page.getArena(.tiny, "FormDataEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -46,13 +46,13 @@ const Options = Event.inheritOptions(
);
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*InputEvent {
const arena = try page.getArena(.{ .debug = "InputEvent.trusted" });
const arena = try page.getArena(.tiny, "InputEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*InputEvent {
const arena = try page.getArena(.{ .debug = "InputEvent" });
const arena = try page.getArena(.tiny, "InputEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);

View File

@@ -186,13 +186,13 @@ const Options = Event.inheritOptions(
);
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*KeyboardEvent {
const arena = try page.getArena(.{ .debug = "KeyboardEvent.trusted" });
const arena = try page.getArena(.tiny, "KeyboardEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*KeyboardEvent {
const arena = try page.getArena(.{ .debug = "KeyboardEvent" });
const arena = try page.getArena(.tiny, "KeyboardEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);

View File

@@ -50,14 +50,14 @@ pub const Data = union(enum) {
const Options = Event.inheritOptions(MessageEvent, MessageEventOptions);
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*MessageEvent {
const arena = try page.getArena(.{ .debug = "MessageEvent" });
const arena = try page.getArena(.small, "MessageEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, opts_, false, page);
}
pub fn initTrusted(typ: String, opts_: ?Options, page: *Page) !*MessageEvent {
const arena = try page.getArena(.{ .debug = "MessageEvent.trusted" });
const arena = try page.getArena(.small, "MessageEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, opts_, true, page);
}

View File

@@ -82,14 +82,14 @@ pub const Options = Event.inheritOptions(
);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*MouseEvent {
const arena = try page.getArena(.{ .debug = "MouseEvent" });
const arena = try page.getArena(.tiny, "MouseEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*MouseEvent {
const arena = try page.getArena(.{ .debug = "MouseEvent.trusted" });
const arena = try page.getArena(.tiny, "MouseEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -45,14 +45,14 @@ const Options = Event.inheritOptions(
);
pub fn init(typ: []const u8, opts: Options, page: *Page) !*NavigationCurrentEntryChangeEvent {
const arena = try page.getArena(.{ .debug = "NavigationCurrentEntryChangeEvent" });
const arena = try page.getArena(.tiny, "NavigationCurrentEntryChangeEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, opts, false, page);
}
pub fn initTrusted(typ: String, opts: Options, page: *Page) !*NavigationCurrentEntryChangeEvent {
const arena = try page.getArena(.{ .debug = "NavigationCurrentEntryChangeEvent.trusted" });
const arena = try page.getArena(.tiny, "NavigationCurrentEntryChangeEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, opts, true, page);
}

View File

@@ -38,14 +38,14 @@ const PageTransitionEventOptions = struct {
const Options = Event.inheritOptions(PageTransitionEvent, PageTransitionEventOptions);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PageTransitionEvent {
const arena = try page.getArena(.{ .debug = "PageTransitionEvent" });
const arena = try page.getArena(.tiny, "PageTransitionEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*PageTransitionEvent {
const arena = try page.getArena(.{ .debug = "PageTransitionEvent.trusted" });
const arena = try page.getArena(.tiny, "PageTransitionEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -84,7 +84,7 @@ const Options = Event.inheritOptions(
);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PointerEvent {
const arena = try page.getArena(.{ .debug = "UIEvent" });
const arena = try page.getArena(.tiny, "PointerEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -39,14 +39,14 @@ const PopStateEventOptions = struct {
const Options = Event.inheritOptions(PopStateEvent, PopStateEventOptions);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*PopStateEvent {
const arena = try page.getArena(.{ .debug = "PopStateEvent" });
const arena = try page.getArena(.tiny, "PopStateEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*PopStateEvent {
const arena = try page.getArena(.{ .debug = "PopStateEvent.trusted" });
const arena = try page.getArena(.tiny, "PopStateEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -39,14 +39,14 @@ const ProgressEventOptions = struct {
const Options = Event.inheritOptions(ProgressEvent, ProgressEventOptions);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*ProgressEvent {
const arena = try page.getArena(.{ .debug = "ProgressEvent" });
const arena = try page.getArena(.tiny, "ProgressEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, _opts, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*ProgressEvent {
const arena = try page.getArena(.{ .debug = "ProgressEvent.trusted" });
const arena = try page.getArena(.tiny, "ProgressEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -37,7 +37,7 @@ const PromiseRejectionEventOptions = struct {
const Options = Event.inheritOptions(PromiseRejectionEvent, PromiseRejectionEventOptions);
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*PromiseRejectionEvent {
const arena = try page.getArena(.{ .debug = "PromiseRejectionEvent" });
const arena = try page.getArena(.tiny, "PromiseRejectionEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -39,14 +39,14 @@ const SubmitEventOptions = struct {
const Options = Event.inheritOptions(SubmitEvent, SubmitEventOptions);
pub fn init(typ: []const u8, opts_: ?Options, page: *Page) !*SubmitEvent {
const arena = try page.getArena(.{ .debug = "SubmitEvent" });
const arena = try page.getArena(.tiny, "SubmitEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});
return initWithTrusted(arena, type_string, opts_, false, page);
}
pub fn initTrusted(typ: String, _opts: ?Options, page: *Page) !*SubmitEvent {
const arena = try page.getArena(.{ .debug = "SubmitEvent.trusted" });
const arena = try page.getArena(.tiny, "SubmitEvent.trusted");
errdefer page.releaseArena(arena);
return initWithTrusted(arena, typ, _opts, true, page);
}

View File

@@ -40,7 +40,7 @@ pub const Options = Event.inheritOptions(
);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*TextEvent {
const arena = try page.getArena(.{ .debug = "TextEvent" });
const arena = try page.getArena(.tiny, "TextEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -51,7 +51,7 @@ pub const Options = Event.inheritOptions(
);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*UIEvent {
const arena = try page.getArena(.{ .debug = "UIEvent" });
const arena = try page.getArena(.tiny, "UIEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -50,7 +50,7 @@ pub const Options = Event.inheritOptions(
);
pub fn init(typ: []const u8, _opts: ?Options, page: *Page) !*WheelEvent {
const arena = try page.getArena(.{ .debug = "WheelEvent" });
const arena = try page.getArena(.medium, "WheelEvent");
errdefer page.releaseArena(arena);
const type_string = try String.init(arena, typ, .{});

View File

@@ -57,7 +57,7 @@ const InitOpts = struct {
};
pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response {
const arena = try page.getArena(.{ .debug = "Response" });
const arena = try page.getArena(.large, "Response");
errdefer page.releaseArena(arena);
const opts = opts_ orelse InitOpts{};
@@ -174,7 +174,7 @@ pub fn bytes(self: *const Response, page: *Page) !js.Promise {
}
pub fn clone(self: *const Response, page: *Page) !*Response {
const arena = try page.getArena(.{ .debug = "Response.clone" });
const arena = try page.getArena((self._body orelse "").len + self._url.len + 256, "Response.clone");
errdefer page.releaseArena(arena);
const body = if (self._body) |b| try arena.dupe(u8, b) else null;

View File

@@ -105,10 +105,10 @@ pub fn init(url: []const u8, protocols_: ?[]const u8, page: *Page) !*WebSocket {
}
}
const arena = try page.getArena(.{ .debug = "WebSocket" });
const arena = try page.getArena(.medium, "WebSocket");
errdefer page.releaseArena(arena);
const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encode = true });
const resolved_url = try URL.resolve(arena, page.base(), url, .{ .always_dupe = true, .encoding = page.charset });
const http_client = page._session.browser.http_client;
const conn = http_client.network.newConnection() orelse {
@@ -272,12 +272,10 @@ pub fn send(self: *WebSocket, data: SendData) !void {
return error.InvalidStateError;
}
// Get a dedicated arena for this message
const arena = try self._page._session.getArena(.{ .debug = "WebSocket message" });
errdefer self._page._session.releaseArena(arena);
switch (data) {
.blob => |blob| {
const arena = try self._page._session.getArena(blob._slice.len, "WebSocket.message");
errdefer self._page._session.releaseArena(arena);
try self.queueMessage(.{ .binary = .{
.arena = arena,
.data = try arena.dupe(u8, blob._slice),
@@ -285,15 +283,21 @@ pub fn send(self: *WebSocket, data: SendData) !void {
},
.js_val => |js_val| {
if (js_val.isString()) |str| {
const arena = try self._page._session.getArena(str.len(), "WebSocket.message");
errdefer self._page._session.releaseArena(arena);
try self.queueMessage(.{ .text = .{
.arena = arena,
.data = try str.toSliceWithAlloc(arena),
} });
} else {
const binary = try js_val.toZig(BinaryData);
const buffer = binary.asBuffer();
const arena = try self._page._session.getArena(buffer.len, "WebSocket.message");
errdefer self._page._session.releaseArena(arena);
try self.queueMessage(.{ .binary = .{
.arena = arena,
.data = try arena.dupe(u8, binary.asBuffer()),
.data = try arena.dupe(u8, buffer),
} });
}
},

View File

@@ -89,7 +89,7 @@ const ResponseType = enum {
};
pub fn init(page: *Page) !*XMLHttpRequest {
const arena = try page.getArena(.{ .debug = "XMLHttpRequest" });
const arena = try page.getArena(.large, "XMLHttpRequest");
errdefer page.releaseArena(arena);
const self = try page._factory.xhrEventTarget(arena, XMLHttpRequest{
._page = page,
@@ -210,7 +210,7 @@ pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void
const page = self._page;
self._method = try parseMethod(method_);
self._url = try URL.resolve(self._arena, page.base(), url, .{ .always_dupe = true, .encode = true });
self._url = try URL.resolve(self._arena, page.base(), url, .{ .always_dupe = true, .encoding = page.charset });
try self.stateChanged(.opened, page);
}

View File

@@ -45,7 +45,7 @@ pub fn querySelectorAll(root: *Node, input: []const u8, page: *Page) !*List {
return error.SyntaxError;
}
const arena = try page.getArena(.{ .debug = "querySelectorAll" });
const arena = try page.getArena(.small, "querySelectorAll");
errdefer page.releaseArena(arena);
var nodes: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;

View File

@@ -487,10 +487,10 @@ pub const BrowserContext = struct {
pub fn createIsolatedWorld(self: *BrowserContext, world_name: []const u8, grant_universal_access: bool) !*IsolatedWorld {
const browser = &self.cdp.browser;
const arena = try browser.arena_pool.acquire(.{ .debug = "IsolatedWorld" });
const arena = try browser.arena_pool.acquire(.small, "IsolatedWorld");
errdefer browser.arena_pool.release(arena);
const call_arena = try browser.arena_pool.acquire(.{ .debug = "IsolatedWorld.call_arena" });
const call_arena = try browser.arena_pool.acquire(.tiny, "IsolatedWorld.call_arena");
errdefer browser.arena_pool.release(call_arena);
const world = try arena.create(IsolatedWorld);

View File

@@ -286,7 +286,7 @@ fn navigate(cmd: *CDP.Command) !void {
page = try session.replacePage();
}
const encoded_url = try URL.ensureEncoded(page.call_arena, params.url);
const encoded_url = try URL.ensureEncoded(page.call_arena, params.url, "UTF-8");
try page.navigate(encoded_url, .{
.reason = .address_bar,
.cdp_id = cmd.input.id,

View File

@@ -220,7 +220,7 @@ fn createTarget(cmd: *CDP.Command) !void {
}
if (!std.mem.eql(u8, "about:blank", params.url)) {
const encoded_url = try URL.ensureEncoded(page.call_arena, params.url);
const encoded_url = try URL.ensureEncoded(page.call_arena, params.url, "UTF-8");
try page.navigate(
encoded_url,
.{ .reason = .address_bar, .kind = .{ .push = null } },

View File

@@ -151,6 +151,303 @@ pub extern "C" fn html5ever_parse_document_with_encoding(
.one(StrTendril::from(decoded.as_ref()));
}
// === Encoding API for TextDecoder ===
/// Result of an encoding label lookup, returned by value across the C ABI.
///
/// When `found` is 0, `handle` and `name_ptr` are null and `name_len` is 0.
#[repr(C)]
pub struct EncodingInfo {
/// 0 = not found, 1 = found
pub found: u8,
/// Opaque handle to the encoding (actually &'static Encoding)
pub handle: *const c_void,
/// Length of canonical name, in bytes
pub name_len: usize,
/// Pointer to the canonical encoding name as reported by `Encoding::name()`.
/// The bytes are static; read exactly `name_len` of them.
/// NOTE(review): the name is not guaranteed to be lowercase (encoding_rs
/// reports e.g. "UTF-8"); callers that need lowercase must convert it.
pub name_ptr: *const c_uchar,
}
/// Look up an encoding by its label (case-insensitive, whitespace-trimmed).
///
/// Matching is delegated to `Encoding::for_label`. A null pointer, an empty
/// label and an unknown label all yield an `EncodingInfo` with `found == 0`,
/// null pointers and a zero name length.
#[no_mangle]
pub extern "C" fn encoding_for_label(
    label: *const c_uchar,
    label_len: usize,
) -> EncodingInfo {
    let not_found = EncodingInfo {
        found: 0,
        handle: std::ptr::null(),
        name_len: 0,
        name_ptr: std::ptr::null(),
    };

    if label.is_null() || label_len == 0 {
        return not_found;
    }

    // The caller promises `label` points at `label_len` readable bytes.
    let raw_label = unsafe { std::slice::from_raw_parts(label, label_len) };

    Encoding::for_label(raw_label).map_or(not_found, |encoding| {
        let canonical = encoding.name();
        EncodingInfo {
            found: 1,
            handle: encoding as *const _ as *const c_void,
            name_len: canonical.len(),
            name_ptr: canonical.as_ptr(),
        }
    })
}
/// Calculate the maximum UTF-8 buffer size needed to decode `input_len` bytes.
///
/// Returns 0 when `handle` is null or when the size computation overflows.
#[no_mangle]
pub extern "C" fn encoding_max_utf8_buffer_length(
    handle: *const c_void,
    input_len: usize,
) -> usize {
    if handle.is_null() {
        return 0;
    }

    let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };

    // The estimate is taken from a fresh decoder for this encoding.
    match encoding.new_decoder().max_utf8_buffer_length(input_len) {
        Some(capacity) => capacity,
        None => 0,
    }
}
/// Result of a decoding operation, returned by value across the C ABI.
#[repr(C)]
pub struct DecodeResult {
/// 0 = no errors, 1 = had malformed sequences (replaced with U+FFFD).
/// The decode functions also report 1, with both counters at 0, when they
/// are handed a null handle/decoder or a null output buffer.
pub had_errors: u8,
/// Number of input bytes consumed
pub bytes_read: usize,
/// Number of UTF-8 bytes written to output buffer
pub bytes_written: usize,
}
/// Decode bytes from the source encoding to UTF-8 with a throw-away decoder.
///
/// Malformed sequences are replaced with U+FFFD and reported via
/// `had_errors`. A leading byte order mark gets no special treatment: it never
/// switches the decoder to a different encoding and it is emitted as U+FEFF,
/// so that the caller can decide whether to strip it (TextDecoder `ignoreBOM`).
///
/// `is_last` is forwarded to the decoder: 0 = more input may follow,
/// 1 = final/complete decode. Every call builds a fresh decoder, so no state
/// is carried from one call to the next.
///
/// Returns `had_errors = 1` with zero counters when `handle` or `output` is
/// null. If `output` is too small, only what fit is reported.
#[no_mangle]
pub extern "C" fn encoding_decode(
    handle: *const c_void,
    input: *const c_uchar,
    input_len: usize,
    output: *mut c_uchar,
    output_len: usize,
    is_last: u8,
) -> DecodeResult {
    if handle.is_null() || output.is_null() {
        return DecodeResult {
            had_errors: 1,
            bytes_read: 0,
            bytes_written: 0,
        };
    }

    let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };

    let input_bytes: &[u8] = if input.is_null() || input_len == 0 {
        &[]
    } else {
        unsafe { std::slice::from_raw_parts(input, input_len) }
    };

    let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_len) };

    // `new_decoder()` would sniff the BOM: it silently removes it and may morph
    // into a UTF-8/UTF-16 decoder regardless of the requested encoding. The
    // encoding was already chosen by label, so decode with exactly that one.
    let mut decoder = encoding.new_decoder_without_bom_handling();
    let last = is_last != 0;

    // The CoderResult (InputEmpty / OutputFull) is intentionally not surfaced:
    // callers size `output` up front and read the byte counters instead.
    let (_coder_result, bytes_read, bytes_written, had_errors) =
        decoder.decode_to_utf8(input_bytes, output_slice, last);

    DecodeResult {
        had_errors: if had_errors { 1 } else { 0 },
        bytes_read,
        bytes_written,
    }
}
// === Streaming Decoder API ===
use encoding_rs::Decoder;
/// Create a streaming decoder that maintains state across calls.
///
/// The decoder is heap-allocated and owned by the caller, who must release it
/// with `encoding_decoder_free`. Returns null when `handle` is null.
///
/// The decoder performs no BOM handling: a leading byte order mark neither
/// changes the encoding nor disappears, it is decoded to U+FEFF so the caller
/// can apply its own `ignoreBOM` policy.
#[no_mangle]
pub extern "C" fn encoding_decoder_new(handle: *const c_void) -> *mut c_void {
    if handle.is_null() {
        return std::ptr::null_mut();
    }

    let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };

    // Not `new_decoder()`: that variant sniffs the BOM and may morph into a
    // decoder for a different encoding than the one selected by label.
    let decoder = Box::new(encoding.new_decoder_without_bom_handling());
    Box::into_raw(decoder) as *mut c_void
}
/// Decode using a streaming decoder (maintains state for incomplete sequences).
///
/// `decoder_ptr` must come from `encoding_decoder_new`. `is_last` is forwarded
/// to the decoder (non-zero marks the final chunk). Returns `had_errors = 1`
/// with zero counters when `decoder_ptr` or `output` is null.
#[no_mangle]
pub extern "C" fn encoding_decoder_decode(
    decoder_ptr: *mut c_void,
    input: *const c_uchar,
    input_len: usize,
    output: *mut c_uchar,
    output_len: usize,
    is_last: u8,
) -> DecodeResult {
    if decoder_ptr.is_null() || output.is_null() {
        return DecodeResult {
            had_errors: 1,
            bytes_read: 0,
            bytes_written: 0,
        };
    }

    let decoder = unsafe { &mut *(decoder_ptr as *mut Decoder) };

    // A null or zero-length input is treated as an empty chunk.
    let src: &[u8] = if input.is_null() || input_len == 0 {
        &[]
    } else {
        unsafe { std::slice::from_raw_parts(input, input_len) }
    };
    let dst = unsafe { std::slice::from_raw_parts_mut(output, output_len) };

    // The CoderResult is dropped; only the counters and error flag are reported.
    let (_coder_result, bytes_read, bytes_written, replaced) =
        decoder.decode_to_utf8(src, dst, is_last != 0);

    DecodeResult {
        had_errors: u8::from(replaced),
        bytes_read,
        bytes_written,
    }
}
/// Free a streaming decoder created by `encoding_decoder_new`.
///
/// Passing null is a no-op.
#[no_mangle]
pub extern "C" fn encoding_decoder_free(decoder_ptr: *mut c_void) {
    if decoder_ptr.is_null() {
        return;
    }

    // Rebuild the Box so the decoder is dropped and its allocation released.
    let owned = unsafe { Box::from_raw(decoder_ptr as *mut Decoder) };
    drop(owned);
}
// === Encoding API (UTF-8 to legacy encoding with NCR fallback) ===
/// Result of an encoding operation, returned by value across the C ABI.
#[repr(C)]
pub struct EncodeResult {
/// 0 = success, 1 = failure. Failure covers an output buffer that is too
/// small as well as a null handle/output pointer and input that is not
/// valid UTF-8 (the latter two report both counters as 0).
pub status: u8,
/// Number of input bytes consumed
pub bytes_read: usize,
/// Number of bytes written to output buffer
pub bytes_written: usize,
}
/// Encode UTF-8 to a legacy encoding, replacing unencodable characters with
/// HTML decimal numeric character references (&#codepoint;).
///
/// This is used for URL query string encoding per WHATWG URL spec.
/// encoding_rs's encode_from_utf8 already produces NCRs for unmappable chars.
///
/// Returns `status = 1` with zero counters when `handle` or `output` is null
/// or when the input is not valid UTF-8. Returns `status = 1` with the partial
/// counters when `output_capacity` is too small. A null `input` or a zero
/// `input_len` is treated as the empty string.
#[no_mangle]
pub extern "C" fn encoding_encode_with_ncr(
    handle: *const c_void,
    input: *const c_uchar,
    input_len: usize,
    output: *mut c_uchar,
    output_capacity: usize,
) -> EncodeResult {
    let failure = EncodeResult {
        status: 1,
        bytes_read: 0,
        bytes_written: 0,
    };

    if handle.is_null() || output.is_null() {
        return failure;
    }

    let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };

    let input_str = if input.is_null() || input_len == 0 {
        ""
    } else {
        let bytes = unsafe { std::slice::from_raw_parts(input, input_len) };
        match std::str::from_utf8(bytes) {
            Ok(s) => s,
            Err(_) => return failure,
        }
    };

    let output_slice = unsafe { std::slice::from_raw_parts_mut(output, output_capacity) };

    // For UTF-8 encoding, just copy directly (no NCR needed).
    if encoding == encoding_rs::UTF_8 {
        // Lengths come from the validated slice, not from `input_len`: with a
        // null `input` the slice is empty whatever `input_len` claims, and a
        // length mismatch would make `copy_from_slice` panic across the FFI
        // boundary.
        let src = input_str.as_bytes();
        if src.len() > output_slice.len() {
            return failure;
        }
        output_slice[..src.len()].copy_from_slice(src);
        return EncodeResult {
            status: 0,
            bytes_read: src.len(),
            bytes_written: src.len(),
        };
    }

    let mut encoder = encoding.new_encoder();

    // encode_from_utf8 automatically produces NCRs for unmappable characters.
    let (result, bytes_read, bytes_written, _had_unmappables) =
        encoder.encode_from_utf8(input_str, output_slice, true);

    let status = match result {
        encoding_rs::CoderResult::InputEmpty => 0,
        encoding_rs::CoderResult::OutputFull => 1,
    };

    EncodeResult {
        status,
        bytes_read,
        bytes_written,
    }
}
/// Calculate maximum output buffer size needed for encoding with NCR fallback.
/// Worst case: every character becomes &#codepoint; where codepoint is up to 7 digits.
#[no_mangle]
pub extern "C" fn encoding_max_encode_buffer_length(
handle: *const c_void,
input_len: usize,
) -> usize {
if handle.is_null() {
return 0;
}
let encoding: &'static Encoding = unsafe { &*(handle as *const Encoding) };
let encoder = encoding.new_encoder();
// This returns the max buffer size accounting for NCR expansion
encoder
.max_buffer_length_from_utf8_if_no_unmappables(input_len)
.map(|len| {
// Add extra space for potential NCRs (each char could become &#nnnnnn; = 10 bytes)
// But realistically, most chars are mappable, so add 2x as safety margin
len.saturating_mul(2)
})
.unwrap_or(input_len * 10)
}
#[no_mangle]
pub extern "C" fn html5ever_parse_fragment(
html: *mut c_uchar,

View File

@@ -107,7 +107,7 @@ pub fn fetch(app: *App, url: [:0]const u8, opts: FetchOpts) !void {
// }
// }
const encoded_url = try URL.ensureEncoded(page.call_arena, url);
const encoded_url = try URL.ensureEncoded(page.call_arena, url, "UTF-8");
_ = try page.navigate(encoded_url, .{
.reason = .address_bar,
.kind = .{ .push = null },

624
src/network/IpFilter.zig Normal file
View File

@@ -0,0 +1,624 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const posix = std.posix;
const libcurl = @import("../sys/libcurl.zig");
const IpFilter = @This();
/// Binary representation for bitwise CIDR comparison.
/// IPv4 address as 4 raw bytes, most significant byte first (the CIDR code
/// reads it as a big-endian u32).
pub const Ipv4Addr = [4]u8;
/// IPv6 address as 16 raw bytes, most significant byte first (the CIDR code
/// reads it as two big-endian u64 halves).
pub const Ipv6Addr = [16]u8;
/// An IPv4 CIDR range: the address it was built from plus the bit mask that
/// selects the prefix bits, both held as u32 values.
pub const CidrV4 = struct {
    network: u32,
    mask: u32,

    /// Build the range `addr/prefix_len`.
    /// `addr` is decoded big-endian and stored as-is (host bits are not
    /// cleared). `prefix_len` is expected to be in 0..=32.
    fn fromPrefix(addr: Ipv4Addr, prefix_len: u6) CidrV4 {
        const all_ones: u32 = 0xFFFFFFFF;
        return .{
            .network = std.mem.readInt(u32, &addr, .big),
            .mask = switch (prefix_len) {
                0 => 0,
                // 32 does not fit the u5 shift amount of a u32, so it gets
                // its own arm instead of going through the shift below.
                32 => all_ones,
                else => ~(all_ones >> @intCast(prefix_len)),
            },
        };
    }
};
/// An IPv6 CIDR range, split into high and low 64-bit halves so matching can
/// be done with plain u64 arithmetic.
pub const CidrV6 = struct {
    network_hi: u64,
    network_lo: u64,
    mask_hi: u64,
    mask_lo: u64,

    /// Build the range `addr/prefix_len`.
    /// Both halves of `addr` are decoded big-endian. Any `prefix_len` of 128
    /// or more yields a full 128-bit mask.
    fn fromPrefix(addr: Ipv6Addr, prefix_len: u8) CidrV6 {
        const full: u64 = 0xFFFFFFFFFFFFFFFF;
        var hi: u64 = 0;
        var lo: u64 = 0;
        switch (prefix_len) {
            // /0: both masks stay zero.
            0 => {},
            // Prefix ends inside the high half.
            1...63 => hi = ~(full >> @intCast(prefix_len)),
            // Prefix covers exactly the high half.
            64 => hi = full,
            // Prefix covers the high half and part of the low half.
            65...127 => {
                hi = full;
                lo = ~(full >> @intCast(prefix_len - 64));
            },
            // /128 (and anything larger): every bit is significant.
            else => {
                hi = full;
                lo = full;
            },
        }
        return .{
            .network_hi = std.mem.readInt(u64, addr[0..8], .big),
            .network_lo = std.mem.readInt(u64, addr[8..16], .big),
            .mask_hi = hi,
            .mask_lo = lo,
        };
    }
};
// IpFilter fields
/// When true, addresses inside the built-in PRIVATE_V4 / PRIVATE_V6 tables
/// are reported as blocked.
block_private: bool,
/// Optional custom block/allow ranges. When present they are released by
/// `deinit`.
cidrs: ?Cidrs,
// ── Comptime helpers ─────────────────────────────────────────────────────────
/// Comptime helper: convert a dotted-decimal IPv4 literal into its 4 bytes.
/// Performs no validation of its own; it is only meant for the hard-coded
/// literals in the range tables of this file.
fn parseIpv4Comptime(comptime s: []const u8) Ipv4Addr {
    var octets: Ipv4Addr = undefined;
    var slot: usize = 0;
    var acc: u8 = 0;
    for (s) |byte| {
        switch (byte) {
            // A dot closes the current octet and starts the next one.
            '.' => {
                octets[slot] = acc;
                slot += 1;
                acc = 0;
            },
            // Anything else is treated as a decimal digit.
            else => acc = acc * 10 + (byte - '0'),
        }
    }
    // The last octet has no trailing dot, so store it after the loop.
    octets[slot] = acc;
    return octets;
}
/// Comptime helper: turn a dotted-decimal literal and a prefix length into a
/// CidrV4 table entry.
fn makeCidrV4(comptime addr: []const u8, comptime prefix: u6) CidrV4 {
    const octets = parseIpv4Comptime(addr);
    return CidrV4.fromPrefix(octets, prefix);
}
/// Comptime helper: turn a 16-byte address literal and a prefix length into a
/// CidrV6 table entry.
fn makeCidrV6(comptime bytes: Ipv6Addr, comptime prefix: u8) CidrV6 {
    const range = CidrV6.fromPrefix(bytes, prefix);
    return range;
}
// ── Comptime CIDR range tables ───────────────────────────────────────────────
/// IPv4 ranges that `isBlockedV4` reports as blocked when `block_private`
/// is set. Built at comptime from dotted-decimal literals.
const PRIVATE_V4 = [_]CidrV4{
makeCidrV4("127.0.0.0", 8), // localhost
makeCidrV4("0.0.0.0", 8), // current network
makeCidrV4("10.0.0.0", 8), // RFC1918
makeCidrV4("172.16.0.0", 12), // RFC1918
makeCidrV4("192.168.0.0", 16), // RFC1918
makeCidrV4("169.254.0.0", 16), // link-local
};
/// IPv6 ranges that `isBlockedV6` reports as blocked when `block_private`
/// is set. Each entry is a 16-byte address literal plus a prefix length.
const PRIVATE_V6 = [_]CidrV6{
// ::/128 — IPv6 Unspecified
makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 128),
// ::1/128 — IPv6 localhost
makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128),
// fe80::/10 — link-local
makeCidrV6(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10),
// fc00::/7 — ULA
makeCidrV6(.{ 0xfc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 7),
};
// ── Runtime IP parsing ───────────────────────────────────────────────────────
/// Parse a dotted-decimal IPv4 string into its 4 bytes.
/// Returns null unless the input is exactly four '.'-separated groups of
/// ASCII digits, each with a value in 0..=255.
fn parseIpv4(str: []const u8) ?Ipv4Addr {
    var addr: Ipv4Addr = undefined;
    var it = std.mem.splitScalar(u8, str, '.');
    var i: usize = 0;
    while (it.next()) |part| : (i += 1) {
        if (i >= addr.len) return null;
        // std.fmt.parseInt tolerates a leading '+'/'-' and '_' digit
        // separators; none of those are valid in an IP address, so insist on
        // plain digits before handing the octet to it.
        for (part) |ch| {
            if (!std.ascii.isDigit(ch)) return null;
        }
        // Rejects empty groups and values above 255.
        addr[i] = std.fmt.parseInt(u8, part, 10) catch return null;
    }
    if (i != addr.len) return null;
    return addr;
}
/// Parse an IPv6 string (compressed notation included) into its 16 bytes.
/// Anything from the first '%' onwards is treated as a zone ID and ignored,
/// e.g. "fe80::1%eth0" is parsed as "fe80::1".
/// Returns null when the remaining text is not a valid IPv6 address.
fn parseIpv6(str: []const u8) ?Ipv6Addr {
    // Without a '%' the whole string is the address.
    const zone_start = std.mem.indexOfScalar(u8, str, '%') orelse str.len;
    const address_text = str[0..zone_start];
    const parsed = std.net.Address.parseIp6(address_text, 0) catch return null;
    return parsed.in6.sa.addr;
}
// ── CIDR matching ────────────────────────────────────────────────────────────
/// Recognise an IPv4-mapped IPv6 address (::ffff:a.b.c.d).
/// Returns the embedded IPv4 bytes when the first 12 bytes are ten zeros
/// followed by 0xff 0xff, and null otherwise.
fn isIpv4Mapped(addr: Ipv6Addr) ?Ipv4Addr {
    const leading_zeros = std.mem.allEqual(u8, addr[0..10], 0);
    const has_marker = addr[10] == 0xff and addr[11] == 0xff;
    if (leading_zeros and has_marker) {
        return addr[12..16].*;
    }
    return null;
}
/// Report whether `addr` lies inside `cidr`: true when the address and the
/// range's network value agree on every bit selected by the mask.
fn matchesCidrV4(addr: Ipv4Addr, cidr: CidrV4) bool {
    const value = std.mem.readInt(u32, &addr, .big);
    const differing_bits = value ^ cidr.network;
    return (differing_bits & cidr.mask) == 0;
}
/// Report whether `addr` lies inside `cidr`. The comparison is done per
/// 64-bit half: both halves must agree with the range on every masked bit.
fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool {
    const hi = std.mem.readInt(u64, addr[0..8], .big);
    const hi_diff = (hi ^ cidr.network_hi) & cidr.mask_hi;
    if (hi_diff != 0) return false;

    const lo = std.mem.readInt(u64, addr[8..16], .big);
    const lo_diff = (lo ^ cidr.network_lo) & cidr.mask_lo;
    return lo_diff == 0;
}
// ── Public API ───────────────────────────────────────────────────────────────
/// Custom ranges for a filter. `v4` / `v6` hold ranges to block, while
/// `allow_v4` / `allow_v6` hold ranges that are exempt from blocking.
pub const Cidrs = struct {
    v4: []CidrV4,
    v6: []CidrV6,
    allow_v4: []CidrV4,
    allow_v6: []CidrV6,

    /// Free all four slices. `allocator` must be the one they were
    /// allocated with.
    pub fn deinit(self: Cidrs, allocator: std.mem.Allocator) void {
        // IPv4 lists
        allocator.free(self.v4);
        allocator.free(self.allow_v4);
        // IPv6 lists
        allocator.free(self.v6);
        allocator.free(self.allow_v6);
    }
};
/// Parse a comma-separated list of CIDR strings (e.g. "10.0.0.0/8,2001:db8::/32")
/// into a Cidrs struct. Entries prefixed with '-' are added to the allow list
/// (e.g. "-10.0.0.42/32" exempts that IP from blocking). Spaces and tabs
/// around an entry are ignored and empty entries are skipped.
///
/// The prefix length must consist of ASCII digits only and must not exceed
/// 32 for IPv4 or 128 for IPv6.
///
/// Caller owns the returned Cidrs and must free them via Cidrs.deinit.
/// Returns error.InvalidCidr on any malformed entry; allocation failures are
/// propagated.
pub fn parseCidrList(
    allocator: std.mem.Allocator,
    cidr_str: []const u8,
) !Cidrs {
    var v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer v4_list.deinit(allocator);
    var v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer v6_list.deinit(allocator);
    var allow_v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer allow_v4_list.deinit(allocator);
    var allow_v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer allow_v6_list.deinit(allocator);

    var it = std.mem.splitScalar(u8, cidr_str, ',');
    while (it.next()) |entry| {
        const trimmed = std.mem.trim(u8, entry, " \t");
        if (trimmed.len == 0) continue;

        // A leading '-' marks an allow-list entry.
        const is_allow = trimmed[0] == '-';
        const cidr_part = if (is_allow) trimmed[1..] else trimmed;

        const slash = std.mem.indexOfScalar(u8, cidr_part, '/') orelse return error.InvalidCidr;
        const addr_str = cidr_part[0..slash];
        const prefix_str = cidr_part[slash + 1 ..];

        // std.fmt.parseInt accepts a leading sign and '_' separators, which
        // would let entries such as "10.0.0.0/+8" through; require digits.
        for (prefix_str) |ch| {
            if (!std.ascii.isDigit(ch)) return error.InvalidCidr;
        }
        // Also rejects an empty prefix and values that do not fit a u8.
        const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr;

        if (parseIpv4(addr_str)) |v4| {
            if (prefix > 32) return error.InvalidCidr;
            const cidr = CidrV4.fromPrefix(v4, @intCast(prefix));
            const target = if (is_allow) &allow_v4_list else &v4_list;
            try target.append(allocator, cidr);
        } else if (parseIpv6(addr_str)) |v6| {
            if (prefix > 128) return error.InvalidCidr;
            const cidr = CidrV6.fromPrefix(v6, prefix);
            const target = if (is_allow) &allow_v6_list else &v6_list;
            try target.append(allocator, cidr);
        } else {
            return error.InvalidCidr;
        }
    }

    // Hand the buffers over one by one; each errdefer frees a slice that has
    // already been detached from its list if a later conversion fails.
    const v4 = try v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(v4);
    const v6 = try v6_list.toOwnedSlice(allocator);
    errdefer allocator.free(v6);
    const allow_v4 = try allow_v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(allow_v4);
    const allow_v6 = try allow_v6_list.toOwnedSlice(allocator);

    return .{ .v4 = v4, .v6 = v6, .allow_v4 = allow_v4, .allow_v6 = allow_v6 };
}
/// Create an IpFilter.
/// `block_private` enables blocking of the built-in private ranges (RFC1918,
/// localhost, link-local and ULA). `cidrs` optionally supplies additional
/// custom block/allow ranges; the filter takes ownership of them and frees
/// them in `deinit`.
pub fn init(
    block_private: bool,
    cidrs: ?Cidrs,
) IpFilter {
    const filter: IpFilter = .{
        .block_private = block_private,
        .cidrs = cidrs,
    };
    return filter;
}
/// Release the custom ranges owned by the filter, if any.
/// `allocator` must be the one the Cidrs were allocated with.
pub fn deinit(self: IpFilter, allocator: std.mem.Allocator) void {
    const owned = self.cidrs orelse return;
    owned.deinit(allocator);
}
/// Decide whether an IPv4 address is blocked.
/// Order of evaluation: custom allow ranges (match -> not blocked), custom
/// block ranges (match -> blocked), then the built-in private ranges when
/// `block_private` is set. Anything else is not blocked.
fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool {
    if (self.cidrs) |custom| {
        // Allow entries win over every block rule.
        for (custom.allow_v4) |allowed| {
            if (matchesCidrV4(addr, allowed)) return false;
        }
        for (custom.v4) |blocked| {
            if (matchesCidrV4(addr, blocked)) return true;
        }
    }

    if (!self.block_private) return false;

    for (PRIVATE_V4) |range| {
        if (matchesCidrV4(addr, range)) return true;
    }
    return false;
}
/// Decide whether an IPv6 address is blocked.
/// Order of evaluation: custom allow ranges (match -> not blocked), custom
/// block ranges (match -> blocked), then the built-in private ranges when
/// `block_private` is set. Anything else is not blocked.
fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool {
    if (self.cidrs) |custom| {
        // Allow entries win over every block rule.
        for (custom.allow_v6) |allowed| {
            if (matchesCidrV6(addr, allowed)) return false;
        }
        for (custom.v6) |blocked| {
            if (matchesCidrV6(addr, blocked)) return true;
        }
    }

    if (!self.block_private) return false;

    for (PRIVATE_V6) |range| {
        if (matchesCidrV6(addr, range)) return true;
    }
    return false;
}
/// Check if an address from curl's opensocket callback should be blocked.
/// Extracts the IP directly from the sockaddr structure; no string parsing needed.
/// IPv4-mapped IPv6 addresses are checked against the IPv4 rules.
/// Fail-closed: an unknown address family, or an `addrlen` too short for the
/// declared family, yields true (blocked).
pub fn isBlockedSockaddr(self: *const IpFilter, sa: *const libcurl.CurlSockAddr) bool {
    switch (sa.family) {
        posix.AF.INET => {
            // Never reinterpret more bytes than curl says are valid.
            if (sa.addrlen < @sizeOf(posix.sockaddr.in)) return true;
            const sin: *const posix.sockaddr.in = @ptrCast(&sa.addr);
            // sin.addr holds the address in network byte order; the bit cast
            // keeps the in-memory byte sequence, i.e. most significant octet
            // first, which is the layout Ipv4Addr uses.
            const bytes: [4]u8 = @bitCast(sin.addr);
            return self.isBlockedV4(bytes);
        },
        posix.AF.INET6 => {
            // sockaddr.in6 is larger than the generic sockaddr declared in
            // CurlSockAddr, so the length check matters most here.
            if (sa.addrlen < @sizeOf(posix.sockaddr.in6)) return true;
            const sin6: *const posix.sockaddr.in6 = @ptrCast(&sa.addr);
            const addr: Ipv6Addr = sin6.addr;
            // ::ffff:a.b.c.d must not bypass the IPv4 rules.
            if (isIpv4Mapped(addr)) |v4| return self.isBlockedV4(v4);
            return self.isBlockedV6(addr);
        },
        else => return true, // unknown family -> fail-closed
    }
}
const testing = @import("../testing.zig");
test "IpFilter: IPv4 CIDR matching: private group boundaries" {
    const filter = IpFilter.init(true, null);
    defer filter.deinit(testing.allocator);

    // Addresses inside one of the built-in private ranges.
    const must_block = [_][]const u8{
        "0.0.0.0",
        // Loopback
        "127.0.0.1",
        "127.255.255.255",
        // RFC1918 10.0.0.0/8
        "10.0.0.1",
        "10.255.255.255",
        // RFC1918 172.16.0.0/12 — first and last address of the range
        "172.16.0.0",
        "172.31.255.255",
        // RFC1918 192.168.0.0/16
        "192.168.0.1",
        // Link-local
        "169.254.1.1",
    };
    // Addresses just outside those ranges, plus ordinary public ones.
    const must_pass = [_][]const u8{
        "128.0.0.1",
        "11.0.0.0",
        "172.15.255.255", // one below 172.16.0.0/12
        "172.32.0.0", // one above 172.16.0.0/12
        "192.169.0.0",
        "169.255.0.0",
        "8.8.8.8",
        "1.1.1.1",
        "93.184.216.34", // example.com
    };

    for (must_block) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
    for (must_pass) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }
}
test "IpFilter: IPv6 CIDR matching: private group" {
    const filter = IpFilter.init(true, null);
    defer filter.deinit(testing.allocator);

    const must_block = [_][]const u8{
        "::", // unspecified
        "::1", // localhost
        "fe80::1", // link-local
        "fc00::1", // ULA
        "fd00::1", // ULA (fd is inside fc00::/7)
    };
    const must_pass = [_][]const u8{
        "2001:db8::1", // documentation range — public
        "2606:4700::1111", // Cloudflare
    };

    for (must_block) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
    for (must_pass) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }
}
test "IpFilter: IPv4-mapped IPv6 bypass prevention" {
    const filter = IpFilter.init(true, null);
    defer filter.deinit(testing.allocator);

    // Mapped forms of loopback and RFC1918 addresses must be blocked.
    const mapped_private = [_][]const u8{ "::ffff:127.0.0.1", "::ffff:10.0.0.1" };
    for (mapped_private) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }

    // A mapped public address must NOT be blocked.
    try testing.expect(!filter.testBlocked("::ffff:8.8.8.8"));
}
test "IpFilter: fail-closed: unknown address family blocked by isBlockedSockaddr" {
    // Even a filter that blocks nothing else must refuse unknown families.
    const filter = IpFilter.init(false, null);
    defer filter.deinit(testing.allocator);

    const unknown_family: libcurl.CurlSockAddr = .{
        .family = 255, // not AF_INET or AF_INET6
        .socktype = posix.SOCK.STREAM,
        .protocol = 0,
        .addrlen = 0,
        .addr = undefined,
    };

    const blocked = filter.isBlockedSockaddr(&unknown_family);
    try testing.expect(blocked);
}
test "IpFilter: custom CIDR ranges" {
    const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24");
    const filter = IpFilter.init(false, cidrs);
    defer filter.deinit(testing.allocator);

    // Inside the custom range.
    const inside = [_][]const u8{ "203.0.113.1", "203.0.113.255" };
    for (inside) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }

    // Just past the range, and an unrelated public address.
    const outside = [_][]const u8{ "203.0.114.0", "8.8.8.8" };
    for (outside) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }
}
test "IpFilter: private group blocks cloud metadata IP via link-local" {
    // 169.254.169.254 falls inside link-local (169.254.0.0/16), which is part
    // of the private group. Users who want targeted cloud-metadata-only
    // blocking can use --block-cidrs.
    const metadata_ip = "169.254.169.254";

    const with_private = IpFilter.init(true, null);
    defer with_private.deinit(testing.allocator);
    const without_private = IpFilter.init(false, null);
    defer without_private.deinit(testing.allocator);

    // Blocked via link-local when the private group is enabled ...
    try testing.expect(with_private.testBlocked(metadata_ip));
    // ... and not blocked when it is disabled.
    try testing.expect(!without_private.testBlocked(metadata_ip));
}
test "IpFilter: parseCidrList: mixed IPv4 and IPv6" {
    const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24");
    // Give the parsed ranges an owner before the first assertion, so a
    // failing expectation cannot also leak them.
    const f = IpFilter.init(false, cidrs);
    defer f.deinit(testing.allocator);

    try testing.expectEqual(2, cidrs.v4.len);
    try testing.expectEqual(1, cidrs.v6.len);

    // spot-check: 203.0.113.0/24 and 192.168.1.0/24
    try testing.expect(f.testBlocked("203.0.113.1"));
    try testing.expect(!f.testBlocked("203.0.114.0"));
    try testing.expect(f.testBlocked("192.168.1.1"));
    // spot-check: 2001:db8::/32
    try testing.expect(f.testBlocked("2001:db8::1"));
    try testing.expect(!f.testBlocked("2001:db9::1"));
}
test "IpFilter: allow list exempts from private blocking" {
    const cidrs = try parseCidrList(testing.allocator, "-10.0.0.42/32,-fc00::1/128");
    const filter = IpFilter.init(true, cidrs);
    defer filter.deinit(testing.allocator);

    // Allow-listed addresses pass even though they are in private ranges.
    const exempt = [_][]const u8{ "10.0.0.42", "fc00::1" };
    for (exempt) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }

    // Every other private address is still blocked.
    const still_blocked = [_][]const u8{ "10.0.0.43", "10.0.0.41", "192.168.1.1", "fc00::2" };
    for (still_blocked) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
}
test "IpFilter: allow list exempts from custom CIDR blocking" {
    const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24,-203.0.113.100/32");
    const filter = IpFilter.init(false, cidrs);
    defer filter.deinit(testing.allocator);

    // The single allow-listed host is exempt ...
    try testing.expect(!filter.testBlocked("203.0.113.100"));

    // ... while its neighbours in the blocked /24 are not.
    const neighbours = [_][]const u8{ "203.0.113.99", "203.0.113.101" };
    for (neighbours) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
}
test "IpFilter: parseCidrList: allow entries with '-' prefix" {
    const cidrs = try parseCidrList(testing.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128");
    // Give the parsed ranges an owner before the first assertion, so a
    // failing expectation cannot also leak them.
    const f = IpFilter.init(false, cidrs);
    defer f.deinit(testing.allocator);

    try testing.expectEqual(1, cidrs.v4.len);
    try testing.expectEqual(0, cidrs.v6.len);
    try testing.expectEqual(1, cidrs.allow_v4.len);
    try testing.expectEqual(1, cidrs.allow_v6.len);

    try testing.expect(!f.testBlocked("10.0.0.42")); // allowed
    try testing.expect(f.testBlocked("10.0.0.43")); // blocked
    try testing.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by custom, but allow-listed)
}
test "IpFilter: parseCidrList: invalid input returns error" {
    const malformed = [_][]const u8{
        "not-a-cidr",
        "10.0.0.0/33", // prefix too large
        "10.0.0.0", // missing prefix
        "10.0.0.0/abc", // non-numeric prefix
    };
    for (malformed) |input| {
        try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, input));
    }
}
test "IpFilter: matchesCidrV4: exact match /32" {
    const host: Ipv4Addr = .{ 192, 168, 1, 100 };
    const cidr = CidrV4.fromPrefix(host, 32);

    // Only the host itself matches a /32.
    try testing.expect(matchesCidrV4(host, cidr));

    const neighbours = [_]Ipv4Addr{
        .{ 192, 168, 1, 101 },
        .{ 192, 168, 1, 99 },
    };
    for (neighbours) |other| {
        try testing.expect(!matchesCidrV4(other, cidr));
    }
}
test "IpFilter: matchesCidrV4: /0 matches everything" {
    const cidr = CidrV4.fromPrefix(.{ 0, 0, 0, 0 }, 0);

    // With an empty mask every address is inside the range.
    const samples = [_]Ipv4Addr{
        .{ 0, 0, 0, 0 },
        .{ 255, 255, 255, 255 },
        .{ 192, 168, 1, 1 },
    };
    for (samples) |addr| {
        try testing.expect(matchesCidrV4(addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /8 boundary" {
    const cidr = CidrV4.fromPrefix(.{ 10, 0, 0, 0 }, 8);

    // First and last address of 10.0.0.0/8.
    const inside = [_]Ipv4Addr{ .{ 10, 0, 0, 0 }, .{ 10, 255, 255, 255 } };
    for (inside) |addr| {
        try testing.expect(matchesCidrV4(addr, cidr));
    }

    // The addresses immediately above and below the range.
    const outside = [_]Ipv4Addr{ .{ 11, 0, 0, 0 }, .{ 9, 255, 255, 255 } };
    for (outside) |addr| {
        try testing.expect(!matchesCidrV4(addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /12 boundary (172.16.0.0/12)" {
    const cidr = CidrV4.fromPrefix(.{ 172, 16, 0, 0 }, 12);

    // In range
    const inside = [_]Ipv4Addr{
        .{ 172, 16, 0, 0 },
        .{ 172, 31, 255, 255 },
        .{ 172, 20, 100, 50 },
    };
    for (inside) |addr| {
        try testing.expect(matchesCidrV4(addr, cidr));
    }

    // Out of range
    const outside = [_]Ipv4Addr{
        .{ 172, 15, 255, 255 },
        .{ 172, 32, 0, 0 },
    };
    for (outside) |addr| {
        try testing.expect(!matchesCidrV4(addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /24 network" {
    const cidr = CidrV4.fromPrefix(.{ 203, 0, 113, 0 }, 24);

    // First and last address of 203.0.113.0/24.
    const inside = [_]Ipv4Addr{ .{ 203, 0, 113, 0 }, .{ 203, 0, 113, 255 } };
    for (inside) |addr| {
        try testing.expect(matchesCidrV4(addr, cidr));
    }

    // Last address of the previous /24 and first of the next one.
    const outside = [_]Ipv4Addr{ .{ 203, 0, 112, 255 }, .{ 203, 0, 114, 0 } };
    for (outside) |addr| {
        try testing.expect(!matchesCidrV4(addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: non-byte-aligned /25" {
    const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 0 }, 25);

    // 192.168.1.0 - 192.168.1.127 should match
    const lower_half = [_]Ipv4Addr{ .{ 192, 168, 1, 0 }, .{ 192, 168, 1, 127 } };
    for (lower_half) |addr| {
        try testing.expect(matchesCidrV4(addr, cidr));
    }

    // 192.168.1.128+ should not match
    const upper_half = [_]Ipv4Addr{ .{ 192, 168, 1, 128 }, .{ 192, 168, 1, 255 } };
    for (upper_half) |addr| {
        try testing.expect(!matchesCidrV4(addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: /128 exact match" {
    // 2001:db8::1
    const host: Ipv6Addr = [_]u8{ 0x20, 0x01, 0x0d, 0xb8 } ++ ([_]u8{0} ** 11) ++ [_]u8{1};
    const cidr = CidrV6.fromPrefix(host, 128);

    try testing.expect(matchesCidrV6(host, cidr));

    // Changing only the last byte must break the match.
    var neighbour = host;
    neighbour[15] = 2;
    try testing.expect(!matchesCidrV6(neighbour, cidr));
}
test "IpFilter: matchesCidrV6: /0 matches everything" {
    const unspecified: Ipv6Addr = [_]u8{0} ** 16;
    const cidr = CidrV6.fromPrefix(unspecified, 0);

    // ::1 and the all-ones address, i.e. both ends of the address space.
    const samples = [_]Ipv6Addr{
        ([_]u8{0} ** 15) ++ [_]u8{1},
        [_]u8{0xff} ** 16,
    };
    for (samples) |addr| {
        try testing.expect(matchesCidrV6(addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: /64 boundary" {
    // 2001:db8::/64 — the upper 64 bits are the network part.
    const upper = [_]u8{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0 };
    const network: Ipv6Addr = upper ++ ([_]u8{0} ** 8);
    const cidr = CidrV6.fromPrefix(network, 64);

    // In range - any suffix in lower 64 bits
    const inside = [_]Ipv6Addr{
        upper ++ ([_]u8{0} ** 7) ++ [_]u8{1},
        upper ++ ([_]u8{0xff} ** 8),
    };
    for (inside) |addr| {
        try testing.expect(matchesCidrV6(addr, cidr));
    }

    // Out of range - different prefix (2001:db9::)
    const other_prefix: Ipv6Addr = [_]u8{ 0x20, 0x01, 0x0d, 0xb9 } ++ ([_]u8{0} ** 12);
    try testing.expect(!matchesCidrV6(other_prefix, cidr));
}
test "IpFilter: matchesCidrV6: /48 network" {
    // 2001:db8:abcd::/48
    const site = [_]u8{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd };
    const network: Ipv6Addr = site ++ ([_]u8{0} ** 10);
    const cidr = CidrV6.fromPrefix(network, 48);

    // First and last address of the /48.
    const last: Ipv6Addr = site ++ ([_]u8{0xff} ** 10);
    try testing.expect(matchesCidrV6(network, cidr));
    try testing.expect(matchesCidrV6(last, cidr));

    // 2001:db8:abce:: is the first address of the next /48.
    const next_site: Ipv6Addr = [_]u8{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xce } ++ ([_]u8{0} ** 10);
    try testing.expect(!matchesCidrV6(next_site, cidr));
}
test "IpFilter: matchesCidrV6: /10 link-local (fe80::/10)" {
    const network: Ipv6Addr = [_]u8{ 0xfe, 0x80 } ++ ([_]u8{0} ** 14);
    const cidr = CidrV6.fromPrefix(network, 10);

    // fe80:: through febf:: should match (first 10 bits: 1111111010)
    const inside = [_]Ipv6Addr{
        [_]u8{ 0xfe, 0x80 } ++ ([_]u8{0} ** 13) ++ [_]u8{1},
        [_]u8{ 0xfe, 0xbf } ++ ([_]u8{0xff} ** 14),
    };
    for (inside) |addr| {
        try testing.expect(matchesCidrV6(addr, cidr));
    }

    // fec0:: should NOT match (it differs within the first 10 bits)
    const site_local: Ipv6Addr = [_]u8{ 0xfe, 0xc0 } ++ ([_]u8{0} ** 14);
    try testing.expect(!matchesCidrV6(site_local, cidr));
}
test "IpFilter: matchesCidrV6: prefix > 64 bits (/96)" {
    // ::ffff:0:0/96 (IPv4-mapped prefix): ten zero bytes, then ff ff.
    const mapped_prefix = ([_]u8{0} ** 10) ++ [_]u8{ 0xff, 0xff };
    const network: Ipv6Addr = mapped_prefix ++ ([_]u8{0} ** 4);
    const cidr = CidrV6.fromPrefix(network, 96);

    // Any low 32 bits match as long as the 96-bit prefix is intact.
    const inside = [_]Ipv6Addr{
        mapped_prefix ++ [_]u8{ 192, 168, 1, 1 },
        mapped_prefix ++ [_]u8{ 10, 0, 0, 1 },
    };
    for (inside) |addr| {
        try testing.expect(matchesCidrV6(addr, cidr));
    }

    // ff fe instead of ff ff: the prefix differs in the low 64-bit half.
    const wrong_prefix: Ipv6Addr = ([_]u8{0} ** 10) ++ [_]u8{ 0xff, 0xfe } ++ [_]u8{ 192, 168, 1, 1 };
    try testing.expect(!matchesCidrV6(wrong_prefix, cidr));
}
/// Test-only convenience: parse `ip` as IPv4 or IPv6 and run it through the
/// filter. IPv4-mapped IPv6 input is checked against the IPv4 rules.
/// Test inputs must be valid IPs; a string that parses as neither hits
/// `unreachable`.
fn testBlocked(self: *const IpFilter, ip: []const u8) bool {
    if (parseIpv4(ip)) |v4| return self.isBlockedV4(v4);

    const v6 = parseIpv6(ip) orelse unreachable;
    if (isIpv4Mapped(v6)) |embedded| return self.isBlockedV4(embedded);
    return self.isBlockedV6(v6);
}

View File

@@ -28,6 +28,7 @@ const Config = @import("../Config.zig");
const libcurl = @import("../sys/libcurl.zig");
const http = @import("http.zig");
const IpFilter = @import("IpFilter.zig");
const RobotStore = @import("Robots.zig").RobotStore;
const WebBotAuth = @import("WebBotAuth.zig");
@@ -85,6 +86,9 @@ callbacks: [MAX_TICK_CALLBACKS]TickCallback = undefined,
callbacks_len: usize = 0,
callbacks_mutex: std.Thread.Mutex = .{},
/// Optional IP filter for blocking requests to private/internal networks (--block-private-networks).
ip_filter: ?*IpFilter = null,
const TickCallback = struct {
ctx: *anyopaque,
fun: *const fn (*anyopaque) void,
@@ -230,13 +234,31 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network {
ca_blob = try loadCerts(allocator);
}
// IP filter for blocking requests to private/internal networks.
const block_private = config.blockPrivateNetworks();
const cidrs: ?IpFilter.Cidrs = blk: {
const s = config.blockCidrs() orelse break :blk null;
break :blk try IpFilter.parseCidrList(allocator, s);
};
const has_cidrs = if (cidrs) |c| c.v4.len > 0 or c.v6.len > 0 or c.allow_v4.len > 0 or c.allow_v6.len > 0 else false;
const ip_filter: ?*IpFilter = blk: {
if (!block_private and !has_cidrs) break :blk null;
const f = try allocator.create(IpFilter);
f.* = IpFilter.init(block_private, cidrs);
break :blk f;
};
errdefer if (ip_filter) |f| {
f.deinit(allocator);
allocator.destroy(f);
};
const count: usize = config.httpMaxConcurrent();
const connections = try allocator.alloc(http.Connection, count);
errdefer allocator.free(connections);
var available: std.DoublyLinkedList = .{};
for (0..count) |i| {
connections[i] = try http.Connection.init(ca_blob, config);
connections[i] = try http.Connection.init(ca_blob, config, ip_filter);
available.append(&connections[i].node);
}
@@ -280,6 +302,8 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network {
.ws_pool = .init(allocator),
.ws_max = config.wsMaxConcurrent(),
.ip_filter = ip_filter,
};
}
@@ -316,6 +340,11 @@ pub fn deinit(self: *Network) void {
if (self.cache) |*cache| cache.deinit();
if (self.ip_filter) |f| {
f.deinit(self.allocator);
self.allocator.destroy(f);
}
globalDeinit();
}
@@ -612,7 +641,7 @@ pub fn releaseConnection(self: *Network, conn: *http.Connection) void {
self.ws_count -= 1;
},
else => {
conn.reset(self.config, self.ca_blob) catch |err| {
conn.reset(self.config, self.ca_blob, self.ip_filter) catch |err| {
lp.assert(false, "couldn't reset curl easy", .{ .err = err });
};
self.conn_mutex.lock();
@@ -637,7 +666,7 @@ pub fn newConnection(self: *Network) ?*http.Connection {
};
// don't do this under lock
conn.* = http.Connection.init(self.ca_blob, self.config) catch {
conn.* = http.Connection.init(self.ca_blob, self.config, self.ip_filter) catch {
self.ws_mutex.lock();
defer self.ws_mutex.unlock();
self.ws_pool.destroy(conn);

View File

@@ -17,9 +17,11 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const posix = std.posix;
const Config = @import("../Config.zig");
const libcurl = @import("../sys/libcurl.zig");
const IpFilter = @import("IpFilter.zig");
const log = @import("lightpanda").log;
const assert = @import("lightpanda").assert;
@@ -229,6 +231,35 @@ pub const ResponseHead = struct {
}
};
/// Opensocket callback installed on curl easy handles that have an IP filter.
/// Consults the filter for the address curl is about to connect to and either
/// refuses the connection (returns CURL_SOCKET_BAD, after logging a warning)
/// or creates the socket itself and returns its fd.
/// `clientp` is the *const IpFilter registered via CURLOPT_OPENSOCKETDATA; a
/// null `clientp` is refused as well. A failure to create the socket also
/// yields CURL_SOCKET_BAD.
fn opensocketCallback(
    purpose: libcurl.CurlSockType,
    address: *libcurl.CurlSockAddr,
    clientp: ?*anyopaque,
) libcurl.CurlSocket {
    // purpose is informational; we always open the same socket type
    _ = purpose;

    const raw_filter = clientp orelse return libcurl.CURL_SOCKET_BAD;
    const filter: *const IpFilter = @ptrCast(@alignCast(raw_filter));

    if (filter.isBlockedSockaddr(address)) {
        switch (address.family) {
            // Known families can be rendered as a readable IP address.
            posix.AF.INET, posix.AF.INET6 => {
                const ip = std.net.Address.initPosix(@ptrCast(&address.addr));
                log.warn(.http, "blocked by IP filter", .{ .ip = ip });
            },
            // For anything else only the family number is available.
            else => log.warn(.http, "blocked by IP filter", .{ .family = address.family }),
        }
        return libcurl.CURL_SOCKET_BAD;
    }

    // NOTE(review): the socket is created with exactly the type curl passed
    // in; no extra flags (e.g. close-on-exec) are added here — confirm that
    // is intended.
    const fd = posix.socket(
        @intCast(address.family),
        @intCast(address.socktype),
        @intCast(address.protocol),
    ) catch return libcurl.CURL_SOCKET_BAD;
    return fd;
}
pub const Connection = struct {
_easy: *libcurl.Curl,
transport: Transport,
@@ -240,13 +271,17 @@ pub const Connection = struct {
websocket: *@import("../browser/webapi/net/WebSocket.zig"),
};
pub fn init(ca_blob: ?libcurl.CurlBlob, config: *const Config) !Connection {
pub fn init(
ca_blob: ?libcurl.CurlBlob,
config: *const Config,
ip_filter: ?*const IpFilter,
) !Connection {
const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy;
var self = Connection{ ._easy = easy, .transport = .none };
errdefer self.deinit();
try self.reset(config, ca_blob);
try self.reset(config, ca_blob, ip_filter);
return self;
}
@@ -371,6 +406,7 @@ pub const Connection = struct {
self: *Connection,
config: *const Config,
ca_blob: ?libcurl.CurlBlob,
ip_filter: ?*const IpFilter,
) !void {
libcurl.curl_easy_reset(self._easy);
self.transport = .none;
@@ -421,6 +457,15 @@ pub const Connection = struct {
// try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback);
}
// default write callback to prevent libcurl from writing to stdout
try self.setWriteCallback(discardBody);
// IP filter: block private/internal network addresses
if (ip_filter) |filter| {
try libcurl.curl_easy_setopt(self._easy, .opensocket_function, opensocketCallback);
try libcurl.curl_easy_setopt(self._easy, .opensocket_data, @constCast(filter));
}
}
fn discardBody(_: [*]const u8, count: usize, len: usize, _: ?*anyopaque) usize {
@@ -603,3 +648,53 @@ fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8,
}
return 0;
}
// ── Unit tests for opensocketCallback ────────────────────────────────────────
/// Test helper: build a CurlSockAddr describing an AF_INET stream socket for
/// the given IPv4 bytes (port 0).
fn makeSockAddrV4(ip: [4]u8) libcurl.CurlSockAddr {
    const sin: posix.sockaddr.in = .{
        .port = 0,
        .addr = @bitCast(ip),
    };
    var out: libcurl.CurlSockAddr = .{
        .family = posix.AF.INET,
        .socktype = posix.SOCK.STREAM,
        .protocol = 0,
        .addrlen = @sizeOf(posix.sockaddr.in),
        .addr = undefined,
    };
    // Overlay the sockaddr.in bytes onto the generic addr field.
    const dest = std.mem.asBytes(&out.addr)[0..@sizeOf(posix.sockaddr.in)];
    @memcpy(dest, std.mem.asBytes(&sin));
    return out;
}
test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" {
    const filter = IpFilter.init(true, null);
    const clientp: ?*anyopaque = @ptrCast(@constCast(&filter));

    // Loopback is in the private group, so no socket may be opened.
    var loopback = makeSockAddrV4(.{ 127, 0, 0, 1 });
    const outcome = opensocketCallback(.ipcxn, &loopback, clientp);
    try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, outcome);
}
test "opensocketCallback: public IPv4 opens a real socket" {
    const filter = IpFilter.init(true, null);
    const clientp: ?*anyopaque = @ptrCast(@constCast(&filter));

    // 8.8.8.8 is outside every blocked range, so the callback should hand
    // back a socket it created.
    var public_addr = makeSockAddrV4(.{ 8, 8, 8, 8 });
    const fd = opensocketCallback(.ipcxn, &public_addr, clientp);

    // A real fd is always >= 0
    try std.testing.expect(fd >= 0);
    posix.close(fd);
}
test "opensocketCallback: null clientp returns CURL_SOCKET_BAD (fail-closed)" {
    // Without a filter pointer the callback must refuse, even for a public IP.
    var public_addr = makeSockAddrV4(.{ 8, 8, 8, 8 });
    const outcome = opensocketCallback(.ipcxn, &public_addr, null);
    try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, outcome);
}
test "opensocketCallback: block_private=false allows private IP" {
    // With block_private off and no custom ranges the filter blocks nothing.
    const filter = IpFilter.init(false, null);
    const clientp: ?*anyopaque = @ptrCast(@constCast(&filter));

    var loopback = makeSockAddrV4(.{ 127, 0, 0, 1 });
    const fd = opensocketCallback(.ipcxn, &loopback, clientp);

    try std.testing.expect(fd >= 0);
    posix.close(fd);
}

View File

@@ -43,6 +43,23 @@ pub const curl_writefunc_error: usize = c.CURL_WRITEFUNC_ERROR;
pub const curl_readfunc_pause: usize = c.CURL_READFUNC_PAUSE;
pub const CurlReadFunction = fn ([*]u8, usize, usize, *anyopaque) usize;
/// Zig view of curl's `curlsocktype`, the `purpose` argument passed to the
/// opensocket callback. Only the two values listed here are represented.
pub const CurlSockType = enum(c.curlsocktype) {
ipcxn = c.CURLSOCKTYPE_IPCXN,
accept = c.CURLSOCKTYPE_ACCEPT,
};
/// Mirror of curl's struct curl_sockaddr. The addr field is a struct sockaddr
/// inline (not a pointer), so addrlen tells you how many bytes of addr are valid.
/// Declared `extern` so the field layout follows the C ABI.
pub const CurlSockAddr = extern struct {
// Address family of the connection (compared against AF.INET / AF.INET6 by users).
family: c_int,
// Socket type and protocol, passed on unchanged when a socket is created.
socktype: c_int,
protocol: c_int,
// Number of valid bytes in `addr`.
addrlen: c_uint,
// Start of the socket address; reinterpreted according to `family`.
addr: std.posix.sockaddr,
};
pub const CURL_SOCKET_BAD: c.curl_socket_t = c.CURL_SOCKET_BAD;
pub const FreeCallback = fn (ptr: ?*anyopaque) void;
pub const StrdupCallback = fn (str: [*:0]const u8) ?[*:0]u8;
pub const MallocCallback = fn (size: usize) ?*anyopaque;
@@ -137,8 +154,17 @@ comptime {
return 0;
}
}.cb;
const opensocket_cb_check: c.curl_opensocket_callback = struct {
fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t {
_ = clientp;
_ = purpose;
_ = address;
return CURL_SOCKET_BAD;
}
}.cb;
_ = debug_cb_check;
_ = write_cb_check;
_ = opensocket_cb_check;
if (@sizeOf(CurlWaitFd) != @sizeOf(c.curl_waitfd)) {
@compileError("CurlWaitFd size mismatch");
@@ -152,6 +178,17 @@ comptime {
if (c.CURL_WAIT_POLLIN != 1 or c.CURL_WAIT_POLLPRI != 2 or c.CURL_WAIT_POLLOUT != 4) {
@compileError("CURL_WAIT_* flag values don't match CurlWaitEvents packed struct bit layout");
}
if (@sizeOf(CurlSockAddr) != @sizeOf(c.curl_sockaddr)) {
@compileError("CurlSockAddr size mismatch with curl_sockaddr");
}
if (@offsetOf(CurlSockAddr, "family") != @offsetOf(c.curl_sockaddr, "family") or
@offsetOf(CurlSockAddr, "socktype") != @offsetOf(c.curl_sockaddr, "socktype") or
@offsetOf(CurlSockAddr, "protocol") != @offsetOf(c.curl_sockaddr, "protocol") or
@offsetOf(CurlSockAddr, "addrlen") != @offsetOf(c.curl_sockaddr, "addrlen") or
@offsetOf(CurlSockAddr, "addr") != @offsetOf(c.curl_sockaddr, "addr"))
{
@compileError("CurlSockAddr layout mismatch with curl_sockaddr");
}
}
pub const CurlOption = enum(c.CURLoption) {
@@ -190,6 +227,8 @@ pub const CurlOption = enum(c.CURLoption) {
read_function = c.CURLOPT_READFUNCTION,
connect_only = c.CURLOPT_CONNECT_ONLY,
upload = c.CURLOPT_UPLOAD,
opensocket_function = c.CURLOPT_OPENSOCKETFUNCTION,
opensocket_data = c.CURLOPT_OPENSOCKETDATA,
};
pub const CurlMOption = enum(c.CURLMoption) {
@@ -620,6 +659,7 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype
.header_data,
.read_data,
.write_data,
.opensocket_data,
=> blk: {
const ptr: ?*anyopaque = switch (@typeInfo(@TypeOf(value))) {
.null => null,
@@ -643,6 +683,20 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype
break :blk c.curl_easy_setopt(easy, opt, cb);
},
.opensocket_function => blk: {
const cb: c.curl_opensocket_callback = switch (@typeInfo(@TypeOf(value))) {
.null => null,
.@"fn" => struct {
fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t {
const addr: *CurlSockAddr = @ptrCast(address orelse return CURL_SOCKET_BAD);
return value(@enumFromInt(purpose), addr, clientp);
}
}.cb,
else => @compileError("expected Zig function or null for " ++ @tagName(option) ++ ", got " ++ @typeName(@TypeOf(value))),
};
break :blk c.curl_easy_setopt(easy, opt, cb);
},
.header_function => blk: {
const cb: c.curl_write_callback = switch (@typeInfo(@TypeOf(value))) {
.null => null,