mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
Improve WPT /url/ tests
This is a bit all over the place.
1 - Replace libidn2 with rust-idna. It looks like there are different IDNA
profiles, and rust-idna (from the Servo project) implements the WHATWG
one. libidn2 would be too strict in some cases and not strict enough in
others. (Gemini says I could use libidn2 for this, but what it suggested
didn't work, and I couldn't figure it out myself, and Claude insisted that
libidn2 _did not_ have the correct implementation for what we want).
2 - We previously only ran a URL through IDNA if it wasn't ASCII. Turns out
we also need to run it if the host has a label starting with "xn--" (i.e. the
IDNA ACE prefix). This helps us pass hundreds of WPT cases, and it's pretty cheap.
3 - Implement more of the Area WebAPI. Mostly copied from Anchor.
4 - Add username/password accessors to Anchor/Area
5 - window.open validates the URL (i.e. tries to resolve it and handles the
error)
6 - Invalid idna conversion maps to a TypeError
7 - Clean up closed popups on the next tick (like destroyed pages), rather than
at an interval or on shutdown. This one seems unrelated, but some of these
tests open hundreds (thousands?) of popups and then close them.
This commit is contained in:
180
build.zig
180
build.zig
@@ -227,6 +227,7 @@ fn linkHtml5Ever(b: *Build, mod: *Build.Module) !void {
|
||||
"src/html5ever/lib.rs",
|
||||
"src/html5ever/sink.rs",
|
||||
"src/html5ever/types.rs",
|
||||
"src/html5ever/url.rs",
|
||||
}) |path| {
|
||||
exec_cargo.addFileInput(b.path(path));
|
||||
}
|
||||
@@ -310,13 +311,6 @@ fn linkCurl(b: *Build, mod: *Build.Module, is_tsan: bool) !void {
|
||||
const boringssl = buildBoringSsl(b, target, mod.optimize.?);
|
||||
for (boringssl) |lib| curl.root_module.linkLibrary(lib);
|
||||
|
||||
const libidn2 = buildLibidn2(b, target, mod.optimize.?, is_tsan);
|
||||
curl.root_module.linkLibrary(libidn2);
|
||||
// Also expose libidn2 to the consuming module so src/sys/idna.zig's
|
||||
// @cImport of <idn2.h> resolves. Without this, lightpanda_module only
|
||||
// sees idn2.h transitively if a system libidn2 happens to be installed.
|
||||
mod.linkLibrary(libidn2);
|
||||
|
||||
switch (target.result.os.tag) {
|
||||
.macos => {
|
||||
// needed for proxying on mac
|
||||
@@ -471,168 +465,6 @@ fn buildNghttp2(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.O
|
||||
return lib;
|
||||
}
|
||||
|
||||
fn buildLibidn2(
|
||||
b: *Build,
|
||||
target: Build.ResolvedTarget,
|
||||
optimize: std.builtin.OptimizeMode,
|
||||
is_tsan: bool,
|
||||
) *Build.Step.Compile {
|
||||
const dep = b.dependency("libidn2", .{});
|
||||
|
||||
const os = target.result.os.tag;
|
||||
const is_darwin = os.isDarwin();
|
||||
|
||||
const mod = b.createModule(.{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
.link_libc = true,
|
||||
.sanitize_thread = is_tsan,
|
||||
});
|
||||
|
||||
// libidn2's autoconf+gnulib stack expects a config.h with hundreds of
|
||||
// HAVE_*/_GL_ATTRIBUTE_* defines — including ~800 lines of attribute-
|
||||
// detection macros emitted from gnulib-common.m4 via AH_VERBATIM. We
|
||||
// vendor a single autoconf-generated config.h rather than try to
|
||||
// reproduce that machinery in the Zig build system.
|
||||
mod.addIncludePath(b.path("vendor/libidn2"));
|
||||
|
||||
// Substitute the gnulib-style .in.h templates. All @VAR@ in them are
|
||||
// either DLL-visibility markers (empty for static POSIX) or
|
||||
// HAVE_UNISTRING_WOE32DLL_H (0).
|
||||
inline for (.{ "unitypes", "unistr", "uniconv", "unictype", "uninorm" }) |name| {
|
||||
mod.addConfigHeader(renderUnistringHeader(b, dep, name));
|
||||
}
|
||||
|
||||
mod.addIncludePath(dep.path("lib"));
|
||||
mod.addIncludePath(dep.path("unistring"));
|
||||
// gl/ holds gnulib helpers — only malloca and version-etc headers are
|
||||
// referenced from the sources we compile; we don't need the full gl/ shim
|
||||
// layer (system header replacements).
|
||||
mod.addIncludePath(dep.path("gl"));
|
||||
|
||||
const lib = b.addLibrary(.{ .name = "idn2", .root_module = mod });
|
||||
lib.installHeader(dep.path("lib/idn2.h"), "idn2.h");
|
||||
|
||||
if (is_darwin) {
|
||||
// unistring's striconveh.c calls real iconv_*, which on macOS lives
|
||||
// in libiconv (separate from libSystem). On glibc Linux iconv is in
|
||||
// libc itself; on musl it would also need a separate -liconv.
|
||||
mod.linkSystemLibrary("iconv", .{});
|
||||
|
||||
// libidn2's lib/lookup.c calls strchrnul() without including
|
||||
// <string.h>; the prototype is declared in vendor/libidn2/config.h
|
||||
// alongside the existing strverscmp shim. macOS libc lacked the
|
||||
// symbol entirely before 15.4 — provide it here so the link
|
||||
// succeeds. Mirrors how gl/strverscmp.c is wired up below.
|
||||
lib.addCSourceFile(.{
|
||||
.file = b.path("vendor/libidn2/darwin/strchrnul.c"),
|
||||
.flags = &.{},
|
||||
});
|
||||
}
|
||||
|
||||
lib.addCSourceFiles(.{
|
||||
.root = dep.path("lib"),
|
||||
.flags = &.{ "-DHAVE_CONFIG_H", "-DIDN2_STATIC" },
|
||||
.files = &.{
|
||||
"bidi.c", "context.c", "data.c", "decode.c",
|
||||
"error.c", "free.c", "idna.c", "lookup.c",
|
||||
"punycode.c", "register.c", "tables.c", "tr46map.c",
|
||||
"version.c",
|
||||
},
|
||||
});
|
||||
lib.addCSourceFiles(.{
|
||||
.root = dep.path("gl"),
|
||||
.flags = &.{"-DHAVE_CONFIG_H"},
|
||||
// malloca.c provides striconveha's stack-or-heap allocator; strverscmp
|
||||
// is a glibc extension absent on macOS that lib/version.c needs.
|
||||
.files = &.{ "malloca.c", "strverscmp.c" },
|
||||
});
|
||||
lib.addCSourceFiles(.{
|
||||
.root = dep.path("unistring"),
|
||||
.flags = &.{"-DHAVE_CONFIG_H"},
|
||||
.files = &.{
|
||||
"c-ctype.c", "c-strcasecmp.c", "c-strncasecmp.c",
|
||||
"free.c", "iconv.c", "iconv_close.c",
|
||||
"iconv_open.c", "localcharset.c", "stdlib.c",
|
||||
"striconveh.c", "striconveha.c", "unistd.c",
|
||||
"uniconv/u8-conv-from-enc.c", "uniconv/u8-strconv-from-enc.c", "uniconv/u8-strconv-from-locale.c",
|
||||
"uniconv/u8-strconv-to-enc.c", "uniconv/u8-strconv-to-locale.c", "unictype/bidi_of.c",
|
||||
"unictype/categ_M.c", "unictype/categ_none.c", "unictype/categ_of.c",
|
||||
"unictype/categ_test.c", "unictype/combiningclass.c", "unictype/joiningtype_of.c",
|
||||
"unictype/scripts.c", "uninorm/canonical-decomposition.c", "uninorm/composition.c",
|
||||
"uninorm/decompose-internal.c", "uninorm/decomposition-table.c", "uninorm/nfc.c",
|
||||
"uninorm/nfd.c", "uninorm/u32-normalize.c", "unistr/u32-cmp.c",
|
||||
"unistr/u32-cpy-alloc.c", "unistr/u32-cpy.c", "unistr/u32-mbtouc-unsafe.c",
|
||||
"unistr/u32-strlen.c", "unistr/u32-to-u8.c", "unistr/u32-uctomb.c",
|
||||
"unistr/u8-check.c", "unistr/u8-mblen.c", "unistr/u8-mbtouc.c",
|
||||
"unistr/u8-mbtouc-aux.c", "unistr/u8-mbtouc-unsafe.c", "unistr/u8-mbtouc-unsafe-aux.c",
|
||||
"unistr/u8-mbtoucr.c", "unistr/u8-prev.c", "unistr/u8-strlen.c",
|
||||
"unistr/u8-to-u32.c", "unistr/u8-uctomb.c", "unistr/u8-uctomb-aux.c",
|
||||
},
|
||||
});
|
||||
|
||||
return lib;
|
||||
}
|
||||
|
||||
/// Process one of unistring's `.in.h` template headers into a real `.h`.
|
||||
/// All `@VAR@` substitutions in these headers are either DLL-visibility markers
|
||||
/// (empty for static POSIX builds) or `HAVE_UNISTRING_WOE32DLL_H` (0).
|
||||
fn renderUnistringHeader(b: *Build, dep: *Build.Dependency, name: []const u8) *Build.Step.ConfigHeader {
|
||||
const in_rel = b.fmt("unistring/{s}.in.h", .{name});
|
||||
const out_name = b.fmt("{s}.h", .{name});
|
||||
const lazy = dep.path(in_rel);
|
||||
const path = lazy.getPath3(b, null);
|
||||
|
||||
const file = path.root_dir.handle.openFile(path.sub_path, .{}) catch |e| {
|
||||
std.debug.panic("openFile {s}: {s}", .{ path.sub_path, @errorName(e) });
|
||||
};
|
||||
defer file.close();
|
||||
const contents = file.readToEndAlloc(b.allocator, 4 << 20) catch @panic("OOM");
|
||||
|
||||
const ch = b.addConfigHeader(.{
|
||||
.include_path = out_name,
|
||||
.style = .{ .autoconf_at = lazy },
|
||||
}, .{});
|
||||
|
||||
var seen = std.StringHashMap(void).init(b.allocator);
|
||||
var i: usize = 0;
|
||||
while (std.mem.indexOfScalarPos(u8, contents, i, '@')) |s| {
|
||||
const a = s + 1;
|
||||
const e = std.mem.indexOfScalarPos(u8, contents, a, '@') orelse break;
|
||||
const var_name = contents[a..e];
|
||||
if (!isAtConfigName(var_name)) {
|
||||
// Stray '@' (e.g. an email address in a comment); advance past it
|
||||
// alone so we don't mis-pair with a later '@'.
|
||||
i = s + 1;
|
||||
continue;
|
||||
}
|
||||
const owned = b.allocator.dupe(u8, var_name) catch @panic("OOM");
|
||||
const gop = seen.getOrPut(owned) catch @panic("OOM");
|
||||
if (!gop.found_existing) {
|
||||
if (std.mem.eql(u8, var_name, "HAVE_UNISTRING_WOE32DLL_H")) {
|
||||
ch.addValue(owned, c_int, 0);
|
||||
} else {
|
||||
ch.addValue(owned, []const u8, "");
|
||||
}
|
||||
}
|
||||
i = e + 1;
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
||||
/// Reports whether `s` has the shape of an `@VAR@` substitution name:
/// non-empty, built only from 'A'-'Z', '_' and '0'-'9', and not starting
/// with a digit. Anything else (lowercase, spaces, punctuation) is rejected.
fn isAtConfigName(s: []const u8) bool {
    if (s.len == 0) return false;

    // A digit is only acceptable after the first character.
    if (std.ascii.isDigit(s[0])) return false;

    for (s) |byte| {
        const allowed = std.ascii.isUpper(byte) or std.ascii.isDigit(byte) or byte == '_';
        if (!allowed) return false;
    }
    return true;
}
|
||||
|
||||
fn buildCurl(
|
||||
b: *Build,
|
||||
target: Build.ResolvedTarget,
|
||||
@@ -709,11 +541,11 @@ fn buildCurl(
|
||||
._FILE_OFFSET_BITS = 64,
|
||||
|
||||
.USE_IPV6 = true,
|
||||
// Route IDN hostnames through libidn2 (vendored, see buildLibidn2).
|
||||
// Without this, libcurl ships UTF-8 host bytes to SNI/cert validation
|
||||
// and breaks for non-ASCII hostnames like räksmörgås.se.
|
||||
.HAVE_LIBIDN2 = true,
|
||||
.HAVE_IDN2_H = true,
|
||||
// IDN is handled before libcurl (HttpClient calls URL.ensureHostAscii,
|
||||
// backed by rust-url), so libcurl always receives an ASCII host and
|
||||
// does not link libidn2.
|
||||
.HAVE_LIBIDN2 = false,
|
||||
.HAVE_IDN2_H = false,
|
||||
.CURL_OS = switch (os) {
|
||||
.linux => if (is_android) "\"android\"" else "\"linux\"",
|
||||
else => std.fmt.allocPrint(b.allocator, "\"{s}\"", .{@tagName(os)}) catch @panic("OOM"),
|
||||
|
||||
@@ -34,10 +34,6 @@
|
||||
.url = "https://github.com/allyourcodebase/sqlite3/archive/8f840560eae88ab66668c6827c64ffbd0d74ef37.tar.gz",
|
||||
.hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr",
|
||||
},
|
||||
.libidn2 = .{
|
||||
.url = "https://ftp.gnu.org/gnu/libidn/libidn2-2.3.8.tar.gz",
|
||||
.hash = "N-V-__8AABGOuAC_dhAN07kfoP4dycCFi8Bka4O-tuhriNH8",
|
||||
},
|
||||
},
|
||||
.paths = .{""},
|
||||
}
|
||||
|
||||
@@ -109,12 +109,6 @@ frame: Frame,
|
||||
// to the original page like this.
|
||||
popups: std.ArrayList(*Frame) = .empty,
|
||||
|
||||
// Popups that have called window.close() but whose teardown is deferred to
|
||||
// Page.deinit. We can't deinit synchronously from window.close() because
|
||||
// that's invoked from JS still running on top of the Frame's V8 context (or
|
||||
// from a script eval whose parser still holds the Frame).
|
||||
queued_close: std.ArrayList(*Frame) = .empty,
|
||||
|
||||
// Lifecycle state. A Page is `.pending` while we hold it as the in-flight
|
||||
// destination of a root navigation — its V8 context exists but is not yet the
|
||||
// session's active context. Flipped to `.active` by Session.commitPendingPage
|
||||
@@ -142,8 +136,6 @@ pub fn init(self: *Page, session: *Session, frame_id: u32) !void {
|
||||
// Tear down the Page and its root Frame. Equivalent to the old
|
||||
// Session.removePage + Session.resetFrameResources.
|
||||
pub fn deinit(self: *Page) void {
|
||||
self.cleanupClosedPopups();
|
||||
|
||||
for (self.popups.items) |popup| {
|
||||
popup.deinit();
|
||||
}
|
||||
@@ -197,13 +189,6 @@ pub fn deinit(self: *Page) void {
|
||||
session.arena_pool.release(self.frame_arena);
|
||||
}
|
||||
|
||||
pub fn cleanupClosedPopups(self: *Page) void {
|
||||
for (self.queued_close.items) |popup| {
|
||||
popup.deinit();
|
||||
}
|
||||
self.queued_close = .empty;
|
||||
}
|
||||
|
||||
pub fn getArena(self: *Page, size_or_bucket: anytype, debug: []const u8) !Allocator {
|
||||
return self.session.getArena(size_or_bucket, debug);
|
||||
}
|
||||
|
||||
@@ -86,10 +86,9 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !void {
|
||||
while (true) {
|
||||
if (gc_hint_timer.read() >= gc_hint_period_ns) {
|
||||
gc_hint_timer.reset();
|
||||
self.frame._page.cleanupClosedPopups();
|
||||
browser.env.memoryPressureNotification(.moderate);
|
||||
}
|
||||
session.processQueuedDestroyed();
|
||||
session.processDestroyQueues();
|
||||
|
||||
const tick_result = self._tick(is_cdp, tick_opts) catch |err| {
|
||||
switch (err) {
|
||||
|
||||
@@ -70,7 +70,8 @@ _active: ?*Page = null,
|
||||
// In-flight root navigation
|
||||
_pending: ?*Page = null,
|
||||
|
||||
_queued_destroy: std.ArrayList(*Page) = .{},
|
||||
_page_descruction_queue: std.ArrayList(*Page) = .{},
|
||||
_frame_destruction_queue: std.ArrayList(*Frame) = .{},
|
||||
|
||||
// Loader IDs are scoped to the Session: each new BrowserContext gets a
|
||||
// fresh counter. Frame IDs (`frame_id_gen`) live on `Browser` instead so
|
||||
@@ -127,7 +128,7 @@ pub fn deinit(self: *Session) void {
|
||||
if (self._active != null) {
|
||||
self.removePage();
|
||||
}
|
||||
self.processQueuedDestroyed();
|
||||
self.processDestroyQueues();
|
||||
|
||||
self.cookie_jar.deinit();
|
||||
|
||||
@@ -137,12 +138,27 @@ pub fn deinit(self: *Session) void {
|
||||
self.arena_pool.release(self.arena);
|
||||
}
|
||||
|
||||
pub fn processQueuedDestroyed(self: *Session) void {
|
||||
for (self._queued_destroy.items) |page| {
|
||||
page.deinit();
|
||||
self.browser.page_pool.destroy(page);
|
||||
pub fn processDestroyQueues(self: *Session) void {
|
||||
{
|
||||
const queue = self._frame_destruction_queue.items;
|
||||
if (queue.len > 0) {
|
||||
for (queue) |frame| {
|
||||
frame.deinit();
|
||||
}
|
||||
self._frame_destruction_queue.clearRetainingCapacity();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const queue = self._page_descruction_queue.items;
|
||||
if (queue.len > 0) {
|
||||
for (queue) |page| {
|
||||
page.deinit();
|
||||
self.browser.page_pool.destroy(page);
|
||||
}
|
||||
self._page_descruction_queue.clearRetainingCapacity();
|
||||
}
|
||||
}
|
||||
self._queued_destroy.clearRetainingCapacity();
|
||||
}
|
||||
|
||||
// True iff there is an active Page. CDP / external callers should use this
|
||||
@@ -161,8 +177,12 @@ fn allocatePage(self: *Session, frame_id: u32) !*Page {
|
||||
}
|
||||
|
||||
// Queue a Page allocated via allocatePage for deferred teardown; it is deinit'd and freed by processDestroyQueues.
|
||||
fn destroyPage(self: *Session, page: *Page) void {
|
||||
self._queued_destroy.append(self.arena, page) catch @panic("OOM");
|
||||
fn queuePageDestruction(self: *Session, page: *Page) void {
|
||||
self._page_descruction_queue.append(self.arena, page) catch @panic("OOM");
|
||||
}
|
||||
|
||||
pub fn queueFrameDestruction(self: *Session, frame: *Frame) void {
|
||||
self._frame_destruction_queue.append(self.arena, frame) catch @panic("OOM");
|
||||
}
|
||||
|
||||
// Tear down the currently-active Page. Dispatches `frame_remove` first
|
||||
@@ -193,7 +213,7 @@ fn tearDownActivePage(self: *Session) void {
|
||||
};
|
||||
|
||||
page.frame.abortTransfers();
|
||||
self.destroyPage(page);
|
||||
self.queuePageDestruction(page);
|
||||
self._active = null;
|
||||
self.navigation.onRemoveFrame();
|
||||
}
|
||||
@@ -209,7 +229,7 @@ fn tearDownActivePage(self: *Session) void {
|
||||
// for any prior teardown of an old page).
|
||||
fn installNewActivePage(self: *Session, frame_id: u32) !*Frame {
|
||||
const page = try self.allocatePage(frame_id);
|
||||
errdefer self.destroyPage(page);
|
||||
errdefer self.queuePageDestruction(page);
|
||||
self._active = page;
|
||||
errdefer self._active = null;
|
||||
|
||||
@@ -227,7 +247,7 @@ pub fn createPage(self: *Session) !*Frame {
|
||||
lp.assert(self._active == null, "Session.createPage - page not null", .{});
|
||||
|
||||
// Drain any pending Page deinits now, while we're at a known-safe point
|
||||
self.processQueuedDestroyed();
|
||||
self.processDestroyQueues();
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.browser, "create page", .{});
|
||||
@@ -522,7 +542,7 @@ pub fn initiateRootNavigation(self: *Session, frame_id: u32, url: [:0]const u8,
|
||||
}
|
||||
|
||||
const page = try self.allocatePage(frame_id);
|
||||
errdefer self.destroyPage(page);
|
||||
errdefer self.queuePageDestruction(page);
|
||||
|
||||
page._state = .pending;
|
||||
self._pending = page;
|
||||
@@ -604,7 +624,7 @@ pub fn commitPendingPage(self: *Session) !void {
|
||||
// done_callback after this point would re-enter against the new
|
||||
// _active and trip the half-torn-down session.
|
||||
old_active.frame.abortTransfers();
|
||||
self.destroyPage(old_active);
|
||||
self.queuePageDestruction(old_active);
|
||||
}
|
||||
|
||||
// Discard a pending Page without committing. Used for failure paths
|
||||
@@ -622,7 +642,7 @@ pub fn discardPendingPage(self: *Session) void {
|
||||
page.frame.abortTransfers();
|
||||
|
||||
self._pending = null;
|
||||
self.destroyPage(page);
|
||||
self.queuePageDestruction(page);
|
||||
}
|
||||
|
||||
// Frame IDs come from `Browser` (per-CDP-connection scope), not
|
||||
|
||||
@@ -35,6 +35,20 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, o
|
||||
const needs_dupe = comptime !isNullTerminated(PT);
|
||||
var path: [:0]const u8 = if (needs_dupe or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path;
|
||||
|
||||
if (std.mem.indexOfAny(u8, path, "\t\r\n")) |first| {
|
||||
path = blk: {
|
||||
var buf: std.ArrayList(u8) = try .initCapacity(allocator, path.len);
|
||||
buf.appendSliceAssumeCapacity(path[0..first]);
|
||||
for (path[first + 1 ..]) |c| {
|
||||
if (c != '\t' and c != '\r' and c != '\n') {
|
||||
buf.appendAssumeCapacity(c);
|
||||
}
|
||||
}
|
||||
buf.appendAssumeCapacity(0);
|
||||
break :blk buf.items[0 .. buf.items.len - 1 :0];
|
||||
};
|
||||
}
|
||||
|
||||
if (base.len == 0) {
|
||||
return processResolved(allocator, path, opts);
|
||||
}
|
||||
@@ -196,11 +210,13 @@ fn processResolved(allocator: Allocator, url: [:0]const u8, opts: ResolveOpts) !
|
||||
return ensureEncoded(allocator, url, encoding);
|
||||
}
|
||||
|
||||
/// IDNA-only pass: converts a non-ASCII host (`räksmörgås.se`) to its
|
||||
/// punycode form (`xn--rksmrgs-5wao1o.se`) and leaves everything else alone.
|
||||
/// IDNA pass: converts a non-ASCII host (`räksmörgås.se`) to its punycode form
|
||||
/// (`xn--rksmrgs-5wao1o.se`), validates any ASCII punycode (`xn--…`) labels,
|
||||
/// and leaves everything else alone. Returns `error.Idna` for an invalid
|
||||
/// domain (e.g. malformed punycode), which surfaces as a URL parse failure.
|
||||
fn ensureHostAscii(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
|
||||
const hostname = getHostname(url);
|
||||
if (hostname.len == 0 or !idna.needsAscii(hostname)) {
|
||||
if (hostname.len == 0 or (!idna.needsAscii(hostname) and !hasAceLabel(hostname))) {
|
||||
return url;
|
||||
}
|
||||
|
||||
@@ -217,6 +233,30 @@ fn ensureHostAscii(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
|
||||
return buf.items[0 .. buf.items.len - 1 :0];
|
||||
}
|
||||
|
||||
/// Reports whether any dot-separated label of `host` begins with the IDNA ACE
/// prefix "xn--", compared ASCII case-insensitively. Such labels are punycode:
/// even though they are pure ASCII, UTS#46 must decode and validate them, so a
/// host containing one can't take the `needsAscii` fast path.
///
/// Only a prefix at the very start of a label counts; "axn--b.com" and
/// "x-n--a.com" are not ACE labels. An empty host yields `false`.
fn hasAceLabel(host: []const u8) bool {
    // Walk the labels directly instead of scanning for '-' and reconstructing
    // the label boundary with index arithmetic.
    var labels = std.mem.splitScalar(u8, host, '.');
    while (labels.next()) |label| {
        if (std.ascii.startsWithIgnoreCase(label, "xn--")) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
pub fn ensureEncoded(allocator: Allocator, url_in: [:0]const u8, encoding: []const u8) ![:0]const u8 {
|
||||
// Resolve any IDN host first; everything below operates on the ASCII form.
|
||||
const url = try ensureHostAscii(allocator, url_in);
|
||||
@@ -1086,6 +1126,70 @@ test "URL: resolve" {
|
||||
}
|
||||
}
|
||||
|
||||
test "URL: resolve strips tab and newline from input" {
|
||||
defer testing.reset();
|
||||
|
||||
const Case = struct {
|
||||
base: [:0]const u8,
|
||||
path: [:0]const u8,
|
||||
expected: [:0]const u8,
|
||||
};
|
||||
|
||||
const cases = [_]Case{
|
||||
// Control char inside the host of an absolute URL.
|
||||
.{ .base = "https://x/", .path = "https://exa\tmple.com/p", .expected = "https://example.com/p" },
|
||||
.{ .base = "https://x/", .path = "https://example.com/\n\rp", .expected = "https://example.com/p" },
|
||||
// Leading control char (first == 0).
|
||||
.{ .base = "https://example/", .path = "\tfoo.js", .expected = "https://example/foo.js" },
|
||||
// Consecutive control chars.
|
||||
.{ .base = "https://example/", .path = "a\t\r\nb.js", .expected = "https://example/ab.js" },
|
||||
// Control chars spread through the path.
|
||||
.{ .base = "https://example/", .path = "a\tb\nc\rd.js", .expected = "https://example/abcd.js" },
|
||||
// Trailing control char.
|
||||
.{ .base = "https://example/", .path = "foo.js\n", .expected = "https://example/foo.js" },
|
||||
// All-strippable relative path collapses to the base.
|
||||
.{ .base = "https://example/dir/", .path = "\t\r\n", .expected = "https://example/dir/" },
|
||||
// No control chars: unchanged (the fast path).
|
||||
.{ .base = "https://example/", .path = "clean.js", .expected = "https://example/clean.js" },
|
||||
};
|
||||
|
||||
for (cases) |case| {
|
||||
const result = try resolve(testing.arena_allocator, case.base, case.path, .{});
|
||||
try testing.expectString(case.expected, result);
|
||||
}
|
||||
}
|
||||
|
||||
test "URL: resolve validates ASCII punycode (xn--) labels" {
|
||||
defer testing.reset();
|
||||
|
||||
// Valid punycode is left untouched (the needsAscii fast path would skip it,
|
||||
// so this exercises the xn-- gate going through toAscii and back).
|
||||
const ok = try resolve(testing.arena_allocator, "", "https://xn--rksmrgs-5wao1o.se/x", .{});
|
||||
try testing.expectString("https://xn--rksmrgs-5wao1o.se/x", ok);
|
||||
|
||||
// Malformed punycode must be rejected rather than passed through verbatim.
|
||||
// (URL.init remaps this error.Idna to TypeError for `new URL`.)
|
||||
try testing.expectError(error.Idna, resolve(testing.arena_allocator, "", "https://xn--0.pt/x", .{}));
|
||||
try testing.expectError(error.Idna, resolve(testing.arena_allocator, "", "https://xn--a.pt/x", .{}));
|
||||
}
|
||||
|
||||
test "URL: hasAceLabel" {
|
||||
// ACE prefix at a label start (case-insensitive).
|
||||
try testing.expectEqual(true, hasAceLabel("xn--a"));
|
||||
try testing.expectEqual(true, hasAceLabel("xn--rksmrgs-5wao1o.se"));
|
||||
try testing.expectEqual(true, hasAceLabel("a.xn--b.com"));
|
||||
try testing.expectEqual(true, hasAceLabel("XN--ab.com"));
|
||||
try testing.expectEqual(true, hasAceLabel("foo.example.xn--p1ai"));
|
||||
|
||||
// Has '-', but no ACE label.
|
||||
try testing.expectEqual(false, hasAceLabel("example.com"));
|
||||
try testing.expectEqual(false, hasAceLabel("my-site.com"));
|
||||
try testing.expectEqual(false, hasAceLabel("axn--b.com")); // xn-- not at a label start
|
||||
try testing.expectEqual(false, hasAceLabel("x-n--a.com")); // not "xn" before the '-'
|
||||
try testing.expectEqual(false, hasAceLabel("-.com"));
|
||||
try testing.expectEqual(false, hasAceLabel(""));
|
||||
}
|
||||
|
||||
test "URL: ensureEncoded" {
|
||||
defer testing.reset();
|
||||
|
||||
|
||||
@@ -372,6 +372,7 @@ fn handleError(comptime T: type, comptime F: type, local: *const Local, err: any
|
||||
error.TryCatchRethrow => return,
|
||||
error.InvalidArgument => isolate.createTypeError("invalid argument"),
|
||||
error.TypeError => isolate.createTypeError(""),
|
||||
error.Idna => isolate.createTypeError("invalid domain"),
|
||||
error.RangeError => isolate.createRangeError(""),
|
||||
error.OutOfMemory => isolate.createError("out of memory"),
|
||||
error.IllegalConstructor => isolate.createError("Illegal Constructor"),
|
||||
|
||||
@@ -97,6 +97,28 @@
|
||||
testing.expectEqual('http://example.com/a/b/foo', url.toString());
|
||||
}
|
||||
|
||||
{
|
||||
// IDN hosts are converted to punycode (UTS#46).
|
||||
const url = new URL('https://räksmörgås.se/x');
|
||||
testing.expectEqual('xn--rksmrgs-5wao1o.se', url.hostname);
|
||||
testing.expectEqual('https://xn--rksmrgs-5wao1o.se/x', url.href);
|
||||
}
|
||||
|
||||
{
|
||||
// Valid punycode passes through unchanged.
|
||||
const url = new URL('https://xn--rksmrgs-5wao1o.se/x');
|
||||
testing.expectEqual('xn--rksmrgs-5wao1o.se', url.hostname);
|
||||
}
|
||||
|
||||
{
|
||||
// An invalid domain (malformed punycode) is a parse failure -> TypeError.
|
||||
testing.withError((err) => {
|
||||
testing.expectEqual(true, err.toString().includes('TypeError'));
|
||||
}, () => {
|
||||
const url = new URL('https://xn--0.pt/x');
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
const base = 'http://example.com/a/b/c/d';
|
||||
const url = new URL('../../../../../foo', base);
|
||||
|
||||
@@ -21,6 +21,7 @@ const lp = @import("lightpanda");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const URL = @import("../URL.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
const Console = @import("Console.zig");
|
||||
const History = @import("History.zig");
|
||||
@@ -500,6 +501,14 @@ pub fn open(self: *Window, url_: ?[]const u8, target_: ?[]const u8, features_: ?
|
||||
|
||||
const no_opener = hasFeatureToken(features, "noopener") or hasFeatureToken(features, "noreferrer");
|
||||
|
||||
if (raw_url.len > 0) {
|
||||
// Per spec, we should validate the url
|
||||
_ = URL.resolve(frame.call_arena, frame.base(), raw_url, .{}) catch |err| switch (err) {
|
||||
error.OutOfMemory => |e| return e,
|
||||
else => return error.SyntaxError,
|
||||
};
|
||||
}
|
||||
|
||||
// _self / _parent / _top navigate the current browsing context.
|
||||
if (std.ascii.eqlIgnoreCase(target, "_self") or
|
||||
std.ascii.eqlIgnoreCase(target, "_parent") or
|
||||
@@ -604,9 +613,7 @@ pub fn close(self: *Window) void {
|
||||
// eval whose parser is still holding the Frame. Destroying the context
|
||||
// now leaves dangling pointers in the unwinding script eval (load event
|
||||
// dispatch, runMacrotasks, etc.). Defer to the session's frame destruction queue (drained by Session.processDestroyQueues) instead.
|
||||
page.queued_close.append(page.frame_arena, frame) catch |err| {
|
||||
log.err(.frame, "queue popup close", .{ .err = err });
|
||||
};
|
||||
page.session.queueFrameDestruction(frame);
|
||||
}
|
||||
|
||||
pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]const u8, transfer: ?[]const *MessagePort, frame: *Frame) !void {
|
||||
@@ -995,7 +1002,7 @@ pub const JsApi = struct {
|
||||
pub const opener = bridge.accessor(Window.getOpener, null, .{});
|
||||
pub const closed = bridge.accessor(Window.getClosed, null, .{});
|
||||
pub const name = bridge.accessor(Window.getName, Window.setName, .{});
|
||||
pub const open = bridge.function(Window.open, .{});
|
||||
pub const open = bridge.function(Window.open, .{ .dom_exception = true });
|
||||
pub const close = bridge.function(Window.close, .{});
|
||||
|
||||
pub const alert = bridge.function(struct {
|
||||
|
||||
@@ -165,6 +165,28 @@ pub fn setProtocol(self: *Anchor, value: []const u8, frame: *Frame) !void {
|
||||
try setHref(self, new_href, frame);
|
||||
}
|
||||
|
||||
pub fn getUsername(self: *Anchor, frame: *Frame) ![]const u8 {
|
||||
const href = try getResolvedHref(self, frame) orelse return "";
|
||||
return URL.getUsername(href);
|
||||
}
|
||||
|
||||
pub fn setUsername(self: *Anchor, value: []const u8, frame: *Frame) !void {
|
||||
const href = try getResolvedHref(self, frame) orelse return;
|
||||
const new_href = try URL.setUsername(href, value, frame.call_arena);
|
||||
try setHref(self, new_href, frame);
|
||||
}
|
||||
|
||||
pub fn getPassword(self: *Anchor, frame: *Frame) ![]const u8 {
|
||||
const href = try getResolvedHref(self, frame) orelse return "";
|
||||
return URL.getPassword(href);
|
||||
}
|
||||
|
||||
pub fn setPassword(self: *Anchor, value: []const u8, frame: *Frame) !void {
|
||||
const href = try getResolvedHref(self, frame) orelse return;
|
||||
const new_href = try URL.setPassword(href, value, frame.call_arena);
|
||||
try setHref(self, new_href, frame);
|
||||
}
|
||||
|
||||
pub fn getType(self: *Anchor) []const u8 {
|
||||
return self.asElement().getAttributeSafe(comptime .wrap("type")) orelse "";
|
||||
}
|
||||
@@ -221,6 +243,8 @@ pub const JsApi = struct {
|
||||
pub const protocol = bridge.accessor(Anchor.getProtocol, Anchor.setProtocol, .{ .ce_reactions = true });
|
||||
pub const host = bridge.accessor(Anchor.getHost, Anchor.setHost, .{ .ce_reactions = true });
|
||||
pub const hostname = bridge.accessor(Anchor.getHostname, Anchor.setHostname, .{ .ce_reactions = true });
|
||||
pub const username = bridge.accessor(Anchor.getUsername, Anchor.setUsername, .{ .ce_reactions = true });
|
||||
pub const password = bridge.accessor(Anchor.getPassword, Anchor.setPassword, .{ .ce_reactions = true });
|
||||
pub const port = bridge.accessor(Anchor.getPort, Anchor.setPort, .{ .ce_reactions = true });
|
||||
pub const pathname = bridge.accessor(Anchor.getPathname, Anchor.setPathname, .{ .ce_reactions = true });
|
||||
pub const search = bridge.accessor(Anchor.getSearch, Anchor.setSearch, .{ .ce_reactions = true });
|
||||
|
||||
@@ -1,4 +1,23 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const js = @import("../../../js/js.zig");
|
||||
const Frame = @import("../../../Frame.zig");
|
||||
|
||||
const URL = @import("../../../URL.zig");
|
||||
const Node = @import("../../Node.zig");
|
||||
const Element = @import("../../Element.zig");
|
||||
const HtmlElement = @import("../Html.zig");
|
||||
@@ -14,6 +33,155 @@ pub fn asNode(self: *Area) *Node {
|
||||
return self.asElement().asNode();
|
||||
}
|
||||
|
||||
pub fn getHref(self: *Area, frame: *Frame) ![]const u8 {
|
||||
const href = self.asElement().getAttributeSafe(comptime .wrap("href")) orelse return "";
|
||||
if (href.len == 0) {
|
||||
return "";
|
||||
}
|
||||
return self.asNode().resolveURL(href, frame, .{});
|
||||
}
|
||||
|
||||
pub fn setHref(self: *Area, value: []const u8, frame: *Frame) !void {
|
||||
try self.asElement().setAttributeSafe(comptime .wrap("href"), .wrap(value), frame);
|
||||
}
|
||||
|
||||
/// Returns the origin of the resolved href. Yields "" when there is no
/// usable href, and the literal string "null" when `URL.getOrigin` reports
/// no origin for it.
pub fn getOrigin(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    const maybe_origin = try URL.getOrigin(frame.call_arena, resolved);
    return maybe_origin orelse "null";
}
|
||||
|
||||
/// Returns the host (hostname plus port) of the resolved href, or "" when the
/// element has no usable href.
///
/// When the URL spells out the default port of its scheme, only the hostname
/// is returned. The defaults follow the WHATWG URL special-scheme table:
/// http/ws -> 80, https/wss -> 443, ftp -> 21.
pub fn getHost(self: *Area, frame: *Frame) ![]const u8 {
    const href = try getResolvedHref(self, frame) orelse return "";
    const port = URL.getPort(href);

    if (port.len > 0) {
        const protocol = URL.getProtocol(href);
        const scheme_default: ?[]const u8 = if (std.mem.eql(u8, protocol, "http:") or std.mem.eql(u8, protocol, "ws:"))
            "80"
        else if (std.mem.eql(u8, protocol, "https:") or std.mem.eql(u8, protocol, "wss:"))
            "443"
        else if (std.mem.eql(u8, protocol, "ftp:"))
            "21"
        else
            null;

        if (scheme_default) |default_port| {
            // Strip default ports
            if (std.mem.eql(u8, port, default_port)) {
                return URL.getHostname(href);
            }
        }
    }

    return URL.getHost(href);
}
|
||||
|
||||
/// Replaces the host of the resolved href and writes the result back to the
/// `href` attribute. Does nothing when the element has no usable href.
pub fn setHost(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setHost(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the hostname of the resolved href, or "" when the element has no
/// usable href.
pub fn getHostname(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getHostname(resolved);
}
|
||||
|
||||
/// Replaces the hostname of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when the element has no usable href.
pub fn setHostname(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setHostname(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the username component of the resolved href, or "" when the
/// element has no usable href.
pub fn getUsername(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getUsername(resolved);
}
|
||||
|
||||
/// Replaces the username of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when the element has no usable href.
pub fn setUsername(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setUsername(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the password component of the resolved href, or "" when the
/// element has no usable href.
pub fn getPassword(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getPassword(resolved);
}
|
||||
|
||||
/// Replaces the password of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when the element has no usable href.
pub fn setPassword(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setPassword(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the port of the resolved href, or "" when the element has no
/// usable href, the URL has no explicit port, or the explicit port is the
/// default for the URL's scheme.
///
/// The defaults follow the WHATWG URL special-scheme table:
/// http/ws -> 80, https/wss -> 443, ftp -> 21.
pub fn getPort(self: *Area, frame: *Frame) ![]const u8 {
    const href = try getResolvedHref(self, frame) orelse return "";
    const port = URL.getPort(href);
    if (port.len == 0) {
        return port;
    }

    const protocol = URL.getProtocol(href);
    const scheme_default: ?[]const u8 = if (std.mem.eql(u8, protocol, "http:") or std.mem.eql(u8, protocol, "ws:"))
        "80"
    else if (std.mem.eql(u8, protocol, "https:") or std.mem.eql(u8, protocol, "wss:"))
        "443"
    else if (std.mem.eql(u8, protocol, "ftp:"))
        "21"
    else
        null;

    if (scheme_default) |default_port| {
        // Return empty string for default ports
        if (std.mem.eql(u8, port, default_port)) {
            return "";
        }
    }

    return port;
}
|
||||
|
||||
/// Replaces the port of the resolved href (`value` may be null) and writes
/// the result back to the `href` attribute. Does nothing when the element has
/// no usable href.
pub fn setPort(self: *Area, value: ?[]const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setPort(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the search (query) component of the resolved href, or "" when the
/// element has no usable href.
pub fn getSearch(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getSearch(resolved);
}
|
||||
|
||||
/// Replaces the search component of the resolved href and writes the result
/// back to the `href` attribute. Does nothing when the element has no usable
/// href.
pub fn setSearch(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setSearch(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the hash (fragment) component of the resolved href, or "" when the
/// element has no usable href.
pub fn getHash(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getHash(resolved);
}
|
||||
|
||||
/// Replaces the hash component of the resolved href and writes the result
/// back to the `href` attribute. Does nothing when the element has no usable
/// href.
pub fn setHash(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setHash(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the pathname component of the resolved href, or "" when the
/// element has no usable href.
pub fn getPathname(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getPathname(resolved);
}
|
||||
|
||||
/// Replaces the pathname of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when the element has no usable href.
pub fn setPathname(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setPathname(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Returns the protocol component of the resolved href, or "" when the
/// element has no usable href.
pub fn getProtocol(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try self.getResolvedHref(frame)) orelse return "";
    return URL.getProtocol(resolved);
}
|
||||
|
||||
/// Replaces the protocol of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when the element has no usable href.
pub fn setProtocol(self: *Area, value: []const u8, frame: *Frame) !void {
    const current = (try self.getResolvedHref(frame)) orelse return;
    const updated = try URL.setProtocol(current, value, frame.call_arena);
    return self.setHref(updated, frame);
}
|
||||
|
||||
/// Shared lookup for the URL component accessors: returns the `href`
/// attribute passed through `resolveURL`, or null when the attribute is
/// missing or empty.
fn getResolvedHref(self: *Area, frame: *Frame) !?[:0]const u8 {
    const raw = self.asElement().getAttributeSafe(comptime .wrap("href")) orelse return null;
    if (raw.len > 0) {
        return try self.asNode().resolveURL(raw, frame, .{});
    }
    return null;
}
|
||||
|
||||
pub const JsApi = struct {
|
||||
pub const bridge = js.Bridge(Area);
|
||||
|
||||
@@ -22,4 +190,17 @@ pub const JsApi = struct {
|
||||
pub const prototype_chain = bridge.prototypeChain();
|
||||
pub var class_id: bridge.ClassId = undefined;
|
||||
};
|
||||
|
||||
pub const href = bridge.accessor(Area.getHref, Area.setHref, .{ .ce_reactions = true });
|
||||
pub const origin = bridge.accessor(Area.getOrigin, null, .{});
|
||||
pub const protocol = bridge.accessor(Area.getProtocol, Area.setProtocol, .{ .ce_reactions = true });
|
||||
pub const host = bridge.accessor(Area.getHost, Area.setHost, .{ .ce_reactions = true });
|
||||
pub const hostname = bridge.accessor(Area.getHostname, Area.setHostname, .{ .ce_reactions = true });
|
||||
pub const username = bridge.accessor(Area.getUsername, Area.setUsername, .{ .ce_reactions = true });
|
||||
pub const password = bridge.accessor(Area.getPassword, Area.setPassword, .{ .ce_reactions = true });
|
||||
pub const port = bridge.accessor(Area.getPort, Area.setPort, .{ .ce_reactions = true });
|
||||
pub const pathname = bridge.accessor(Area.getPathname, Area.setPathname, .{ .ce_reactions = true });
|
||||
pub const search = bridge.accessor(Area.getSearch, Area.setSearch, .{ .ce_reactions = true });
|
||||
pub const hash = bridge.accessor(Area.getHash, Area.setHash, .{ .ce_reactions = true });
|
||||
pub const toString = bridge.function(Area.getHref, .{});
|
||||
};
|
||||
|
||||
250
src/html5ever/Cargo.lock
generated
250
src/html5ever/Cargo.lock
generated
@@ -30,6 +30,17 @@ version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "displaydoc"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.35"
|
||||
@@ -61,6 +72,109 @@ dependencies = [
|
||||
"markup5ever",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_collections"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"potential_utf",
|
||||
"utf8_iter",
|
||||
"yoke",
|
||||
"zerofrom",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_locale_core"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"litemap",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_normalizer"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
|
||||
dependencies = [
|
||||
"icu_collections",
|
||||
"icu_normalizer_data",
|
||||
"icu_properties",
|
||||
"icu_provider",
|
||||
"smallvec",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_normalizer_data"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
|
||||
|
||||
[[package]]
|
||||
name = "icu_properties"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
|
||||
dependencies = [
|
||||
"icu_collections",
|
||||
"icu_locale_core",
|
||||
"icu_properties_data",
|
||||
"icu_provider",
|
||||
"zerotrie",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_properties_data"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
|
||||
|
||||
[[package]]
|
||||
name = "icu_provider"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"icu_locale_core",
|
||||
"writeable",
|
||||
"yoke",
|
||||
"zerofrom",
|
||||
"zerotrie",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
|
||||
dependencies = [
|
||||
"idna_adapter",
|
||||
"smallvec",
|
||||
"utf8_iter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna_adapter"
|
||||
version = "1.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
|
||||
dependencies = [
|
||||
"icu_normalizer",
|
||||
"icu_properties",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.172"
|
||||
@@ -73,6 +187,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"encoding_rs",
|
||||
"html5ever",
|
||||
"idna",
|
||||
"string_cache",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
@@ -80,6 +195,12 @@ dependencies = [
|
||||
"xml5ever",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "litemap"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.13"
|
||||
@@ -181,6 +302,15 @@ dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "potential_utf"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
|
||||
dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
@@ -198,9 +328,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.40"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
@@ -258,6 +388,12 @@ version = "1.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.9.0"
|
||||
@@ -294,6 +430,17 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "synstructure"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.5.0"
|
||||
@@ -335,6 +482,16 @@ dependencies = [
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinystr"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typed-arena"
|
||||
version = "2.0.2"
|
||||
@@ -353,6 +510,12 @@ version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "utf8_iter"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
|
||||
|
||||
[[package]]
|
||||
name = "web_atoms"
|
||||
version = "0.2.3"
|
||||
@@ -429,6 +592,12 @@ version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "writeable"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
|
||||
|
||||
[[package]]
|
||||
name = "xml5ever"
|
||||
version = "0.39.0"
|
||||
@@ -438,3 +607,80 @@ dependencies = [
|
||||
"log",
|
||||
"markup5ever",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
"yoke-derive",
|
||||
"zerofrom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke-derive"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerofrom"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
|
||||
dependencies = [
|
||||
"zerofrom-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerofrom-derive"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerotrie"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"yoke",
|
||||
"zerofrom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerovec"
|
||||
version = "0.11.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
|
||||
dependencies = [
|
||||
"yoke",
|
||||
"zerofrom",
|
||||
"zerovec-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerovec-derive"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
@@ -16,6 +16,7 @@ tikv-jemallocator = {version = "0.6.1", features = ["stats"]}
|
||||
tikv-jemalloc-ctl = {version = "0.6.1", features = ["stats"]}
|
||||
xml5ever = "0.39.0"
|
||||
encoding_rs = "0.8"
|
||||
idna = "1.1.0"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
mod sink;
|
||||
mod types;
|
||||
mod url;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[global_allocator]
|
||||
@@ -159,8 +160,7 @@ pub extern "C" fn html5ever_parse_document_with_encoding(
|
||||
};
|
||||
|
||||
// Parse directly from decoded string
|
||||
parse_document(sink, Default::default())
|
||||
.one(StrTendril::from(decoded.as_ref()));
|
||||
parse_document(sink, Default::default()).one(StrTendril::from(decoded.as_ref()));
|
||||
}
|
||||
|
||||
// === Encoding API for TextDecoder ===
|
||||
@@ -180,10 +180,7 @@ pub struct EncodingInfo {
|
||||
|
||||
/// Look up an encoding by its label (case-insensitive, whitespace-trimmed)
|
||||
#[no_mangle]
|
||||
pub extern "C" fn encoding_for_label(
|
||||
label: *const c_uchar,
|
||||
label_len: usize,
|
||||
) -> EncodingInfo {
|
||||
pub extern "C" fn encoding_for_label(label: *const c_uchar, label_len: usize) -> EncodingInfo {
|
||||
if label.is_null() || label_len == 0 {
|
||||
return EncodingInfo {
|
||||
found: 0,
|
||||
|
||||
83
src/html5ever/url.rs
Normal file
83
src/html5ever/url.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
// WHATWG "domain to ASCII" backed by the `idna` crate (UTS#46, the same engine
|
||||
// rust-url/Servo use). Pairs with src/sys/idna.zig. Replaced libidn2, whose
|
||||
// IDNA-2008 behavior diverged from the spec. Value-in / value-out: a UTF-8
|
||||
// host string becomes its punycode form, or an error.
|
||||
|
||||
use std::os::raw::c_uchar;
|
||||
use std::slice;
|
||||
|
||||
/// Borrows `len` bytes starting at `ptr` as UTF-8 text.
///
/// Returns `Some("")` for a null pointer or a zero length, and `None` when the
/// bytes are not valid UTF-8. The lifetime of the returned string is chosen by
/// the caller and must not outlive the buffer behind `ptr`; it is deliberately
/// not `'static`, because the memory is owned by the foreign caller.
fn str_from<'a>(ptr: *const c_uchar, len: usize) -> Option<&'a str> {
    // Zig hands empty slices a non-null but dangling pointer, so length must
    // be checked before forming a slice from raw parts.
    if ptr.is_null() || len == 0 {
        return Some("");
    }
    // Relies on the caller passing a pointer that is readable for `len` bytes.
    let bytes = unsafe { slice::from_raw_parts(ptr, len) };
    std::str::from_utf8(bytes).ok()
}
|
||||
|
||||
// Runs `f` and returns its status code. A panic raised inside `f` is caught
// here so that it never unwinds across the extern "C" boundary (which would
// abort the whole process); a caught panic is reported as error code 1.
fn ffi_guard<F: FnOnce() -> i32>(f: F) -> i32 {
    let guarded = std::panic::AssertUnwindSafe(f);
    match std::panic::catch_unwind(guarded) {
        Ok(code) => code,
        Err(_) => 1,
    }
}
|
||||
|
||||
/// WHATWG "domain to ASCII" (UTS#46, non-transitional, beStrict=false). Writes
|
||||
/// a NUL-terminated owned buffer to *out_ptr / *out_len (caller frees with
|
||||
/// lpurl_free). Returns 0 on success, 1 if `host` is not a valid domain.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn lpurl_domain_to_ascii(
|
||||
host_ptr: *const c_uchar,
|
||||
host_len: usize,
|
||||
out_ptr: *mut *mut c_uchar,
|
||||
out_len: *mut usize,
|
||||
) -> i32 {
|
||||
ffi_guard(move || {
|
||||
let host = match str_from(host_ptr, host_len) {
|
||||
Some(s) => s,
|
||||
None => return 1,
|
||||
};
|
||||
let ascii = match idna::domain_to_ascii(host) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return 1,
|
||||
};
|
||||
let len = ascii.len();
|
||||
let mut bytes = ascii.into_bytes();
|
||||
bytes.push(0);
|
||||
let boxed = bytes.into_boxed_slice();
|
||||
unsafe {
|
||||
*out_ptr = Box::into_raw(boxed) as *mut c_uchar;
|
||||
*out_len = len;
|
||||
}
|
||||
0
|
||||
})
|
||||
}
|
||||
|
||||
/// Releases a buffer previously returned by `lpurl_domain_to_ascii`.
///
/// `len` is the length reported by that call (without the NUL terminator).
/// A null `ptr` is ignored.
#[no_mangle]
pub extern "C" fn lpurl_free(ptr: *mut c_uchar, len: usize) {
    if !ptr.is_null() {
        // The allocation was a boxed slice holding the text plus its NUL
        // terminator, so it spans exactly len + 1 bytes.
        let total = len + 1;
        unsafe {
            let raw = std::ptr::slice_from_raw_parts_mut(ptr, total);
            drop(Box::from_raw(raw));
        }
    }
}
|
||||
@@ -15,12 +15,19 @@
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const c = @cImport({
|
||||
@cInclude("idn2.h");
|
||||
});
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
pub const Error = error{Idna} || Allocator.Error;
|
||||
|
||||
// WHATWG "domain to ASCII" lives in our Rust FFI crate (src/html5ever/url.rs),
|
||||
// which uses the UTS#46-conformant `idna` crate — the same engine rust-url
|
||||
// itself uses.
|
||||
extern "c" fn lpurl_domain_to_ascii(
|
||||
host_ptr: [*]const u8,
|
||||
host_len: usize,
|
||||
out_ptr: *?[*]u8,
|
||||
out_len: *usize,
|
||||
) i32;
|
||||
|
||||
extern "c" fn lpurl_free(ptr: ?[*]u8, len: usize) void;
|
||||
|
||||
/// True if `host` contains any non-ASCII byte and therefore needs IDNA
|
||||
/// processing. Pure-ASCII hostnames are returned unchanged by `toAscii`,
|
||||
@@ -35,21 +42,16 @@ pub fn needsAscii(host: []const u8) bool {
|
||||
}
|
||||
|
||||
/// Convert a UTF-8 hostname to its ASCII (Punycode) form per UTS#46
|
||||
/// IDNA 2008 with non-transitional processing — the algorithm WHATWG URL
|
||||
/// invokes as "domain to ASCII". Returns an allocator-owned slice.
|
||||
pub fn toAscii(allocator: Allocator, host: []const u8) Error![]u8 {
|
||||
const host_z = try allocator.dupeZ(u8, host);
|
||||
defer allocator.free(host_z);
|
||||
|
||||
var out_ptr: [*c]u8 = undefined;
|
||||
const flags: c_int = c.IDN2_NFC_INPUT | c.IDN2_NONTRANSITIONAL;
|
||||
const rc = c.idn2_to_ascii_8z(host_z.ptr, &out_ptr, flags);
|
||||
if (rc != c.IDN2_OK) {
|
||||
/// non-transitional processing — the algorithm WHATWG URL invokes as
|
||||
/// "domain to ASCII". Returns an allocator-owned slice.
|
||||
pub fn toAscii(allocator: Allocator, host: []const u8) ![]u8 {
|
||||
var out_len: usize = 0;
|
||||
var out_ptr: ?[*]u8 = null;
|
||||
if (lpurl_domain_to_ascii(host.ptr, host.len, &out_ptr, &out_len) != 0) {
|
||||
return error.Idna;
|
||||
}
|
||||
defer c.idn2_free(out_ptr);
|
||||
|
||||
return try allocator.dupe(u8, std.mem.span(@as([*:0]const u8, @ptrCast(out_ptr))));
|
||||
defer lpurl_free(out_ptr, out_len);
|
||||
return allocator.dupe(u8, out_ptr.?[0..out_len]);
|
||||
}
|
||||
|
||||
const testing = @import("../testing.zig");
|
||||
@@ -74,3 +76,39 @@ test "idna: German sharp s with non-transitional processing" {
|
||||
defer testing.allocator.free(out);
|
||||
try testing.expectString("xn--fa-hia.de", out);
|
||||
}
|
||||
|
||||
test "idna: needsAscii" {
    // Only hosts containing a non-ASCII byte are expected to need conversion;
    // empty and already-ASCII hosts are not.
    const cases = [_]struct { host: []const u8, expected: bool }{
        .{ .host = "", .expected = false },
        .{ .host = "xn--fa-hia.de", .expected = false },
        .{ .host = "faß.de", .expected = true },
        .{ .host = "\xff", .expected = true },
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, needsAscii(case.host));
    }
}
|
||||
|
||||
test "idna: UTS#46 lowercases ASCII" {
    // Upper-case ASCII input must come back lower-cased.
    const ascii = try toAscii(testing.allocator, "EXAMPLE.COM");
    defer testing.allocator.free(ascii);

    try testing.expectString("example.com", ascii);
}
|
||||
|
||||
test "idna: already-punycode is idempotent" {
    // A host that is already in ACE form must round-trip unchanged.
    const host = "xn--rksmrgs-5wao1o.se";
    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);

    try testing.expectString("xn--rksmrgs-5wao1o.se", ascii);
}
|
||||
|
||||
test "idna: mixed ASCII and non-ASCII labels" {
    // Only the non-ASCII label is encoded; the ASCII labels are kept as-is.
    const ascii = try toAscii(testing.allocator, "münchen.example.com");
    defer testing.allocator.free(ascii);

    try testing.expectString("xn--mnchen-3ya.example.com", ascii);
}
|
||||
|
||||
test "idna: multi-label CJK" {
    // CJK label followed by an ASCII TLD.
    const ascii = try toAscii(testing.allocator, "日本.jp");
    defer testing.allocator.free(ascii);

    try testing.expectString("xn--wgv71a.jp", ascii);
}
|
||||
|
||||
test "idna: invalid domain returns error" {
    // U+FFFD (REPLACEMENT CHARACTER) is disallowed under UTS#46.
    const result = toAscii(testing.allocator, "\u{FFFD}.com");
    try testing.expectError(error.Idna, result);
}
|
||||
|
||||
1915
vendor/libidn2/config.h
vendored
1915
vendor/libidn2/config.h
vendored
File diff suppressed because it is too large
Load Diff
20
vendor/libidn2/darwin/strchrnul.c
vendored
20
vendor/libidn2/darwin/strchrnul.c
vendored
@@ -1,20 +0,0 @@
|
||||
/* Darwin-only strchrnul shim for libidn2.
|
||||
|
||||
strchrnul is a glibc extension. macOS libc lacks it before 15.4, and
|
||||
libidn2's lib/lookup.c never includes <string.h> — so even on newer
|
||||
macOS the declaration would not reach the call site. The matching
|
||||
prototype is declared next to the strverscmp shim in
|
||||
vendor/libidn2/config.h (within the _LIBIDN2_LP_DECLS block, gated on
|
||||
__APPLE__), so callers compile; this file provides the symbol so
|
||||
they link.
|
||||
|
||||
gnulib's strchrnul.c falls through to rawmemchr() when the search byte
|
||||
is NUL — also a glibc extension. libidn2 only ever searches for '.', so
|
||||
a straight byte scan is enough and avoids dragging in a second shim. */
|
||||
|
||||
/* Returns a pointer to the first byte in s equal to (unsigned char) c_in, or
   to the terminating NUL of s when no such byte exists. */
char *strchrnul(const char *s, int c_in) {
  const unsigned char target = (unsigned char) c_in;
  const unsigned char *cursor;

  for (cursor = (const unsigned char *) s; *cursor != '\0'; cursor++) {
    if (*cursor == target) {
      break;
    }
  }
  return (char *) cursor;
}
|
||||
Reference in New Issue
Block a user