Merge branch 'main' into agent

This commit is contained in:
Adrià Arrufat
2026-06-01 12:36:08 +02:00
32 changed files with 1603 additions and 2357 deletions

180
build.zig
View File

@@ -229,6 +229,7 @@ fn linkHtml5Ever(b: *Build, mod: *Build.Module) !void {
"src/html5ever/lib.rs",
"src/html5ever/sink.rs",
"src/html5ever/types.rs",
"src/html5ever/url.rs",
}) |path| {
exec_cargo.addFileInput(b.path(path));
}
@@ -312,13 +313,6 @@ fn linkCurl(b: *Build, mod: *Build.Module, is_tsan: bool) !void {
const boringssl = buildBoringSsl(b, target, mod.optimize.?);
for (boringssl) |lib| curl.root_module.linkLibrary(lib);
const libidn2 = buildLibidn2(b, target, mod.optimize.?, is_tsan);
curl.root_module.linkLibrary(libidn2);
// Also expose libidn2 to the consuming module so src/sys/idna.zig's
// @cImport of <idn2.h> resolves. Without this, lightpanda_module only
// sees idn2.h transitively if a system libidn2 happens to be installed.
mod.linkLibrary(libidn2);
switch (target.result.os.tag) {
.macos => {
// needed for proxying on mac
@@ -473,168 +467,6 @@ fn buildNghttp2(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.O
return lib;
}
/// Compile libidn2 from the `libidn2` package dependency into a library
/// named "idn2" and install its public `idn2.h` header.
///
/// Sources come from the dependency's `lib/`, `gl/` and `unistring/`
/// directories; the vendored `vendor/libidn2` directory is added as an include
/// path, and the five unistring `.in.h` templates are rendered through
/// `renderUnistringHeader`. On Darwin targets the module additionally links
/// the system `iconv` library and compiles a local `strchrnul.c`.
///
/// `is_tsan` is forwarded to the module's `sanitize_thread` option.
fn buildLibidn2(
b: *Build,
target: Build.ResolvedTarget,
optimize: std.builtin.OptimizeMode,
is_tsan: bool,
) *Build.Step.Compile {
const dep = b.dependency("libidn2", .{});
const os = target.result.os.tag;
const is_darwin = os.isDarwin();
const mod = b.createModule(.{
.target = target,
.optimize = optimize,
.link_libc = true,
.sanitize_thread = is_tsan,
});
// libidn2's autoconf+gnulib stack expects a config.h with hundreds of
// HAVE_*/_GL_ATTRIBUTE_* defines — including ~800 lines of attribute-
// detection macros emitted from gnulib-common.m4 via AH_VERBATIM. We
// vendor a single autoconf-generated config.h rather than try to
// reproduce that machinery in the Zig build system.
mod.addIncludePath(b.path("vendor/libidn2"));
// Substitute the gnulib-style .in.h templates. All @VAR@ in them are
// either DLL-visibility markers (empty for static POSIX) or
// HAVE_UNISTRING_WOE32DLL_H (0).
inline for (.{ "unitypes", "unistr", "uniconv", "unictype", "uninorm" }) |name| {
mod.addConfigHeader(renderUnistringHeader(b, dep, name));
}
mod.addIncludePath(dep.path("lib"));
mod.addIncludePath(dep.path("unistring"));
// gl/ holds gnulib helpers — only malloca and version-etc headers are
// referenced from the sources we compile; we don't need the full gl/ shim
// layer (system header replacements).
mod.addIncludePath(dep.path("gl"));
const lib = b.addLibrary(.{ .name = "idn2", .root_module = mod });
lib.installHeader(dep.path("lib/idn2.h"), "idn2.h");
if (is_darwin) {
// unistring's striconveh.c calls real iconv_*, which on macOS lives
// in libiconv (separate from libSystem). On glibc Linux iconv is in
// libc itself; on musl it would also need a separate -liconv.
mod.linkSystemLibrary("iconv", .{});
// libidn2's lib/lookup.c calls strchrnul() without including
// <string.h>; the prototype is declared in vendor/libidn2/config.h
// alongside the existing strverscmp shim. macOS libc lacked the
// symbol entirely before 15.4 — provide it here so the link
// succeeds. Mirrors how gl/strverscmp.c is wired up below.
lib.addCSourceFile(.{
.file = b.path("vendor/libidn2/darwin/strchrnul.c"),
.flags = &.{},
});
}
lib.addCSourceFiles(.{
.root = dep.path("lib"),
.flags = &.{ "-DHAVE_CONFIG_H", "-DIDN2_STATIC" },
.files = &.{
"bidi.c", "context.c", "data.c", "decode.c",
"error.c", "free.c", "idna.c", "lookup.c",
"punycode.c", "register.c", "tables.c", "tr46map.c",
"version.c",
},
});
lib.addCSourceFiles(.{
.root = dep.path("gl"),
.flags = &.{"-DHAVE_CONFIG_H"},
// malloca.c provides striconveha's stack-or-heap allocator; strverscmp
// is a glibc extension absent on macOS that lib/version.c needs.
.files = &.{ "malloca.c", "strverscmp.c" },
});
lib.addCSourceFiles(.{
.root = dep.path("unistring"),
.flags = &.{"-DHAVE_CONFIG_H"},
.files = &.{
"c-ctype.c", "c-strcasecmp.c", "c-strncasecmp.c",
"free.c", "iconv.c", "iconv_close.c",
"iconv_open.c", "localcharset.c", "stdlib.c",
"striconveh.c", "striconveha.c", "unistd.c",
"uniconv/u8-conv-from-enc.c", "uniconv/u8-strconv-from-enc.c", "uniconv/u8-strconv-from-locale.c",
"uniconv/u8-strconv-to-enc.c", "uniconv/u8-strconv-to-locale.c", "unictype/bidi_of.c",
"unictype/categ_M.c", "unictype/categ_none.c", "unictype/categ_of.c",
"unictype/categ_test.c", "unictype/combiningclass.c", "unictype/joiningtype_of.c",
"unictype/scripts.c", "uninorm/canonical-decomposition.c", "uninorm/composition.c",
"uninorm/decompose-internal.c", "uninorm/decomposition-table.c", "uninorm/nfc.c",
"uninorm/nfd.c", "uninorm/u32-normalize.c", "unistr/u32-cmp.c",
"unistr/u32-cpy-alloc.c", "unistr/u32-cpy.c", "unistr/u32-mbtouc-unsafe.c",
"unistr/u32-strlen.c", "unistr/u32-to-u8.c", "unistr/u32-uctomb.c",
"unistr/u8-check.c", "unistr/u8-mblen.c", "unistr/u8-mbtouc.c",
"unistr/u8-mbtouc-aux.c", "unistr/u8-mbtouc-unsafe.c", "unistr/u8-mbtouc-unsafe-aux.c",
"unistr/u8-mbtoucr.c", "unistr/u8-prev.c", "unistr/u8-strlen.c",
"unistr/u8-to-u32.c", "unistr/u8-uctomb.c", "unistr/u8-uctomb-aux.c",
},
});
return lib;
}
/// Process one of unistring's `.in.h` template headers into a real `.h`.
///
/// The template `unistring/<name>.in.h` is read at configure time and scanned
/// for `@VAR@` placeholders. Each distinct placeholder is registered on the
/// returned config-header step exactly once: `HAVE_UNISTRING_WOE32DLL_H` is set
/// to 0, every other name is set to the empty string (the remaining
/// placeholders are DLL-visibility markers, which are empty for static POSIX
/// builds).
///
/// Panics, naming the failing operation and error, if the template cannot be
/// opened or read; panics with "OOM" on allocation failure.
fn renderUnistringHeader(b: *Build, dep: *Build.Dependency, name: []const u8) *Build.Step.ConfigHeader {
    const in_rel = b.fmt("unistring/{s}.in.h", .{name});
    const out_name = b.fmt("{s}.h", .{name});
    const lazy = dep.path(in_rel);
    const path = lazy.getPath3(b, null);
    const file = path.root_dir.handle.openFile(path.sub_path, .{}) catch |e| {
        std.debug.panic("openFile {s}: {s}", .{ path.sub_path, @errorName(e) });
    };
    defer file.close();
    // Report the real failure: reading can fail for reasons other than
    // running out of memory (I/O error, file larger than the 4 MiB cap, ...).
    const contents = file.readToEndAlloc(b.allocator, 4 << 20) catch |e| {
        std.debug.panic("readToEndAlloc {s}: {s}", .{ path.sub_path, @errorName(e) });
    };
    const ch = b.addConfigHeader(.{
        .include_path = out_name,
        .style = .{ .autoconf_at = lazy },
    }, .{});

    // Placeholder names already registered on `ch`.
    var seen = std.StringHashMap(void).init(b.allocator);
    var i: usize = 0;
    while (std.mem.indexOfScalarPos(u8, contents, i, '@')) |s| {
        const a = s + 1;
        // An unpaired trailing '@' ends the scan.
        const e = std.mem.indexOfScalarPos(u8, contents, a, '@') orelse break;
        const var_name = contents[a..e];
        if (!isAtConfigName(var_name)) {
            // Stray '@' (e.g. an email address in a comment); advance past it
            // alone so we don't mis-pair with a later '@'.
            i = s + 1;
            continue;
        }
        const gop = seen.getOrPut(var_name) catch @panic("OOM");
        if (!gop.found_existing) {
            // Only the first occurrence needs an owned copy of the name.
            const owned = b.allocator.dupe(u8, var_name) catch @panic("OOM");
            gop.key_ptr.* = owned;
            if (std.mem.eql(u8, var_name, "HAVE_UNISTRING_WOE32DLL_H")) {
                ch.addValue(owned, c_int, 0);
            } else {
                ch.addValue(owned, []const u8, "");
            }
        }
        i = e + 1;
    }
    return ch;
}
/// Reports whether `s` is shaped like an autoconf `@VAR@` name: non-empty,
/// made only of `A`-`Z`, `_` and `0`-`9`, and not starting with a digit.
fn isAtConfigName(s: []const u8) bool {
    if (s.len == 0) return false;
    // Digits are fine anywhere except the first position.
    if (std.ascii.isDigit(s[0])) return false;
    for (s) |byte| {
        const allowed = std.ascii.isUpper(byte) or std.ascii.isDigit(byte) or byte == '_';
        if (!allowed) return false;
    }
    return true;
}
fn buildCurl(
b: *Build,
target: Build.ResolvedTarget,
@@ -711,11 +543,11 @@ fn buildCurl(
._FILE_OFFSET_BITS = 64,
.USE_IPV6 = true,
// Route IDN hostnames through libidn2 (vendored, see buildLibidn2).
// Without this, libcurl ships UTF-8 host bytes to SNI/cert validation
// and breaks for non-ASCII hostnames like räksmörgås.se.
.HAVE_LIBIDN2 = true,
.HAVE_IDN2_H = true,
// IDN is handled before libcurl (HttpClient calls URL.ensureHostAscii,
// backed by rust-url), so libcurl always receives an ASCII host and
// does not link libidn2.
.HAVE_LIBIDN2 = false,
.HAVE_IDN2_H = false,
.CURL_OS = switch (os) {
.linux => if (is_android) "\"android\"" else "\"linux\"",
else => std.fmt.allocPrint(b.allocator, "\"{s}\"", .{@tagName(os)}) catch @panic("OOM"),

View File

@@ -34,10 +34,6 @@
.url = "https://github.com/allyourcodebase/sqlite3/archive/8f840560eae88ab66668c6827c64ffbd0d74ef37.tar.gz",
.hash = "sqlite3-3.51.0-DMxLWssOAABZ8cAvU_LfBIbp0kZjm824PU8sSLXpEDdr",
},
.libidn2 = .{
.url = "https://ftp.gnu.org/gnu/libidn/libidn2-2.3.8.tar.gz",
.hash = "N-V-__8AABGOuAC_dhAN07kfoP4dycCFi8Bka4O-tuhriNH8",
},
.zenai = .{
.url = "git+https://github.com/lightpanda-io/zenai.git#120f5fd2a2d29779fbc44584aaa472ad83c6a153",
.hash = "zenai-0.0.0-iOY_VP_EAwBATbpytTro7tmzT1aASeBvKPPvJEO2tEzB",

View File

@@ -560,8 +560,14 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
self.origin = try URL.getOrigin(self.arena, request_url[5.. :0]);
} else if (self.parent) |parent| {
self.origin = parent.origin;
if (is_about_blank) {
self.base_url = parent.base();
}
} else if (self.window._opener) |opener| {
self.origin = opener._frame.origin;
if (is_about_blank) {
self.base_url = opener._frame.base();
}
} else {
self.origin = null;
}

535
src/browser/ImportMap.zig Normal file
View File

@@ -0,0 +1,535 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// Parsed <script type="importmap"> content. Stored on the frame's
// ScriptManager and used by `resolveSpecifier` to map module specifiers
// to URLs per https://html.spec.whatwg.org/multipage/webappapis.html#import-maps
const std = @import("std");
const lp = @import("lightpanda");
const URL = @import("URL.zig");
const log = lp.log;
const Allocator = std.mem.Allocator;
const SpecifierMap = std.json.ArrayHashMap(?[]const u8);
const ImportMap = @This();
/// Sorted by specifier length descending so the longest match wins.
imports: []const Entry = &.{},
/// Sorted by prefix length descending.
scopes: []const Scope = &.{},
/// One specifier → address mapping of an import map.
const Entry = struct {
/// Normalized specifier key: the resolved URL when the key is URL-like,
/// otherwise a copy of the key as written.
specifier: []const u8,
/// Resolved address. `null` when the JSON value was null, was not a URL-like
/// address, or failed the trailing-slash rule; a match on such an entry makes
/// resolution fail instead of falling through to the next candidate.
resolved: ?[:0]const u8,
};
/// The specifier map that applies to referrers under a given URL prefix.
const Scope = struct {
/// Scope key resolved against the import map's base URL.
prefix: []const u8,
/// Entries of this scope, sorted by specifier length descending.
imports: []const Entry,
};
pub const empty: ImportMap = .{};
/// Parse `json_content` and merge it into `self`. Multiple <script type="importmap">
/// elements on a page combine with first-wins semantics: any specifier already
/// defined in `self` keeps its existing resolution, and existing scopes absorb
/// only the new keys from a same-prefix incoming scope.
///
/// `base` is the URL that relative keys and addresses are resolved against.
/// All allocations come from `arena`. `self` is only written once both the
/// imports and the scopes have been merged, so a failure part-way through
/// leaves the map exactly as it was.
pub fn merge(self: *ImportMap, arena: Allocator, base: [:0]const u8, json_content: []const u8) !void {
    const incoming = try parse(arena, base, json_content);
    const merged_imports = try mergeEntries(arena, self.imports, incoming.imports);
    const merged_scopes = try mergeScopes(arena, self.scopes, incoming.scopes);
    // Commit last: nothing below this line can fail.
    self.imports = merged_imports;
    self.scopes = merged_scopes;
}
/// Combines two entry lists with first-wins semantics: every entry of
/// `existing` is kept, and an entry of `incoming` is added only when its
/// specifier is not already present in `existing`. The result is sorted by
/// specifier length, longest first. When `incoming` is empty, `existing` is
/// returned as-is without allocating.
fn mergeEntries(arena: Allocator, existing: []const Entry, incoming: []const Entry) ![]const Entry {
    if (incoming.len == 0) return existing;

    const ByLengthDesc = struct {
        fn before(_: void, lhs: Entry, rhs: Entry) bool {
            return lhs.specifier.len > rhs.specifier.len;
        }
    };

    // Worst case nothing collides, so reserve room for both lists up front.
    var merged: std.ArrayList(Entry) = try .initCapacity(arena, existing.len + incoming.len);
    merged.appendSliceAssumeCapacity(existing);
    for (incoming) |candidate| {
        if (findEntry(existing, candidate.specifier) == null) {
            merged.appendAssumeCapacity(candidate);
        }
    }

    std.sort.pdq(Entry, merged.items, {}, ByLengthDesc.before);
    return merged.items;
}
/// Returns the index of the first entry whose specifier equals `specifier`
/// byte-for-byte, or null when there is none.
fn findEntry(entries: []const Entry, specifier: []const u8) ?usize {
    var idx: usize = 0;
    while (idx < entries.len) : (idx += 1) {
        if (std.mem.eql(u8, entries[idx].specifier, specifier)) {
            return idx;
        }
    }
    return null;
}
/// Combines two scope lists. A scope present in both keeps its prefix and gets
/// the first-wins merge of both entry lists; scopes present in only one list
/// are carried over unchanged. The result is sorted by prefix length, longest
/// first. When `incoming` is empty, `existing` is returned as-is.
fn mergeScopes(arena: Allocator, existing: []const Scope, incoming: []const Scope) ![]const Scope {
    if (incoming.len == 0) return existing;

    const ByPrefixLengthDesc = struct {
        fn before(_: void, lhs: Scope, rhs: Scope) bool {
            return lhs.prefix.len > rhs.prefix.len;
        }
    };

    var merged: std.ArrayList(Scope) = try .initCapacity(arena, existing.len + incoming.len);

    // Pass 1: every existing scope survives; a same-prefix incoming scope
    // only contributes specifiers the existing one did not define.
    for (existing) |current| {
        const scoped_imports = if (findScope(incoming, current.prefix)) |same_prefix|
            try mergeEntries(arena, current.imports, same_prefix.imports)
        else
            current.imports;
        merged.appendAssumeCapacity(.{ .prefix = current.prefix, .imports = scoped_imports });
    }

    // Pass 2: incoming scopes whose prefix is new to the map.
    for (incoming) |candidate| {
        if (findScope(existing, candidate.prefix) != null) continue;
        merged.appendAssumeCapacity(candidate);
    }

    std.sort.pdq(Scope, merged.items, {}, ByPrefixLengthDesc.before);
    return merged.items;
}
/// Returns the first scope whose prefix equals `prefix` byte-for-byte, or null
/// when no scope has that prefix.
fn findScope(scopes: []const Scope, prefix: []const u8) ?Scope {
    var idx: usize = 0;
    while (idx < scopes.len) : (idx += 1) {
        const candidate = scopes[idx];
        if (std.mem.eql(u8, candidate.prefix, prefix)) return candidate;
    }
    return null;
}
/// Parses the JSON text of an import map into a fresh `ImportMap`.
///
/// Only the top-level `imports` and `scopes` members are read; unknown members
/// are ignored and both are optional. Keys and addresses are normalized
/// against `base`, and the resulting lists are sorted longest-first.
///
/// Returns `error.InvalidImportMap` (after logging a warning) when the JSON is
/// malformed or does not have the expected shape. `error.OutOfMemory` is
/// propagated unchanged rather than being reported as a malformed map.
fn parse(arena: Allocator, base: [:0]const u8, json_content: []const u8) !ImportMap {
    const parsed = std.json.parseFromSliceLeaky(struct {
        imports: ?SpecifierMap = null,
        scopes: ?std.json.ArrayHashMap(SpecifierMap) = null,
    }, arena, json_content, .{ .ignore_unknown_fields = true }) catch |err| switch (err) {
        // Allocation failure says nothing about the document's validity.
        error.OutOfMemory => return error.OutOfMemory,
        else => {
            log.warn(.js, "importmap json parse", .{ .err = err });
            return error.InvalidImportMap;
        },
    };

    var im: ImportMap = .{};
    if (parsed.imports) |obj| {
        im.imports = try sortedNormalizedSpecifierMap(arena, base, obj);
    }
    if (parsed.scopes) |obj| {
        im.scopes = try sortedNormalizedScopes(arena, base, obj);
    }
    return im;
}
/// Converts one JSON specifier map into a list of `Entry`, sorted by
/// specifier length with the longest first.
///
/// Keys that normalize to nothing (empty keys) are dropped. An address that is
/// null, not URL-like, or that lacks the trailing "/" its key has is kept as an
/// entry with `resolved == null`, so that a later match fails instead of
/// falling through to a shorter match.
fn sortedNormalizedSpecifierMap(arena: Allocator, base: [:0]const u8, obj: SpecifierMap) ![]const Entry {
    const ByLengthDesc = struct {
        fn before(_: void, lhs: Entry, rhs: Entry) bool {
            return lhs.specifier.len > rhs.specifier.len;
        }
    };

    const raw = obj.map; // the JSON object is a thin wrapper over an ArrayHashMap
    var entries: std.ArrayList(Entry) = try .initCapacity(arena, raw.count());

    var iter = raw.iterator();
    while (iter.next()) |pair| {
        const raw_key = pair.key_ptr.*;
        const specifier = (try normalizeSpecifierKey(arena, base, raw_key)) orelse continue;

        var resolved: ?[:0]const u8 = null;
        if (pair.value_ptr.*) |address| {
            if (parseIfLikeURL(arena, base, address)) |address_url| {
                // Spec: if the key ends with "/" the address must end with "/" too.
                if (endsWithSlash(specifier) and !endsWithSlash(address_url)) {
                    log.warn(.js, "importmap slash mismatch", .{ .specifier = raw_key, .address = address });
                } else {
                    resolved = address_url;
                }
            } else {
                log.warn(.js, "importmap bad address", .{ .specifier = raw_key, .address = address });
            }
        }

        entries.appendAssumeCapacity(.{ .specifier = specifier, .resolved = resolved });
    }

    std.sort.pdq(Entry, entries.items, {}, ByLengthDesc.before);
    return entries.items;
}
/// Converts the JSON `scopes` object into a list of `Scope`, sorted by prefix
/// length with the longest first. A scope whose key cannot be resolved against
/// `base` is logged and dropped.
fn sortedNormalizedScopes(arena: Allocator, base: [:0]const u8, obj: std.json.ArrayHashMap(SpecifierMap)) ![]const Scope {
    const ByPrefixLengthDesc = struct {
        fn before(_: void, lhs: Scope, rhs: Scope) bool {
            return lhs.prefix.len > rhs.prefix.len;
        }
    };

    const raw = obj.map; // the JSON object is a thin wrapper over an ArrayHashMap
    var collected: std.ArrayList(Scope) = try .initCapacity(arena, raw.count());

    var iter = raw.iterator();
    while (iter.next()) |pair| {
        const raw_key = pair.key_ptr.*;
        // Scope keys are resolved as plain URLs relative to the base, not as
        // URL-like specifiers: a bare string with no ./, ../, / or scheme is
        // acceptable as long as it resolves.
        if (parseScopeKey(arena, base, raw_key)) |prefix| {
            const scoped_imports = try sortedNormalizedSpecifierMap(arena, base, pair.value_ptr.*);
            collected.appendAssumeCapacity(.{ .prefix = prefix, .imports = scoped_imports });
        } else |err| {
            log.warn(.js, "importmap bad scope key", .{ .scope = raw_key, .err = err });
        }
    }

    std.sort.pdq(Scope, collected.items, {}, ByPrefixLengthDesc.before);
    return collected.items;
}
/// Normalizes an import-map key. Returns null for an empty key, the resolved
/// URL for a URL-like key, and an arena-owned copy of the key otherwise.
fn normalizeSpecifierKey(arena: Allocator, base: [:0]const u8, key: []const u8) !?[]const u8 {
    if (key.len == 0) return null;

    const url_form = parseIfLikeURL(arena, base, key) orelse {
        // Bare specifier: keep it verbatim, but owned by the arena.
        return try arena.dupe(u8, key);
    };
    return url_form;
}
/// Resolves a scope key against `base`. An empty key yields `base` itself
/// (not copied); anything else is resolved as a URL relative to `base`.
fn parseScopeKey(arena: Allocator, base: [:0]const u8, key: []const u8) ![]const u8 {
    if (key.len != 0) {
        return URL.resolve(arena, base, key, .{ .always_dupe = true, .encoding = "UTF-8" });
    }
    return base;
}
/// Returns `specifier` resolved against `base` when it looks like a URL, i.e.
/// it starts with "/", "./" or "../", or carries a scheme. Returns null for
/// an empty or bare specifier, and also when resolution fails.
fn parseIfLikeURL(arena: Allocator, base: [:0]const u8, specifier: []const u8) ?[:0]const u8 {
    if (specifier.len == 0) return null;

    const is_path_like = specifier[0] == '/' or
        std.mem.startsWith(u8, specifier, "./") or
        std.mem.startsWith(u8, specifier, "../");
    if (!is_path_like and !hasScheme(specifier)) return null;

    return URL.resolve(arena, base, specifier, .{ .always_dupe = true, .encoding = "UTF-8" }) catch null;
}
/// Reports whether `s` begins with a URL scheme: an ASCII letter followed by
/// any number of ASCII letters, digits, "+", "-" or ".", terminated by ":".
fn hasScheme(s: []const u8) bool {
    if (s.len == 0) return false;
    if (!std.ascii.isAlphabetic(s[0])) return false;

    var pos: usize = 1;
    while (pos < s.len) : (pos += 1) {
        switch (s[pos]) {
            ':' => return true,
            'a'...'z', 'A'...'Z', '0'...'9', '+', '-', '.' => {},
            else => return false,
        }
    }
    // Ran out of input before seeing the ':' terminator.
    return false;
}
/// Reports whether `s` is non-empty and its last byte is "/".
fn endsWithSlash(s: []const u8) bool {
    return std.mem.endsWith(u8, s, "/");
}
/// Resolves `specifier`, as imported from the module at `base`, through this
/// import map.
///
/// Scopes whose prefix covers `base` are consulted in stored order (longest
/// prefix first), then the top-level imports. When nothing matches, a URL-like
/// specifier resolves to itself relative to `base`, and a bare specifier
/// yields `null` — the caller decides whether that is an error.
pub fn resolve(
    self: *const ImportMap,
    arena: Allocator,
    base: [:0]const u8,
    specifier: [:0]const u8,
) !?[:0]const u8 {
    const as_url = parseIfLikeURL(arena, base, specifier);
    const normalized: []const u8 = as_url orelse specifier;

    for (self.scopes) |scope| {
        if (scopeMatches(scope.prefix, base)) {
            if (try resolveImportsMatch(arena, normalized, as_url, scope.imports)) |hit| {
                return hit;
            }
        }
    }

    const top_level = try resolveImportsMatch(arena, normalized, as_url, self.imports);
    return top_level orelse as_url;
}
/// Reports whether a scope with `prefix` applies to the referrer `base`:
/// either the two are identical, or `prefix` ends with "/" and `base` starts
/// with it.
fn scopeMatches(prefix: []const u8, base: []const u8) bool {
    if (!std.mem.startsWith(u8, base, prefix)) return false;
    // `base` starts with `prefix`; equal lengths therefore mean equal strings.
    return base.len == prefix.len or std.mem.endsWith(u8, prefix, "/");
}
/// Looks `normalized` up in `imports` (assumed sorted longest specifier first).
///
/// An exact match returns the entry's address. A trailing-slash entry that is
/// a prefix of `normalized` returns the remainder resolved against the entry's
/// address. Returns null when no entry applies.
///
/// Fails with `error.SpecifierResolutionFailed` when the matching entry has no
/// address, when the remainder cannot be resolved, or when the resolved URL
/// would land outside the entry's address.
fn resolveImportsMatch(
    arena: Allocator,
    normalized: []const u8,
    as_url: ?[:0]const u8,
    imports: []const Entry,
) !?[:0]const u8 {
    // Per spec, trailing-slash prefix matching only applies when the
    // specifier is bare or its scheme is "special" (http(s), ws(s),
    // file, ftp). data:/blob:/about: don't match prefixes.
    // Computed lazily below so it is only evaluated for prefix candidates.
    for (imports) |entry| {
        if (std.mem.eql(u8, entry.specifier, normalized)) {
            const target = entry.resolved orelse return error.SpecifierResolutionFailed;
            return target;
        }

        const is_prefix_candidate = endsWithSlash(entry.specifier) and
            std.mem.startsWith(u8, normalized, entry.specifier);
        if (!is_prefix_candidate) continue;

        const prefix_allowed = if (as_url) |u| isSpecialUrl(u) else true;
        if (!prefix_allowed) continue;

        const base_addr = entry.resolved orelse return error.SpecifierResolutionFailed;
        const remainder = normalized[entry.specifier.len..];
        const resolved_url = URL.resolve(arena, base_addr, remainder, .{ .always_dupe = true, .encoding = "UTF-8" }) catch {
            return error.SpecifierResolutionFailed;
        };

        // Backtracking prevention — the resolved URL must remain under the
        // address (`../` etc. is not allowed to escape).
        if (std.mem.startsWith(u8, resolved_url, base_addr)) {
            return resolved_url;
        }
        return error.SpecifierResolutionFailed;
    }
    return null;
}
/// Reports whether the text before the first ":" in `url` is one of the
/// "special" schemes (https, http, ws, wss, file, ftp), compared
/// case-insensitively. A string with no ":" is not special.
fn isSpecialUrl(url: []const u8) bool {
    const special_schemes = [_][]const u8{ "https", "http", "ws", "wss", "file", "ftp" };

    const scheme_end = std.mem.indexOfScalar(u8, url, ':') orelse return false;
    const scheme = url[0..scheme_end];
    for (special_schemes) |candidate| {
        if (std.ascii.eqlIgnoreCase(scheme, candidate)) return true;
    }
    return false;
}
const testing = @import("../testing.zig");
test "ImportMap: exact match" {
defer testing.reset();
const im = try testParse(
\\{ "imports": { "moment": "/node_modules/moment/index.js" } }
, "https://example.com/app/index.html");
const r = try testResolve(&im, "https://example.com/app.mjs", "moment");
try testing.expectString("https://example.com/node_modules/moment/index.js", r.?);
}
test "ImportMap: trailing slash prefix match" {
defer testing.reset();
const im = try testParse(
\\{ "imports": { "moment/": "/node_modules/moment/src/" } }
, "https://example.com/app/index.html");
const r = try testResolve(&im, "https://example.com/app.mjs", "moment/foo");
try testing.expectString("https://example.com/node_modules/moment/src/foo", r.?);
}
test "ImportMap: specificity — longest match wins" {
defer testing.reset();
const im = try testParse(
\\{ "imports": {
\\ "a": "/1",
\\ "a/": "/2/",
\\ "a/b": "/3",
\\ "a/b/": "/4/"
\\} }
, "https://example.com/app/index.html");
const r1 = try testResolve(&im, "https://example.com/app.mjs", "a");
try testing.expectString("https://example.com/1", r1.?);
const r2 = try testResolve(&im, "https://example.com/app.mjs", "a/");
try testing.expectString("https://example.com/2/", r2.?);
const r3 = try testResolve(&im, "https://example.com/app.mjs", "a/x");
try testing.expectString("https://example.com/2/x", r3.?);
const r4 = try testResolve(&im, "https://example.com/app.mjs", "a/b");
try testing.expectString("https://example.com/3", r4.?);
const r5 = try testResolve(&im, "https://example.com/app.mjs", "a/b/");
try testing.expectString("https://example.com/4/", r5.?);
const r6 = try testResolve(&im, "https://example.com/app.mjs", "a/b/c");
try testing.expectString("https://example.com/4/c", r6.?);
}
test "ImportMap: scopes — most specific scope wins" {
defer testing.reset();
const im = try testParse(
\\{
\\ "imports": { "a": "/a-1.mjs", "b": "/b-1.mjs", "d": "/d-1.mjs" },
\\ "scopes": {
\\ "/scope2/": { "a": "/a-2.mjs", "d": "/d-2.mjs" },
\\ "/scope2/scope3/": { "b": "/b-3.mjs", "d": "/d-3.mjs" }
\\ }
\\}
, "https://example.com/app/index.html");
// From scope2/scope3 base
const a = try testResolve(&im, "https://example.com/scope2/scope3/foo.mjs", "a");
try testing.expectString("https://example.com/a-2.mjs", a.?);
const b = try testResolve(&im, "https://example.com/scope2/scope3/foo.mjs", "b");
try testing.expectString("https://example.com/b-3.mjs", b.?);
const d = try testResolve(&im, "https://example.com/scope2/scope3/foo.mjs", "d");
try testing.expectString("https://example.com/d-3.mjs", d.?);
// Falls back to scope2 for things not in scope3
const a2 = try testResolve(&im, "https://example.com/scope2/foo.mjs", "a");
try testing.expectString("https://example.com/a-2.mjs", a2.?);
const b2 = try testResolve(&im, "https://example.com/scope2/foo.mjs", "b");
try testing.expectString("https://example.com/b-1.mjs", b2.?);
}
test "ImportMap: bare specifier with no match returns null" {
defer testing.reset();
const im = try testParse(
\\{ "imports": { "moment": "/m.js" } }
, "https://example.com/app/index.html");
const r = try testResolve(&im, "https://example.com/app.mjs", "nope");
try testing.expectEqual(null, r);
}
test "ImportMap: URL-like specifier falls back to itself" {
defer testing.reset();
const im: ImportMap = .empty;
const r = try testResolve(&im, "https://example.com/app.mjs", "./foo.js");
try testing.expectString("https://example.com/foo.js", r.?);
}
test "ImportMap: null entry throws (no fallback)" {
defer testing.reset();
const im = try testParse(
\\{ "imports": { "blocked": null } }
, "https://example.com/app/index.html");
try testing.expectError(error.SpecifierResolutionFailed, testResolve(&im, "https://example.com/app.mjs", "blocked"));
}
test "ImportMap: backtracking out of prefix throws" {
defer testing.reset();
const im = try testParse(
\\{ "imports": { "moment/": "/node_modules/moment/src/" } }
, "https://example.com/app/index.html");
try testing.expectError(error.SpecifierResolutionFailed, testResolve(&im, "https://example.com/app.mjs", "moment/../backtrack"));
}
test "ImportMap: merge — first-wins on imports, new keys added" {
defer testing.reset();
const base: [:0]const u8 = "https://example.com/app/index.html";
var im = try testParse(
\\{ "imports": { "a": "/a-first.mjs", "b": "/b-first.mjs" } }
, base);
try im.merge(testing.arena_allocator, base,
\\{ "imports": { "a": "/a-second.mjs", "c": "/c-second.mjs" } }
);
// First-wins: "a" keeps the original mapping.
const a = try testResolve(&im, "https://example.com/app.mjs", "a");
try testing.expectString("https://example.com/a-first.mjs", a.?);
// New key from the second map shows up.
const c = try testResolve(&im, "https://example.com/app.mjs", "c");
try testing.expectString("https://example.com/c-second.mjs", c.?);
}
test "ImportMap: merge — same-prefix scopes merge their imports" {
defer testing.reset();
const base: [:0]const u8 = "https://example.com/app/index.html";
var im = try testParse(
\\{ "scopes": { "/s/": { "a": "/a-first.mjs" } } }
, base);
try im.merge(testing.arena_allocator, base,
\\{ "scopes": { "/s/": { "a": "/a-second.mjs", "b": "/b-second.mjs" }, "/t/": { "x": "/x.mjs" } } }
);
// "a" within /s/ keeps its original value.
const a = try testResolve(&im, "https://example.com/s/foo.mjs", "a");
try testing.expectString("https://example.com/a-first.mjs", a.?);
// "b" was added to /s/ from the second map.
const b = try testResolve(&im, "https://example.com/s/foo.mjs", "b");
try testing.expectString("https://example.com/b-second.mjs", b.?);
// New scope /t/ landed too.
const x = try testResolve(&im, "https://example.com/t/foo.mjs", "x");
try testing.expectString("https://example.com/x.mjs", x.?);
}
/// Test helper: parses `content` as an import map rooted at `base`, allocating
/// from the shared test arena.
fn testParse(content: []const u8, base: [:0]const u8) !ImportMap {
    const parsed = try parse(testing.arena_allocator, base, content);
    return parsed;
}
/// Test helper: resolves `specifier` as imported from `base` through `im`,
/// allocating from the shared test arena.
fn testResolve(im: *const ImportMap, base: [:0]const u8, specifier: [:0]const u8) !?[:0]const u8 {
    const outcome = try im.resolve(testing.arena_allocator, base, specifier);
    return outcome;
}

View File

@@ -109,12 +109,6 @@ frame: Frame,
// to the original page like this.
popups: std.ArrayList(*Frame) = .empty,
// Popups that have called window.close() but whose teardown is deferred to
// Page.deinit. We can't deinit synchronously from window.close() because
// that's invoked from JS still running on top of the Frame's V8 context (or
// from a script eval whose parser still holds the Frame).
queued_close: std.ArrayList(*Frame) = .empty,
// Lifecycle state. A Page is `.pending` while we hold it as the in-flight
// destination of a root navigation — its V8 context exists but is not yet the
// session's active context. Flipped to `.active` by Session.commitPendingPage
@@ -142,8 +136,6 @@ pub fn init(self: *Page, session: *Session, frame_id: u32) !void {
// Tear down the Page and its root Frame. Equivalent to the old
// Session.removePage + Session.resetFrameResources.
pub fn deinit(self: *Page) void {
self.cleanupClosedPopups();
for (self.popups.items) |popup| {
popup.deinit();
}
@@ -197,13 +189,6 @@ pub fn deinit(self: *Page) void {
session.arena_pool.release(self.frame_arena);
}
/// Deinitializes every popup waiting in `queued_close`, then resets the queue
/// to an empty list.
pub fn cleanupClosedPopups(self: *Page) void {
    const closed = self.queued_close.items;
    var idx: usize = 0;
    while (idx < closed.len) : (idx += 1) {
        closed[idx].deinit();
    }
    self.queued_close = .empty;
}
/// Obtains an arena by forwarding `size_or_bucket` and the `debug` label
/// unchanged to the owning session's `getArena`.
pub fn getArena(self: *Page, size_or_bucket: anytype, debug: []const u8) !Allocator {
return self.session.getArena(size_or_bucket, debug);
}

View File

@@ -97,10 +97,9 @@ fn _wait(self: *Runner, comptime is_cdp: bool, opts: WaitOpts) !WaitResult {
if (gc_hint_timer.read() >= gc_hint_period_ns) {
gc_hint_timer.reset();
self.frame._page.cleanupClosedPopups();
browser.env.memoryPressureNotification(.moderate);
}
session.processQueuedDestroyed();
session.processDestroyQueues();
const tick_result = self._tick(is_cdp, tick_opts) catch |err| {
switch (err) {

View File

@@ -25,6 +25,7 @@ const HttpClient = @import("HttpClient.zig");
const js = @import("js/js.zig");
const URL = @import("URL.zig");
const Frame = @import("Frame.zig");
const ImportMap = @import("ImportMap.zig");
const ScriptManagerBase = @import("ScriptManagerBase.zig");
const Element = @import("webapi/Element.zig");
@@ -305,36 +306,6 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
script.eval();
}
/// Reads the JSON body of an importmap script and records each entry of its
/// `imports` object in the base manager's importmap, with the address
/// resolved against the frame's base URL. All allocations use the frame arena.
pub fn parseImportmap(self: *ScriptManager, script: *const Script) !void {
    const Document = struct {
        imports: std.json.ArrayHashMap([]const u8),
    };

    const body = script.source.content();
    const document = try std.json.parseFromSliceLeaky(
        Document,
        self.frame.arena,
        body,
        .{ .allocate = .alloc_always },
    );

    var entries = document.imports.map.iterator();
    while (entries.next()) |mapping| {
        // > Relative URLs are resolved to absolute URL addresses using the
        // > base URL of the document containing the import map.
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules#importing_modules_using_import_maps
        const absolute = try URL.resolve(
            self.frame.arena,
            self.frame.base(),
            mapping.value_ptr.*,
            .{},
        );
        try self.base.importmap.put(self.frame.arena, mapping.key_ptr.*, absolute);
    }
}
/// Forwards to the base manager's `staticScriptsDone`.
pub fn staticScriptsDone(self: *ScriptManager) void {
self.base.staticScriptsDone();
}

View File

@@ -27,6 +27,7 @@ const js = @import("js/js.zig");
const URL = @import("URL.zig");
const Session = @import("Session.zig");
const Frame = @import("Frame.zig");
const ImportMap = @import("ImportMap.zig");
const WorkerGlobalScope = @import("webapi/WorkerGlobalScope.zig");
const Element = @import("webapi/Element.zig");
@@ -118,11 +119,8 @@ allocator: Allocator,
// See ScriptManager.zig for the type's documentation.
imported_modules: std.StringHashMapUnmanaged(ImportedModule),
// Mapping between module specifier and resolution.
// see https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/script/type/importmap
// For workers this stays empty (only Frame authors importmaps via
// ScriptManager.parseImportmap).
importmap: std.StringHashMapUnmanaged([:0]const u8),
// For workers this stays empty
importmap: ImportMap,
// Called at the end of evaluate() after all Base-owned work has run. Frame
// wrapper uses this to drain defer_scripts and fire documentIsLoaded /
@@ -150,8 +148,6 @@ pub fn deinit(self: *ScriptManagerBase) void {
self.reset();
self.imported_modules.deinit(self.allocator);
// we don't deinit self.importmap b/c we use the owner's arena for its
// allocations.
}
pub fn reset(self: *ScriptManagerBase) void {
@@ -164,9 +160,8 @@ pub fn reset(self: *ScriptManagerBase) void {
}
self.imported_modules.clearRetainingCapacity();
// The importmap's keys/values were allocated from the owner's arena, which
// has been reset. Can't use clearAndRetainCapacity — that space is no
// longer ours.
// The importmap's contents were allocated from the owner's arena, which
// has been reset, so just zero the struct.
self.importmap = .empty;
clearList(&self.defer_scripts);
@@ -209,13 +204,12 @@ pub fn scriptList(self: *ScriptManagerBase, script: *const Script) *std.DoublyLi
// Resolve a module specifier to a valid URL.
pub fn resolveSpecifier(self: *ScriptManagerBase, arena: Allocator, base: [:0]const u8, specifier: [:0]const u8) ![:0]const u8 {
// If the specifier is mapped in the importmap, return the pre-resolved
// value. For workers this map is empty.
if (self.importmap.get(specifier)) |s| {
return s;
if (try self.importmap.resolve(arena, base, specifier)) |url| {
return url;
}
return URL.resolve(arena, base, specifier, .{ .always_dupe = true });
// The importmap _always_ resolves URL-like specifiers, falling back to the
// base + specifier itself. So we can only be here on a bare specifier that
// no importmap entry matched.
return error.SpecifierResolutionFailed;
}
pub fn preloadImport(self: *ScriptManagerBase, url: [:0]const u8, referrer: []const u8) !void {
@@ -736,10 +730,10 @@ pub const Script = struct {
const local = &ls.local;
// Handle importmap special case here: the content is a JSON containing
// imports.
// Handle importmap special case here: the content is a JSON containing imports.
// Multiple <script type="importmap"> elements merge with first-wins semantics.
if (fe.kind == .importmap) {
frame._script_manager.parseImportmap(self) catch |err| {
self.manager.importmap.merge(frame.arena, frame.base(), self.source.content()) catch |err| {
log.err(.browser, "parse importmap script", .{
.err = err,
.src = url,

View File

@@ -73,7 +73,8 @@ _active: ?*Page = null,
// In-flight root navigation
_pending: ?*Page = null,
_queued_destroy: std.ArrayList(*Page) = .{},
_page_destruction_queue: std.ArrayList(*Page) = .{},
_frame_destruction_queue: std.ArrayList(*Frame) = .{},
// Loader IDs are scoped to the Session: each new BrowserContext gets a
// fresh counter. Frame IDs (`frame_id_gen`) live on `Browser` instead so
@@ -160,7 +161,7 @@ pub fn deinit(self: *Session) void {
if (self._active != null) {
self.removePage();
}
self.processQueuedDestroyed();
self.processDestroyQueues();
self.cookie_jar.deinit();
@@ -215,12 +216,27 @@ pub fn drainConsoleMessages(self: *Session) []const u8 {
return text;
}
pub fn processQueuedDestroyed(self: *Session) void {
for (self._queued_destroy.items) |page| {
page.deinit();
self.browser.page_pool.destroy(page);
pub fn processDestroyQueues(self: *Session) void {
{
const queue = self._frame_destruction_queue.items;
if (queue.len > 0) {
for (queue) |frame| {
frame.deinit();
}
self._frame_destruction_queue.clearRetainingCapacity();
}
}
{
const queue = self._page_destruction_queue.items;
if (queue.len > 0) {
for (queue) |page| {
page.deinit();
self.browser.page_pool.destroy(page);
}
self._page_destruction_queue.clearRetainingCapacity();
}
}
self._queued_destroy.clearRetainingCapacity();
}
// True iff there is an active Page. CDP / external callers should use this
@@ -239,8 +255,12 @@ fn allocatePage(self: *Session, frame_id: u32) !*Page {
}
// Tear down and free a Page allocated via allocatePage.
fn destroyPage(self: *Session, page: *Page) void {
self._queued_destroy.append(self.arena, page) catch @panic("OOM");
/// Appends `page` to `_page_destruction_queue` (allocated from the session
/// arena) instead of tearing it down immediately. Panics with "OOM" if the
/// queue cannot grow.
fn queuePageDestruction(self: *Session, page: *Page) void {
    const queue = &self._page_destruction_queue;
    queue.append(self.arena, page) catch @panic("OOM");
}
/// Appends `frame` to `_frame_destruction_queue` (allocated from the session
/// arena) so it can be deinitialized later rather than right now. Panics with
/// "OOM" if the queue cannot grow.
pub fn queueFrameDestruction(self: *Session, frame: *Frame) void {
    const queue = &self._frame_destruction_queue;
    queue.append(self.arena, frame) catch @panic("OOM");
}
// Tear down the currently-active Page. Dispatches `frame_remove` first
@@ -271,7 +291,7 @@ fn tearDownActivePage(self: *Session) void {
};
page.frame.abortTransfers();
self.destroyPage(page);
self.queuePageDestruction(page);
self._active = null;
self.navigation.onRemoveFrame();
}
@@ -287,7 +307,7 @@ fn tearDownActivePage(self: *Session) void {
// for any prior teardown of an old page).
fn installNewActivePage(self: *Session, frame_id: u32) !*Frame {
const page = try self.allocatePage(frame_id);
errdefer self.destroyPage(page);
errdefer self.queuePageDestruction(page);
self._active = page;
errdefer self._active = null;
@@ -305,7 +325,7 @@ pub fn createPage(self: *Session) !*Frame {
lp.assert(self._active == null, "Session.createPage - page not null", .{});
// Drain any pending Page deinits now, while we're at a known-safe point
self.processQueuedDestroyed();
self.processDestroyQueues();
if (comptime IS_DEBUG) {
log.debug(.browser, "create page", .{});
@@ -615,7 +635,7 @@ pub fn initiateRootNavigation(self: *Session, frame_id: u32, url: [:0]const u8,
}
const page = try self.allocatePage(frame_id);
errdefer self.destroyPage(page);
errdefer self.queuePageDestruction(page);
page._state = .pending;
self._pending = page;
@@ -697,7 +717,7 @@ pub fn commitPendingPage(self: *Session) !void {
// done_callback after this point would re-enter against the new
// _active and trip the half-torn-down session.
old_active.frame.abortTransfers();
self.destroyPage(old_active);
self.queuePageDestruction(old_active);
}
// Discard a pending Page without committing. Used for failure paths
@@ -715,7 +735,7 @@ pub fn discardPendingPage(self: *Session) void {
page.frame.abortTransfers();
self._pending = null;
self.destroyPage(page);
self.queuePageDestruction(page);
}
// Frame IDs come from `Browser` (per-CDP-connection scope), not

View File

@@ -35,6 +35,20 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, o
const needs_dupe = comptime !isNullTerminated(PT);
var path: [:0]const u8 = if (needs_dupe or opts.always_dupe) try allocator.dupeZ(u8, source_path) else source_path;
if (std.mem.indexOfAny(u8, path, "\t\r\n")) |first| {
path = blk: {
var buf: std.ArrayList(u8) = try .initCapacity(allocator, path.len);
buf.appendSliceAssumeCapacity(path[0..first]);
for (path[first + 1 ..]) |c| {
if (c != '\t' and c != '\r' and c != '\n') {
buf.appendAssumeCapacity(c);
}
}
buf.appendAssumeCapacity(0);
break :blk buf.items[0 .. buf.items.len - 1 :0];
};
}
if (base.len == 0) {
return processResolved(allocator, path, opts);
}
@@ -124,21 +138,23 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, o
const path_start = std.mem.indexOfAnyPos(u8, base, authority_start, "/?#") orelse base.len;
const path_end = std.mem.indexOfAnyPos(u8, base, path_start, "?#") orelse base.len;
var out: []u8 = undefined;
if (path[0] == '/') {
const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path });
return processResolved(allocator, result, opts);
}
var normalized_base: []const u8 = base[0..path_start];
if (path_start < path_end) {
if (std.mem.lastIndexOfScalar(u8, base[path_start + 1 .. path_end], '/')) |pos| {
normalized_base = base[0 .. path_start + 1 + pos];
// Absolute path — keep base authority, replace path. Two trailing
// spaces give us safe lookahead for the dot-segment loop below.
out = try std.mem.join(allocator, "", &.{ base[0..path_start], path, " " });
} else {
var normalized_base: []const u8 = base[0..path_start];
if (path_start < path_end) {
if (std.mem.lastIndexOfScalar(u8, base[path_start + 1 .. path_end], '/')) |pos| {
normalized_base = base[0 .. path_start + 1 + pos];
}
}
}
// trailing space so that we always have space to append the null terminator
// and so that we can compare the next two characters without needing to length check
var out = try std.mem.join(allocator, "", &.{ normalized_base, "/", path, " " });
// trailing space so that we always have space to append the null terminator
// and so that we can compare the next two characters without needing to length check
out = try std.mem.join(allocator, "", &.{ normalized_base, "/", path, " " });
}
const end = out.len - 2;
@@ -156,8 +172,10 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, source_path: anytype, o
in_i += 2;
continue;
}
if (out[in_i + 1] == '.' and out[in_i + 2] == '/') { // always safe, because we added two whitespaces
// /../
if (out[in_i + 1] == '.' and (out[in_i + 2] == '/' or in_i + 2 == end)) {
// /../ or trailing /.. — both step up one segment. The
// trailing slash stays implicit (out_i ends up right after
// the previous '/'), matching `new URL("..", base)`.
if (out_i > path_marker) {
// go back before the /
out_i -= 2;
@@ -197,11 +215,13 @@ fn processResolved(allocator: Allocator, url: [:0]const u8, opts: ResolveOpts) !
return ensureEncoded(allocator, url, encoding);
}
/// IDNA-only pass: converts a non-ASCII host (`räksmörgås.se`) to its
/// punycode form (`xn--rksmrgs-5wao1o.se`) and leaves everything else alone.
/// IDNA pass: converts a non-ASCII host (`räksmörgås.se`) to its punycode form
/// (`xn--rksmrgs-5wao1o.se`), validates any ASCII punycode (`xn--…`) labels,
/// and leaves everything else alone. Returns `error.Idna` for an invalid
/// domain (e.g. malformed punycode), which surfaces as a URL parse failure.
fn ensureHostAscii(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
const hostname = getHostname(url);
if (hostname.len == 0 or !idna.needsAscii(hostname)) {
if (hostname.len == 0 or (!idna.needsAscii(hostname) and !hasAceLabel(hostname))) {
return url;
}
@@ -218,6 +238,30 @@ fn ensureHostAscii(allocator: Allocator, url: [:0]const u8) ![:0]const u8 {
return buf.items[0 .. buf.items.len - 1 :0];
}
/// Reports whether any dot-separated label of `host` starts with the IDNA ACE
/// prefix "xn--" (the "xn" is matched case-insensitively). Such labels are
/// punycode: although they are pure ASCII, UTS#46 still has to decode and
/// validate them, so a host containing one must not take the `needsAscii`
/// fast path.
fn hasAceLabel(host: []const u8) bool {
    // Walk the host one label at a time; the prefix only counts when it sits
    // at the very start of a label.
    var labels = std.mem.splitScalar(u8, host, '.');
    while (labels.next()) |label| {
        if (std.ascii.startsWithIgnoreCase(label, "xn--")) {
            return true;
        }
    }
    return false;
}
pub fn ensureEncoded(allocator: Allocator, url_in: [:0]const u8, encoding: []const u8) ![:0]const u8 {
// Resolve any IDN host first; everything below operates on the ASCII form.
const url = try ensureHostAscii(allocator, url_in);
@@ -1157,6 +1201,36 @@ test "URL: resolve" {
.path = "../../../../example/about",
.expected = "https://www.example.com/example/about",
},
.{
.base = "https://example.com/a/b/c/",
.path = "..",
.expected = "https://example.com/a/b/",
},
.{
.base = "https://example.com/a/b/c",
.path = "..",
.expected = "https://example.com/a/",
},
.{
.base = "https://example.com/js/app.mjs",
.path = "/test/..",
.expected = "https://example.com/",
},
.{
.base = "https://example.com/js/app.mjs",
.path = "/a/b/../c",
.expected = "https://example.com/a/c",
},
.{
.base = "https://example.com/js/app.mjs",
.path = "/../../foo/bar",
.expected = "https://example.com/foo/bar",
},
.{
.base = "https://example.com/js/app.mjs",
.path = "/../foo/../bar",
.expected = "https://example.com/bar",
},
};
for (cases) |case| {
@@ -1165,6 +1239,70 @@ test "URL: resolve" {
}
}
test "URL: resolve strips tab and newline from input" {
    defer testing.reset();

    // Each row: base URL, raw input containing tab / CR / LF, expected result.
    const rows = [_]struct {
        base: [:0]const u8,
        path: [:0]const u8,
        expected: [:0]const u8,
    }{
        // Control char inside the host of an absolute URL.
        .{ .base = "https://x/", .path = "https://exa\tmple.com/p", .expected = "https://example.com/p" },
        .{ .base = "https://x/", .path = "https://example.com/\n\rp", .expected = "https://example.com/p" },
        // Leading control char (first == 0).
        .{ .base = "https://example/", .path = "\tfoo.js", .expected = "https://example/foo.js" },
        // Consecutive control chars.
        .{ .base = "https://example/", .path = "a\t\r\nb.js", .expected = "https://example/ab.js" },
        // Control chars spread through the path.
        .{ .base = "https://example/", .path = "a\tb\nc\rd.js", .expected = "https://example/abcd.js" },
        // Trailing control char.
        .{ .base = "https://example/", .path = "foo.js\n", .expected = "https://example/foo.js" },
        // All-strippable relative path collapses to the base.
        .{ .base = "https://example/dir/", .path = "\t\r\n", .expected = "https://example/dir/" },
        // No control chars: unchanged (the fast path).
        .{ .base = "https://example/", .path = "clean.js", .expected = "https://example/clean.js" },
    };

    for (rows) |row| {
        const actual = try resolve(testing.arena_allocator, row.base, row.path, .{});
        try testing.expectString(row.expected, actual);
    }
}
test "URL: resolve validates ASCII punycode (xn--) labels" {
    defer testing.reset();

    // A well-formed punycode host comes back untouched. It is pure ASCII, so
    // only the xn-- gate sends it through the IDNA conversion at all.
    const valid = try resolve(testing.arena_allocator, "", "https://xn--rksmrgs-5wao1o.se/x", .{});
    try testing.expectString("https://xn--rksmrgs-5wao1o.se/x", valid);

    // Malformed punycode has to fail instead of being passed through verbatim.
    // (URL.init remaps this error.Idna to TypeError for `new URL`.)
    const zero_label = resolve(testing.arena_allocator, "", "https://xn--0.pt/x", .{});
    try testing.expectError(error.Idna, zero_label);
    const letter_label = resolve(testing.arena_allocator, "", "https://xn--a.pt/x", .{});
    try testing.expectError(error.Idna, letter_label);
}
test "URL: hasAceLabel" {
    const Case = struct {
        host: []const u8,
        expected: bool,
    };

    const cases = [_]Case{
        // ACE prefix at a label start (case-insensitive).
        .{ .host = "xn--a", .expected = true },
        .{ .host = "xn--rksmrgs-5wao1o.se", .expected = true },
        .{ .host = "a.xn--b.com", .expected = true },
        .{ .host = "XN--ab.com", .expected = true },
        .{ .host = "foo.example.xn--p1ai", .expected = true },
        // Has '-', but no ACE label.
        .{ .host = "example.com", .expected = false },
        .{ .host = "my-site.com", .expected = false },
        .{ .host = "axn--b.com", .expected = false }, // xn-- not at a label start
        .{ .host = "x-n--a.com", .expected = false }, // not "xn" before the '-'
        .{ .host = "-.com", .expected = false },
        .{ .host = "", .expected = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.expected, hasAceLabel(case.host));
    }
}
test "URL: ensureEncoded" {
defer testing.reset();

View File

@@ -372,6 +372,7 @@ fn handleError(comptime T: type, comptime F: type, local: *const Local, err: any
error.TryCatchRethrow => return,
error.InvalidArgument => isolate.createTypeError("invalid argument"),
error.TypeError => isolate.createTypeError(""),
error.Idna => isolate.createTypeError("invalid domain"),
error.RangeError => isolate.createRangeError(""),
error.OutOfMemory => isolate.createError("out of memory"),
error.IllegalConstructor => isolate.createError("Illegal Constructor"),

View File

@@ -502,11 +502,16 @@ fn postCompileModule(self: *Context, mod: js.Module, url: [:0]const u8, local: *
const script_manager = self.script_manager;
for (0..request_len) |i| {
const specifier = requests.get(i).specifier(local);
const normalized_specifier = try script_manager.resolveSpecifier(
const normalized_specifier = script_manager.resolveSpecifier(
self.call_arena,
url,
try specifier.toSliceZ(),
);
) catch |err| switch (err) {
error.SpecifierResolutionFailed => {
_ = self.isolate.throwException(self.isolate.createTypeError("Failed to resolve module specifier"));
return err;
},
};
const nested_gop = try self.module_cache.getOrPut(self.arena, normalized_specifier);
if (!nested_gop.found_existing) {
const owned_specifier = try self.arena.dupeZ(u8, normalized_specifier);
@@ -560,6 +565,9 @@ fn resolveModuleCallback(
const referrer = js.Module{ .local = &local, .handle = c_referrer.? };
return self._resolveModuleCallback(referrer, specifier, &local) catch |err| {
if (err == error.SpecifierResolutionFailed) {
_ = self.isolate.throwException(self.isolate.createTypeError("Failed to resolve module specifier"));
}
log.err(.js, "resolve module", .{
.err = err,
.specifier = specifier,
@@ -609,9 +617,10 @@ pub fn dynamicModuleCallback(
self.arena, // might need to survive until the module is loaded
resource,
specifier,
) catch |err| {
log.err(.app, "OOM", .{ .err = err, .src = "dynamicModuleCallback3" });
return @constCast(local.rejectPromise(.{ .generic_error = "Out of memory" }).handle);
) catch |err| switch (err) {
error.SpecifierResolutionFailed => {
return @constCast(local.rejectPromise(.{ .type_error = "Failed to resolve module specifier" }).handle);
},
};
const promise = self._dynamicModuleCallback(normalized_specifier, resource, &local) catch |err| blk: {

View File

@@ -767,14 +767,8 @@ fn jsValueToStruct(self: *const Local, comptime T: type, js_val: js.Value) !?T {
if (!js_str.containsOnlyOneByte()) return error.InvalidCharacterError;
return .{ .bytes = try js_str.toOneByteSlice(self.call_arena) };
},
string.String => {
const js_str = js_val.isString() orelse return null;
return try js_str.toSSO(false);
},
string.Global => {
const js_str = js_val.isString() orelse return null;
return try js_str.toSSO(true);
},
string.String => try js_val.toSSO(false),
string.Global => try js_val.toSSO(true),
else => {
if (!js_val.isObject()) {
return null;

View File

@@ -129,8 +129,10 @@
}
};
// Empty name.
await expectReject('empty name', '', 'v');
// Empty name only rejects when the value is empty too or carries an '='.
// A nameless cookie with an ordinary value is valid (cf. `Set-Cookie: =v`).
await expectReject('empty name and empty value', '', '');
await expectReject('empty name with = in value', '', 'a=b');
// Forbidden chars in name.
await expectReject('name with =', 'a=b', 'v');
await expectReject('name with ;', 'a;b', 'v');
@@ -142,6 +144,13 @@
await expectReject('path with ;', { name: 'k', value: 'v', path: '/;evil' });
await expectReject('domain with newline', { name: 'k', value: 'v', domain: 'bad\n.example' });
// A nameless cookie with an ordinary value is accepted, and delete('')
// targets it without rejecting on the empty name.
await cookieStore.set('', 'nameless');
testing.expectEqual('nameless', (await cookieStore.get('')).value);
await cookieStore.delete('');
testing.expectEqual(null, await cookieStore.get(''));
// A clean call still works after the rejections.
await cookieStore.set('after-validation', 'ok');
const item = await cookieStore.get('after-validation');
@@ -150,7 +159,7 @@
});
</script>
<script id=change-event-delete-reports-removed-value>
<script id=change-event-delete-omits-value>
testing.async(async () => {
const events = [];
const handler = (e) => events.push(e);
@@ -165,12 +174,11 @@
await cookieStore.delete('ev-delete-val');
await new Promise(r => setTimeout(r, 0));
// The deleted CookieListItem should describe the cookie that was
// removed, so its value must be what the jar held ("original") —
// not the deletion payload's value ("").
// Per spec, deletion change events identify the removed cookie by name
// but carry no value (it is reported as undefined).
testing.expectEqual(1, events[1].deleted.length);
testing.expectEqual('ev-delete-val', events[1].deleted[0].name);
testing.expectEqual('original', events[1].deleted[0].value);
testing.expectEqual(undefined, events[1].deleted[0].value);
cookieStore.removeEventListener('change', handler);
});

View File

@@ -36,6 +36,11 @@
testing.expectEqual('value', el.dataset.newAttr);
testing.expectEqual('value', el.getAttribute('data-new-attr'));
el.dataset.newAttr = true;
testing.expectEqual('true', el.dataset.newAttr);
testing.expectEqual('true', el.getAttribute('data-new-attr'));
}
</script>

View File

@@ -0,0 +1,27 @@
<!DOCTYPE html>
<script src="../../testing.js"></script>
<base href="https://www.example.com">
<a id=a href=spice></a>
<script id=base>
{
const b0 = $('base');
// initial condition
const a = $('#a');
testing.expectEqual('https://www.example.com/spice', a.href);
// add base AFTER the existing one
const b1 = document.createElement('base');
b0.after(b1);
b1.href = 'https://www.example.com/1/';
testing.expectEqual('https://www.example.com/spice', a.href);
// add base BEFORE existing one
const b2 = document.createElement('base');
b0.before(b2);
b2.href = 'https://www.example.com/2/';
testing.expectEqual('https://www.example.com/2/spice', a.href);
}
</script>

View File

@@ -139,16 +139,9 @@
</script>
<script id=getBattery type=module>
{
const state = await testing.async();
try {
await navigator.getBattery();
state.resolve()
await state.done(() => { testing.fail('getBattery should reject'); });
} catch (err) {
state.resolve()
await state.done(() => { testing.expectEqual('NotSupportedError', err.name); });
}
}
// Like Firefox, we don't expose navigator.getBattery. Sites already
// have to handle it being undefined, so this is a real-browser shape
// rather than a stub that always rejects.
testing.expectEqual('undefined', typeof navigator.getBattery);
</script>

View File

@@ -97,6 +97,28 @@
testing.expectEqual('http://example.com/a/b/foo', url.toString());
}
{
// IDN hosts are converted to punycode (UTS#46).
const url = new URL('https://räksmörgås.se/x');
testing.expectEqual('xn--rksmrgs-5wao1o.se', url.hostname);
testing.expectEqual('https://xn--rksmrgs-5wao1o.se/x', url.href);
}
{
// Valid punycode passes through unchanged.
const url = new URL('https://xn--rksmrgs-5wao1o.se/x');
testing.expectEqual('xn--rksmrgs-5wao1o.se', url.hostname);
}
{
// An invalid domain (malformed punycode) is a parse failure -> TypeError.
testing.withError((err) => {
testing.expectEqual(true, err.toString().includes('TypeError'));
}, () => {
const url = new URL('https://xn--0.pt/x');
});
}
{
const base = 'http://example.com/a/b/c/d';
const url = new URL('../../../../../foo', base);

View File

@@ -139,11 +139,6 @@ pub fn getModelContext(_: *const Navigator, frame: *Frame) *ModelContext {
return &frame.window._model_context;
}
pub fn getBattery(_: *const Navigator, frame: *Frame) !js.Promise {
log.info(.not_implemented, "navigator.getBattery", .{});
return frame.js.local.?.rejectErrorPromise(.{ .dom_exception = .{ .err = error.NotSupported } });
}
pub fn registerProtocolHandler(_: *const Navigator, scheme: []const u8, url: [:0]const u8, frame: *const Frame) !void {
try validateProtocolHandlerScheme(scheme);
try validateProtocolHandlerURL(url, frame);
@@ -248,7 +243,6 @@ pub const JsApi = struct {
// Methods
pub const javaEnabled = bridge.function(Navigator.javaEnabled, .{});
pub const getBattery = bridge.function(Navigator.getBattery, .{});
pub const permissions = bridge.accessor(Navigator.getPermissions, null, .{});
pub const storage = bridge.accessor(Navigator.getStorage, null, .{});
pub const userAgentData = bridge.accessor(Navigator.getUserAgentData, null, .{});

View File

@@ -21,6 +21,7 @@ const lp = @import("lightpanda");
const builtin = @import("builtin");
const js = @import("../js/js.zig");
const URL = @import("../URL.zig");
const Frame = @import("../Frame.zig");
const Console = @import("Console.zig");
const History = @import("History.zig");
@@ -506,6 +507,14 @@ pub fn open(self: *Window, url_: ?[]const u8, target_: ?[]const u8, features_: ?
const no_opener = hasFeatureToken(features, "noopener") or hasFeatureToken(features, "noreferrer");
if (raw_url.len > 0) {
// Per spec, we should validate the url
_ = URL.resolve(frame.call_arena, frame.base(), raw_url, .{}) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
else => return error.SyntaxError,
};
}
// _self / _parent / _top navigate the current browsing context.
if (std.ascii.eqlIgnoreCase(target, "_self") or
std.ascii.eqlIgnoreCase(target, "_parent") or
@@ -610,9 +619,7 @@ pub fn close(self: *Window) void {
// eval whose parser is still holding the Frame. Destroying the context
// now leaves dangling pointers in the unwinding script eval (load event
// dispatch, runMacrotasks, etc.). Defer to Page.deinit instead.
page.queued_close.append(page.frame_arena, frame) catch |err| {
log.err(.frame, "queue popup close", .{ .err = err });
};
page.session.queueFrameDestruction(frame);
}
pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]const u8, transfer: ?[]const *MessagePort, frame: *Frame) !void {
@@ -1001,7 +1008,7 @@ pub const JsApi = struct {
pub const opener = bridge.accessor(Window.getOpener, null, .{});
pub const closed = bridge.accessor(Window.getClosed, null, .{});
pub const name = bridge.accessor(Window.getName, Window.setName, .{});
pub const open = bridge.function(Window.open, .{});
pub const open = bridge.function(Window.open, .{ .dom_exception = true });
pub const close = bridge.function(Window.close, .{});
pub const alert = bridge.function(struct {

View File

@@ -165,6 +165,28 @@ pub fn setProtocol(self: *Anchor, value: []const u8, frame: *Frame) !void {
try setHref(self, new_href, frame);
}
/// Returns the username component of the anchor's resolved href, or "" when
/// `getResolvedHref` yields null.
pub fn getUsername(self: *Anchor, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getUsername(resolved);
    }
    return "";
}
/// Replaces the username in the anchor's resolved href (built in
/// `frame.call_arena`) and stores the result through `setHref`. Does nothing
/// when `getResolvedHref` yields null.
pub fn setUsername(self: *Anchor, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setUsername(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the password component of the anchor's resolved href, or "" when
/// `getResolvedHref` yields null.
pub fn getPassword(self: *Anchor, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getPassword(resolved);
    }
    return "";
}
/// Replaces the password in the anchor's resolved href (built in
/// `frame.call_arena`) and stores the result through `setHref`. Does nothing
/// when `getResolvedHref` yields null.
pub fn setPassword(self: *Anchor, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setPassword(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
pub fn getType(self: *Anchor) []const u8 {
return self.asElement().getAttributeSafe(comptime .wrap("type")) orelse "";
}
@@ -221,6 +243,8 @@ pub const JsApi = struct {
pub const protocol = bridge.accessor(Anchor.getProtocol, Anchor.setProtocol, .{ .ce_reactions = true });
pub const host = bridge.accessor(Anchor.getHost, Anchor.setHost, .{ .ce_reactions = true });
pub const hostname = bridge.accessor(Anchor.getHostname, Anchor.setHostname, .{ .ce_reactions = true });
pub const username = bridge.accessor(Anchor.getUsername, Anchor.setUsername, .{ .ce_reactions = true });
pub const password = bridge.accessor(Anchor.getPassword, Anchor.setPassword, .{ .ce_reactions = true });
pub const port = bridge.accessor(Anchor.getPort, Anchor.setPort, .{ .ce_reactions = true });
pub const pathname = bridge.accessor(Anchor.getPathname, Anchor.setPathname, .{ .ce_reactions = true });
pub const search = bridge.accessor(Anchor.getSearch, Anchor.setSearch, .{ .ce_reactions = true });

View File

@@ -1,4 +1,23 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const js = @import("../../../js/js.zig");
const Frame = @import("../../../Frame.zig");
const URL = @import("../../../URL.zig");
const Node = @import("../../Node.zig");
const Element = @import("../../Element.zig");
const HtmlElement = @import("../Html.zig");
@@ -14,6 +33,155 @@ pub fn asNode(self: *Area) *Node {
return self.asElement().asNode();
}
/// Returns the `href` attribute resolved through `Node.resolveURL`, or "" when
/// the attribute is missing or empty.
pub fn getHref(self: *Area, frame: *Frame) ![]const u8 {
    if (self.asElement().getAttributeSafe(comptime .wrap("href"))) |raw| {
        if (raw.len > 0) {
            return self.asNode().resolveURL(raw, frame, .{});
        }
    }
    return "";
}
/// Stores `value` as the element's `href` attribute.
pub fn setHref(self: *Area, value: []const u8, frame: *Frame) !void {
    const element = self.asElement();
    return element.setAttributeSafe(comptime .wrap("href"), .wrap(value), frame);
}
/// Returns the origin of the resolved href. Yields "" when there is no
/// non-empty `href`, and the literal "null" when `URL.getOrigin` reports no
/// origin for the URL.
pub fn getOrigin(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        const origin = try URL.getOrigin(frame.call_arena, resolved);
        return origin orelse "null";
    }
    return "";
}
/// Returns the host of the resolved href, or "" when there is no non-empty
/// `href`. An explicit port equal to the scheme default (443 for https, 80
/// for http) is dropped by returning only the hostname.
pub fn getHost(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try getResolvedHref(self, frame)) orelse return "";

    const port = URL.getPort(resolved);
    if (port.len == 0) {
        return URL.getHost(resolved);
    }

    const scheme = URL.getProtocol(resolved);
    const is_default_port =
        (std.mem.eql(u8, scheme, "https:") and std.mem.eql(u8, port, "443")) or
        (std.mem.eql(u8, scheme, "http:") and std.mem.eql(u8, port, "80"));

    if (is_default_port) {
        return URL.getHostname(resolved);
    }
    return URL.getHost(resolved);
}
/// Replaces the host of the resolved href and writes the result back to the
/// `href` attribute. Does nothing when there is no non-empty `href`.
pub fn setHost(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setHost(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the hostname of the resolved href, or "" when there is no
/// non-empty `href`.
pub fn getHostname(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getHostname(resolved);
    }
    return "";
}
/// Replaces the hostname of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when there is no non-empty `href`.
pub fn setHostname(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setHostname(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the username of the resolved href, or "" when there is no
/// non-empty `href`.
pub fn getUsername(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getUsername(resolved);
    }
    return "";
}
/// Replaces the username of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when there is no non-empty `href`.
pub fn setUsername(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setUsername(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the password of the resolved href, or "" when there is no
/// non-empty `href`.
pub fn getPassword(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getPassword(resolved);
    }
    return "";
}
/// Replaces the password of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when there is no non-empty `href`.
pub fn setPassword(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setPassword(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the port of the resolved href. Yields "" when there is no
/// non-empty `href`, when the URL has no port, or when the port is the scheme
/// default (443 for https, 80 for http).
pub fn getPort(self: *Area, frame: *Frame) ![]const u8 {
    const resolved = (try getResolvedHref(self, frame)) orelse return "";

    const port = URL.getPort(resolved);
    if (port.len == 0) {
        return port;
    }

    const scheme = URL.getProtocol(resolved);
    const is_default_port =
        (std.mem.eql(u8, scheme, "https:") and std.mem.eql(u8, port, "443")) or
        (std.mem.eql(u8, scheme, "http:") and std.mem.eql(u8, port, "80"));

    if (is_default_port) {
        return "";
    }
    return port;
}
/// Replaces the port of the resolved href (`value` is passed to `URL.setPort`
/// as-is, including null) and writes the result back to the `href` attribute.
/// Does nothing when there is no non-empty `href`.
pub fn setPort(self: *Area, value: ?[]const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setPort(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the search (query) component of the resolved href, or "" when
/// there is no non-empty `href`.
pub fn getSearch(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getSearch(resolved);
    }
    return "";
}
/// Replaces the search (query) component of the resolved href and writes the
/// result back to the `href` attribute. Does nothing when there is no
/// non-empty `href`.
pub fn setSearch(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setSearch(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the hash (fragment) component of the resolved href, or "" when
/// there is no non-empty `href`.
pub fn getHash(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getHash(resolved);
    }
    return "";
}
/// Replaces the hash (fragment) component of the resolved href and writes the
/// result back to the `href` attribute. Does nothing when there is no
/// non-empty `href`.
pub fn setHash(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setHash(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the pathname of the resolved href, or "" when there is no
/// non-empty `href`.
pub fn getPathname(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getPathname(resolved);
    }
    return "";
}
/// Replaces the pathname of the resolved href and writes the result back to
/// the `href` attribute. Does nothing when there is no non-empty `href`.
pub fn setPathname(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setPathname(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Returns the protocol (scheme) of the resolved href, or "" when there is no
/// non-empty `href`.
pub fn getProtocol(self: *Area, frame: *Frame) ![]const u8 {
    if (try getResolvedHref(self, frame)) |resolved| {
        return URL.getProtocol(resolved);
    }
    return "";
}
/// Replaces the protocol (scheme) of the resolved href and writes the result
/// back to the `href` attribute. Does nothing when there is no non-empty
/// `href`.
pub fn setProtocol(self: *Area, value: []const u8, frame: *Frame) !void {
    if (try getResolvedHref(self, frame)) |current| {
        const updated = try URL.setProtocol(current, value, frame.call_arena);
        try setHref(self, updated, frame);
    }
}
/// Shared helper for the URL-component accessors: returns the `href`
/// attribute resolved through `Node.resolveURL`, or null when the attribute
/// is missing or empty.
fn getResolvedHref(self: *Area, frame: *Frame) !?[:0]const u8 {
    if (self.asElement().getAttributeSafe(comptime .wrap("href"))) |raw| {
        if (raw.len > 0) {
            return try self.asNode().resolveURL(raw, frame, .{});
        }
    }
    return null;
}
pub const JsApi = struct {
pub const bridge = js.Bridge(Area);
@@ -22,4 +190,17 @@ pub const JsApi = struct {
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
};
pub const href = bridge.accessor(Area.getHref, Area.setHref, .{ .ce_reactions = true });
pub const origin = bridge.accessor(Area.getOrigin, null, .{});
pub const protocol = bridge.accessor(Area.getProtocol, Area.setProtocol, .{ .ce_reactions = true });
pub const host = bridge.accessor(Area.getHost, Area.setHost, .{ .ce_reactions = true });
pub const hostname = bridge.accessor(Area.getHostname, Area.setHostname, .{ .ce_reactions = true });
pub const username = bridge.accessor(Area.getUsername, Area.setUsername, .{ .ce_reactions = true });
pub const password = bridge.accessor(Area.getPassword, Area.setPassword, .{ .ce_reactions = true });
pub const port = bridge.accessor(Area.getPort, Area.setPort, .{ .ce_reactions = true });
pub const pathname = bridge.accessor(Area.getPathname, Area.setPathname, .{ .ce_reactions = true });
pub const search = bridge.accessor(Area.getSearch, Area.setSearch, .{ .ce_reactions = true });
pub const hash = bridge.accessor(Area.getHash, Area.setHash, .{ .ce_reactions = true });
pub const toString = bridge.function(Area.getHref, .{});
};

View File

@@ -1,4 +1,7 @@
const js = @import("../../../js/js.zig");
const URL = @import("../../../URL.zig");
const Frame = @import("../../../Frame.zig");
const Node = @import("../../Node.zig");
const Element = @import("../../Element.zig");
const HtmlElement = @import("../Html.zig");
@@ -14,6 +17,44 @@ pub fn asNode(self: *Base) *Node {
return self.asElement().asNode();
}
/// Returns the `href` attribute resolved against `frame.url`, allocated in
/// `frame.call_arena`. Yields "" when the attribute is missing or empty.
///
/// If the value cannot be resolved (for example `URL.resolve` rejects the
/// host with `error.Idna`), the raw attribute value is returned instead of
/// raising, matching the HTML `href` getter ("if urlRecord is failure, return
/// url"). Only `error.OutOfMemory` is propagated.
pub fn getHref(self: *Base, frame: *Frame) ![]const u8 {
    const element = self.asElement();
    const href = element.getAttributeSafe(comptime .wrap("href")) orelse return "";
    if (href.len == 0) {
        return "";
    }
    return URL.resolve(frame.call_arena, frame.url, href, .{}) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        // Unparsable href: reflect the attribute verbatim rather than throw.
        else => href,
    };
}
/// Setter for the `href` property. Writes the attribute, then, if this
/// element is connected, recomputes the owning frame's `base_url`.
///
/// Per the HTML spec the document base URL comes from the FIRST <base>
/// element in tree order that carries an href attribute, which is not
/// necessarily this element. The value is therefore re-derived from the
/// document each time, so that writing to a non-authoritative <base>, or
/// clearing the href of the authoritative one, both end up correct.
pub fn setHref(self: *Base, value: []const u8, frame: *Frame) !void {
    const element = self.asElement();
    try element.setAttributeSafe(comptime .wrap("href"), .wrap(value), frame);

    const node = element.asNode();
    // A detached <base> cannot influence any document's base URL.
    if (!node.isConnected()) return;

    const owner = node.ownerFrame(frame);
    if (try owner.document.querySelector(comptime .wrap("base[href]"), owner)) |authoritative| {
        if (authoritative.getAttributeSafe(comptime .wrap("href"))) |candidate| {
            if (candidate.len != 0) {
                owner.base_url = try URL.resolve(owner.arena, owner.url, candidate, .{});
                return;
            }
        }
    }

    // No <base> with a usable (present, non-empty) href: drop the override.
    owner.base_url = null;
}
pub const JsApi = struct {
pub const bridge = js.Bridge(Base);
@@ -22,4 +63,11 @@ pub const JsApi = struct {
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
};
pub const href = bridge.accessor(Base.getHref, Base.setHref, .{ .ce_reactions = true });
};
const testing = @import("../../../../testing.zig");
test "WebApi: HTML.Base" {
try testing.htmlRunner("element/html/base.html", .{});
}

View File

@@ -80,7 +80,9 @@ pub fn initSingle(
const item = try arena.create(CookieStore.CookieListItem);
item.* = .{
.name = try String.init(arena, snapshot.name, .{}),
.value = try String.init(arena, snapshot.value, .{}),
// Deletions report no value (the `deleted` accessor serializes the
// resulting null as undefined); changes carry the new value.
.value = if (kind == .deleted) null else try String.init(arena, snapshot.value, .{}),
.domain = if (snapshot.domain.len > 0 and snapshot.domain[0] == '.')
try String.init(arena, snapshot.domain[1..], .{})
else
@@ -89,9 +91,9 @@ pub fn initSingle(
.expires = null,
.secure = snapshot.secure,
.sameSite = switch (snapshot.same_site) {
.strict => .strict,
.lax => .lax,
.none => .none,
.strict => "strict",
.lax => "lax",
.none => "none",
},
.partitioned = false,
};
@@ -133,5 +135,6 @@ pub const JsApi = struct {
pub const constructor = bridge.constructor(CookieChangeEvent.init, .{});
pub const changed = bridge.accessor(CookieChangeEvent.getChanged, null, .{});
pub const deleted = bridge.accessor(CookieChangeEvent.getDeleted, null, .{});
// null_as_undefined trickles down to the serialization of the CookieListItem fields
pub const deleted = bridge.accessor(CookieChangeEvent.getDeleted, null, .{ .null_as_undefined = true });
};

View File

@@ -33,10 +33,6 @@ const Allocator = std.mem.Allocator;
const Execution = js.Execution;
const String = lp.String;
pub fn registerTypes() []const type {
return &.{ CookieStore, CookieListItem };
}
// https://developer.mozilla.org/en-US/docs/Web/API/CookieStore
const CookieStore = @This();
@@ -259,7 +255,7 @@ pub fn get(_: *CookieStore, input: GetInput, exec: *const Execution) !js.Promise
};
if (items.len == 0) {
return local.resolvePromise(@as(?*CookieListItem, null));
return local.resolvePromise(@as(?CookieListItem, null));
}
return local.resolvePromise(items[0]);
}
@@ -289,7 +285,7 @@ pub fn set(_: *CookieStore, input: SetInput, value: ?[]const u8, exec: *const Ex
},
};
storeCookie(exec, init) catch |err| {
storeCookie(exec, init, false) catch |err| {
return local.rejectPromise(.{ .type_error = @errorName(err) });
};
@@ -315,7 +311,7 @@ pub fn delete(_: *CookieStore, input: DeleteInput, exec: *const Execution) !js.P
.path = opts.path,
.sameSite = .strict,
.partitioned = opts.partitioned,
}) catch |err| {
}, true) catch |err| {
return local.rejectPromise(.{ .type_error = @errorName(err) });
};
@@ -346,7 +342,7 @@ fn matchCookies(
name: ?[]const u8,
url: ?[]const u8,
first_only: bool,
) ![]*CookieListItem {
) ![]CookieListItem {
const session = exec.session;
const url_resolved = try resolveQueryUrl(exec, url);
@@ -359,7 +355,7 @@ fn matchCookies(
session.cookie_jar.removeExpired(null);
var items: std.ArrayList(*CookieListItem) = .empty;
var items: std.ArrayList(CookieListItem) = .empty;
for (session.cookie_jar.cookies.items) |*cookie| {
// CookieStore exposes only cookies that script would see for the
// current document. HttpOnly cookies stay hidden.
@@ -368,8 +364,7 @@ fn matchCookies(
if (!std.mem.eql(u8, cookie.name, n)) continue;
}
const item = try exec.arena.create(CookieListItem);
item.* = .{
try items.append(exec.call_arena, .{
.name = String.wrap(cookie.name),
.value = String.wrap(cookie.value),
.domain = if (cookie.domain.len > 0 and cookie.domain[0] == '.')
@@ -380,32 +375,73 @@ fn matchCookies(
.expires = if (cookie.expires) |e| e * 1000.0 else null,
.secure = cookie.secure,
.sameSite = switch (cookie.same_site) {
.strict => .strict,
.lax => .lax,
.none => .none,
.strict => "strict",
.lax => "lax",
.none => "none",
},
.partitioned = false,
};
try items.append(exec.call_arena, item);
});
if (first_only) break;
}
return items.items;
}
fn storeCookie(exec: *const Execution, init: CookieInit) !void {
fn storeCookie(exec: *const Execution, init_: CookieInit, is_delete: bool) !void {
const session = exec.session;
const url = exec.url.*;
var init = init_;
init.name = std.mem.trim(u8, init.name, " \t");
init.value = std.mem.trim(u8, init.value, " \t");
// delete() may legitimately target a nameless cookie — its value is always empty.
if (!is_delete and init.name.len == 0) {
if (init.value.len == 0) {
return error.InvalidCookieName;
}
if (std.mem.indexOfScalar(u8, init.value, '=') != null) {
return error.InvalidCookieName;
}
}
// Reject inputs the cookie model can't represent. `=` is allowed in
// values but not in names; `;`/CR/LF/NUL break the cookie wire format
// everywhere and so are forbidden in every field.
if (init.name.len == 0) return error.InvalidCookieName;
if (std.mem.indexOfAny(u8, init.name, "=;\r\n\x00") != null) return error.InvalidCookieName;
if (std.mem.indexOfAny(u8, init.value, ";\r\n\x00") != null) return error.InvalidCookieValue;
if (std.mem.indexOfAny(u8, init.path, ";\r\n\x00") != null) return error.InvalidCookiePath;
// values but not in names; `;` and the control characters (U+0000-U+001F,
// U+007F) break the cookie wire format and so are forbidden in both.
if (std.mem.indexOfScalar(u8, init.name, '=') != null) {
return error.InvalidCookieName;
}
if (hasForbiddenChar(init.name)) {
return error.InvalidCookieName;
}
if (hasForbiddenChar(init.value)) {
return error.InvalidCookieValue;
}
// A path attribute, when given, must be absolute. The Cookie path/domain
// attribute values are also capped at 1024 bytes per spec.
// https://cookiestore.spec.whatwg.org/#cookie-maximum-attribute-value-size
if (init.path.len > 0 and init.path[0] != '/') {
return error.InvalidCookiePath;
}
if (init.path.len > 1024) {
return error.InvalidCookiePath;
}
if (std.mem.indexOfAny(u8, init.path, ";\r\n\x00") != null) {
return error.InvalidCookiePath;
}
if (init.domain) |d| {
if (std.mem.indexOfAny(u8, d, ";\r\n\x00") != null) return error.InvalidCookieDomain;
// CookieStore (unlike the HTTP cookie syntax) rejects a leading dot.
if (d.len > 0 and d[0] == '.') {
return error.InvalidCookieDomain;
}
if (d.len > 1024) {
return error.InvalidCookieDomain;
}
if (std.mem.indexOfAny(u8, d, ";\r\n\x00") != null) {
return error.InvalidCookieDomain;
}
}
const is_https = URL.isSecure(url);
@@ -413,18 +449,32 @@ fn storeCookie(exec: *const Execution, init: CookieInit) !void {
// marks any cookie written from an HTTPS document as Secure.
const secure = is_https or init.sameSite == .none;
// The `__Http-` and `__Host-Http-` prefixes are reserved for HTTP-state
// cookies; the (script) CookieStore API can never set them, on any origin.
if (std.ascii.startsWithIgnoreCase(init.name, "__Http-") or std.ascii.startsWithIgnoreCase(init.name, "__Host-Http-")) {
return error.InvalidPrefixedCookie;
}
// Cookie-name-prefix rules — match Cookie.parse, case-insensitive to
// catch impersonation attempts (e.g. "__HoSt-").
// https://datatracker.ietf.org/doc/html/draft-ietf-httpbis-rfc6265bis#name-cookie-name-prefixes
if (std.ascii.startsWithIgnoreCase(init.name, "__Host-")) {
if (!is_https) return error.InvalidPrefixedCookie;
if (!is_https) {
return error.InvalidPrefixedCookie;
}
if (init.domain) |d| {
if (d.len > 0) return error.InvalidPrefixedCookie;
if (d.len > 0) {
return error.InvalidPrefixedCookie;
}
}
const effective_path = if (init.path.len > 0) init.path else "/";
if (!std.mem.eql(u8, effective_path, "/")) return error.InvalidPrefixedCookie;
if (!std.mem.eql(u8, effective_path, "/")) {
return error.InvalidPrefixedCookie;
}
} else if (std.ascii.startsWithIgnoreCase(init.name, "__Secure-")) {
if (!is_https) return error.InvalidPrefixedCookie;
if (!is_https) {
return error.InvalidPrefixedCookie;
}
}
// The errdefer only protects construction failures. Once we `break :blk`
@@ -465,6 +515,18 @@ fn storeCookie(exec: *const Execution, init: CookieInit) !void {
try session.cookie_jar.add(cookie, std.time.timestamp(), false);
}
// Reports whether `s` contains a byte that may not appear in a cookie name
// or value: `;`, DEL (U+007F), or any control character in U+0000-U+001F.
// storeCookie trims leading/trailing TAB and SPACE before calling this, so
// any control character still present is genuinely invalid.
fn hasForbiddenChar(s: []const u8) bool {
    for (s) |byte| {
        switch (byte) {
            0x00...0x1F, 0x7F, ';' => return true,
            else => {},
        }
    }
    return false;
}
pub const JsApi = struct {
pub const bridge = js.Bridge(CookieStore);
@@ -481,59 +543,21 @@ pub const JsApi = struct {
pub const onchange = bridge.accessor(CookieStore.getOnChange, CookieStore.setOnChange, .{});
};
// CookieListItem: per CookieStore.get / getAll return shape, documented inline on
// https://developer.mozilla.org/en-US/docs/Web/API/CookieStore
// CookieListItem is a plain JavaScript object, not an interface. The bridge
// automatically translates a Zig struct into a JS object, so this should
// _not_ have a JsApi.
pub const CookieListItem = struct {
name: String,
value: String,
// Optional because a deletion change-event reports the removed cookie with
// `value` omitted (serialized as undefined via the `deleted` accessor's
// null_as_undefined). For get/getAll and `changed` items it is always set.
value: ?String,
domain: ?String,
path: String,
expires: ?f64,
secure: bool,
sameSite: SameSite,
sameSite: []const u8,
partitioned: bool,
fn getName(self: *const CookieListItem) String {
return self.name;
}
fn getValue(self: *const CookieListItem) String {
return self.value;
}
fn getDomain(self: *const CookieListItem) ?String {
return self.domain;
}
fn getPath(self: *const CookieListItem) String {
return self.path;
}
fn getExpires(self: *const CookieListItem) ?f64 {
return self.expires;
}
fn getSecure(self: *const CookieListItem) bool {
return self.secure;
}
fn getSameSite(self: *const CookieListItem) []const u8 {
return @tagName(self.sameSite);
}
fn getPartitioned(self: *const CookieListItem) bool {
return self.partitioned;
}
pub const JsApi = struct {
pub const bridge = js.Bridge(CookieListItem);
pub const Meta = struct {
pub const name = "CookieListItem";
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
};
pub const name = bridge.accessor(CookieListItem.getName, null, .{});
pub const value = bridge.accessor(CookieListItem.getValue, null, .{});
pub const domain = bridge.accessor(CookieListItem.getDomain, null, .{});
pub const path = bridge.accessor(CookieListItem.getPath, null, .{});
pub const expires = bridge.accessor(CookieListItem.getExpires, null, .{});
pub const secure = bridge.accessor(CookieListItem.getSecure, null, .{});
pub const sameSite = bridge.accessor(CookieListItem.getSameSite, null, .{});
pub const partitioned = bridge.accessor(CookieListItem.getPartitioned, null, .{});
};
};
const testing = @import("../../../testing.zig");

250
src/html5ever/Cargo.lock generated
View File

@@ -30,6 +30,17 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "displaydoc"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "encoding_rs"
version = "0.8.35"
@@ -61,6 +72,109 @@ dependencies = [
"markup5ever",
]
[[package]]
name = "icu_collections"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
dependencies = [
"displaydoc",
"potential_utf",
"utf8_iter",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locale_core"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_normalizer"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
dependencies = [
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
[[package]]
name = "icu_properties"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
dependencies = [
"icu_collections",
"icu_locale_core",
"icu_properties_data",
"icu_provider",
"zerotrie",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
[[package]]
name = "icu_provider"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
dependencies = [
"displaydoc",
"icu_locale_core",
"writeable",
"yoke",
"zerofrom",
"zerotrie",
"zerovec",
]
[[package]]
name = "idna"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "libc"
version = "0.2.172"
@@ -73,6 +187,7 @@ version = "0.1.0"
dependencies = [
"encoding_rs",
"html5ever",
"idna",
"string_cache",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
@@ -80,6 +195,12 @@ dependencies = [
"xml5ever",
]
[[package]]
name = "litemap"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
[[package]]
name = "lock_api"
version = "0.4.13"
@@ -181,6 +302,15 @@ dependencies = [
"siphasher",
]
[[package]]
name = "potential_utf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
dependencies = [
"zerovec",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
@@ -198,9 +328,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.40"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
@@ -258,6 +388,12 @@ version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "string_cache"
version = "0.9.0"
@@ -294,6 +430,17 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "synstructure"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tendril"
version = "0.5.0"
@@ -335,6 +482,16 @@ dependencies = [
"tikv-jemalloc-sys",
]
[[package]]
name = "tinystr"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "typed-arena"
version = "2.0.2"
@@ -353,6 +510,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "web_atoms"
version = "0.2.3"
@@ -429,6 +592,12 @@ version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "writeable"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
[[package]]
name = "xml5ever"
version = "0.39.0"
@@ -438,3 +607,80 @@ dependencies = [
"log",
"markup5ever",
]
[[package]]
name = "yoke"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
dependencies = [
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerofrom"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerotrie"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
]
[[package]]
name = "zerovec"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@@ -16,6 +16,7 @@ tikv-jemallocator = {version = "0.6.1", features = ["stats"]}
tikv-jemalloc-ctl = {version = "0.6.1", features = ["stats"]}
xml5ever = "0.39.0"
encoding_rs = "0.8"
idna = "1.1.0"
[profile.release]
lto = true

View File

@@ -18,6 +18,7 @@
mod sink;
mod types;
mod url;
#[cfg(debug_assertions)]
#[global_allocator]
@@ -159,8 +160,7 @@ pub extern "C" fn html5ever_parse_document_with_encoding(
};
// Parse directly from decoded string
parse_document(sink, Default::default())
.one(StrTendril::from(decoded.as_ref()));
parse_document(sink, Default::default()).one(StrTendril::from(decoded.as_ref()));
}
// === Encoding API for TextDecoder ===
@@ -180,10 +180,7 @@ pub struct EncodingInfo {
/// Look up an encoding by its label (case-insensitive, whitespace-trimmed)
#[no_mangle]
pub extern "C" fn encoding_for_label(
label: *const c_uchar,
label_len: usize,
) -> EncodingInfo {
pub extern "C" fn encoding_for_label(label: *const c_uchar, label_len: usize) -> EncodingInfo {
if label.is_null() || label_len == 0 {
return EncodingInfo {
found: 0,

83
src/html5ever/url.rs Normal file
View File

@@ -0,0 +1,83 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// WHATWG "domain to ASCII" backed by the `idna` crate (UTS#46, the same engine
// rust-url/Servo use). Pairs with src/sys/idna.zig. Replaced libidn2, whose
// IDNA-2008 behavior diverged from the spec. Value-in / value-out: a UTF-8
// host string becomes its punycode form, or an error.
use std::os::raw::c_uchar;
use std::slice;
fn str_from(ptr: *const c_uchar, len: usize) -> Option<&'static str> {
    // Zig passes a non-null but dangling pointer for empty slices, so the
    // length has to be inspected before any slice is built from the raw
    // parts. A null pointer is likewise treated as the empty string.
    let has_bytes = len != 0 && !ptr.is_null();
    if !has_bytes {
        return Some("");
    }

    // NOTE(review): nothing here enforces the `'static` lifetime; the result
    // borrows the caller's buffer and must not outlive it.
    let raw: &'static [u8] = unsafe { slice::from_raw_parts(ptr, len) };

    // Invalid UTF-8 is reported as None rather than being repaired.
    match std::str::from_utf8(raw) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}
// Runs `f` and turns a panic into error code 1, so a panic inside the IDNA
// code never unwinds across the extern "C" boundary (which would abort the
// whole process). When `f` completes normally its return value is passed
// through unchanged.
fn ffi_guard<F: FnOnce() -> i32>(f: F) -> i32 {
    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
        Ok(code) => code,
        Err(_) => 1,
    }
}
/// WHATWG "domain to ASCII" (UTS#46, non-transitional, beStrict=false).
///
/// On success returns 0, writes an owned NUL-terminated buffer to `*out_ptr`
/// and stores its length (not counting the terminator) in `*out_len`. The
/// caller must release the buffer with `lpurl_free(ptr, len)`.
///
/// Returns 1, leaving both out-parameters untouched, when `host` is not valid
/// UTF-8, is not a valid domain, either out-pointer is null, or the IDNA code
/// panics.
#[no_mangle]
pub extern "C" fn lpurl_domain_to_ascii(
    host_ptr: *const c_uchar,
    host_len: usize,
    out_ptr: *mut *mut c_uchar,
    out_len: *mut usize,
) -> i32 {
    ffi_guard(move || {
        // Writing through a null out-parameter would be undefined behaviour;
        // report it as an ordinary failure before doing any work.
        if out_ptr.is_null() || out_len.is_null() {
            return 1;
        }

        let host = match str_from(host_ptr, host_len) {
            Some(s) => s,
            None => return 1,
        };
        let ascii = match idna::domain_to_ascii(host) {
            Ok(s) => s,
            Err(_) => return 1,
        };

        // Reported length excludes the NUL terminator appended below.
        let len = ascii.len();
        let mut bytes = ascii.into_bytes();
        bytes.push(0);

        // A boxed slice has no spare capacity, so lpurl_free can rebuild the
        // allocation from the pointer and `len + 1` alone.
        let boxed = bytes.into_boxed_slice();
        unsafe {
            *out_ptr = Box::into_raw(boxed) as *mut c_uchar;
            *out_len = len;
        }
        0
    })
}
/// Releases a buffer previously handed out by lpurl_domain_to_ascii. `len` is
/// the length reported through `out_len`, i.e. without the NUL terminator.
/// A null `ptr` is a no-op.
#[no_mangle]
pub extern "C" fn lpurl_free(ptr: *mut c_uchar, len: usize) {
    if !ptr.is_null() {
        // The allocation was a boxed slice holding the payload plus its NUL
        // terminator, so it spans exactly len + 1 bytes.
        let raw: *mut [c_uchar] = std::ptr::slice_from_raw_parts_mut(ptr, len + 1);
        unsafe {
            drop(Box::from_raw(raw));
        }
    }
}

View File

@@ -15,12 +15,19 @@
const std = @import("std");
const c = @cImport({
@cInclude("idn2.h");
});
const Allocator = std.mem.Allocator;
pub const Error = error{Idna} || Allocator.Error;
// WHATWG "domain to ASCII" lives in the rust-url FFI (src/html5ever/url.rs),
// which uses the UTS#46-conformant `idna` crate — the same engine rust-url
// itself uses.
extern "c" fn lpurl_domain_to_ascii(
host_ptr: [*]const u8,
host_len: usize,
out_ptr: *?[*]u8,
out_len: *usize,
) i32;
extern "c" fn lpurl_free(ptr: ?[*]u8, len: usize) void;
/// True if `host` contains any non-ASCII byte and therefore needs IDNA
/// processing. Pure-ASCII hostnames are returned unchanged by `toAscii`,
@@ -35,21 +42,16 @@ pub fn needsAscii(host: []const u8) bool {
}
/// Convert a UTF-8 hostname to its ASCII (Punycode) form per UTS#46
/// IDNA 2008 with non-transitional processing — the algorithm WHATWG URL
/// invokes as "domain to ASCII". Returns an allocator-owned slice.
pub fn toAscii(allocator: Allocator, host: []const u8) Error![]u8 {
const host_z = try allocator.dupeZ(u8, host);
defer allocator.free(host_z);
var out_ptr: [*c]u8 = undefined;
const flags: c_int = c.IDN2_NFC_INPUT | c.IDN2_NONTRANSITIONAL;
const rc = c.idn2_to_ascii_8z(host_z.ptr, &out_ptr, flags);
if (rc != c.IDN2_OK) {
/// non-transitional processing — the algorithm WHATWG URL invokes as
/// "domain to ASCII". Returns an allocator-owned slice.
pub fn toAscii(allocator: Allocator, host: []const u8) ![]u8 {
var out_len: usize = 0;
var out_ptr: ?[*]u8 = null;
if (lpurl_domain_to_ascii(host.ptr, host.len, &out_ptr, &out_len) != 0) {
return error.Idna;
}
defer c.idn2_free(out_ptr);
return try allocator.dupe(u8, std.mem.span(@as([*:0]const u8, @ptrCast(out_ptr))));
defer lpurl_free(out_ptr, out_len);
return allocator.dupe(u8, out_ptr.?[0..out_len]);
}
const testing = @import("../testing.zig");
@@ -74,3 +76,39 @@ test "idna: German sharp s with non-transitional processing" {
defer testing.allocator.free(out);
try testing.expectString("xn--fa-hia.de", out);
}
test "idna: needsAscii" {
try testing.expectEqual(false, needsAscii(""));
try testing.expectEqual(false, needsAscii("xn--fa-hia.de"));
try testing.expectEqual(true, needsAscii("faß.de"));
try testing.expectEqual(true, needsAscii("\xff"));
}
test "idna: UTS#46 lowercases ASCII" {
const out = try toAscii(testing.allocator, "EXAMPLE.COM");
defer testing.allocator.free(out);
try testing.expectString("example.com", out);
}
test "idna: already-punycode is idempotent" {
const out = try toAscii(testing.allocator, "xn--rksmrgs-5wao1o.se");
defer testing.allocator.free(out);
try testing.expectString("xn--rksmrgs-5wao1o.se", out);
}
test "idna: mixed ASCII and non-ASCII labels" {
const out = try toAscii(testing.allocator, "münchen.example.com");
defer testing.allocator.free(out);
try testing.expectString("xn--mnchen-3ya.example.com", out);
}
test "idna: multi-label CJK" {
const out = try toAscii(testing.allocator, "日本.jp");
defer testing.allocator.free(out);
try testing.expectString("xn--wgv71a.jp", out);
}
test "idna: invalid domain returns error" {
// U+FFFD (REPLACEMENT CHARACTER) is disallowed under UTS#46.
try testing.expectError(error.Idna, toAscii(testing.allocator, "\u{FFFD}.com"));
}

1915
vendor/libidn2/config.h vendored
View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,20 +0,0 @@
/* Darwin-only strchrnul shim for libidn2.
strchrnul is a glibc extension. macOS libc lacks it before 15.4, and
libidn2's lib/lookup.c never includes <string.h> — so even on newer
macOS the declaration would not reach the call site. The matching
prototype is declared next to the strverscmp shim in
vendor/libidn2/config.h (within the _LIBIDN2_LP_DECLS block, gated on
__APPLE__), so callers compile; this file provides the symbol so
they link.
gnulib's strchrnul.c falls through to rawmemchr() when the search byte
is NUL — also a glibc extension. libidn2 only ever searches for '.', so
a straight byte scan is enough and avoids dragging in a second shim. */
char *strchrnul(const char *s, int c_in) {
    /* Compare as unsigned char so bytes >= 0x80 match regardless of the
       signedness of plain char. */
    const unsigned char target = (unsigned char) c_in;
    const unsigned char *cursor;

    /* Stop at the first match or at the terminating NUL, whichever comes
       first; the returned pointer is never NULL. */
    for (cursor = (const unsigned char *) s; *cursor != '\0'; cursor++) {
        if (*cursor == target) {
            break;
        }
    }
    return (char *) cursor;
}