Files
browser/src/sys/idna.zig
Karl Seguin 320ffa2819 Improve WPT /url/ tests
This is a bit all over the place.

1 - Replace libidn2 with rust-idna. It looks like there are different idna
    profiles, and rust-idna (from the servo project) implements the whatwg
    one. libidn2 would be too strict in some cases and not strict enough in
    others. (Gemini says I could use libidn2 for this, but what it suggested
    didn't work, and I couldn't figure it out myself, and claude insisted it
    _did not_ have the correct implementation for what we want).

2 - We previously only ran a URL through idna if it wasn't ascii. Turns out
    we also need to run it if there's a "xn--" (aka, an IDNA ACE prefix) in
    there. This helps us pass hundreds of WPT cases, and it's pretty cheap.

3 - Implement more of the Area WebAPI. Mostly copied from Anchor.

4 - Add username/password accessors to Anchor/Area

5 - window.open validates the URL (i.e. tries to resolve it and handles the
    error)

6 - Invalid idna conversion maps to a TypeError

7 - Clean up closed popups on the next tick (like destroyed pages), rather than
    at an interval or on shutdown. This one seems unrelated, but some of these
    tests open hundreds (thousands?) of popups and then close them.
2026-05-29 18:06:44 +08:00

115 lines
4.1 KiB
Zig

// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const Allocator = std.mem.Allocator;
// WHATWG "domain to ASCII" lives in the rust-url FFI (src/html5ever/url.rs),
// which uses the UTS#46-conformant `idna` crate — the same engine rust-url
// itself uses.
//
// Takes the host as a pointer/length pair and reports its result through the
// `out_ptr` / `out_len` out-parameters. `toAscii` in this file treats a
// return value of 0 as success and any other value as a conversion failure.
// NOTE(review): the out buffer is presumably owned by the FFI side until it
// is handed back to `lpurl_free` — confirm against url.rs.
extern "c" fn lpurl_domain_to_ascii(
host_ptr: [*]const u8,
host_len: usize,
out_ptr: *?[*]u8,
out_len: *usize,
) i32;
// Hands a buffer obtained from `lpurl_domain_to_ascii` back to the FFI side.
// `toAscii` calls it with the same pointer/length pair it received.
extern "c" fn lpurl_free(ptr: ?[*]u8, len: usize) void;
/// Reports whether `host` contains at least one byte outside the 7-bit
/// ASCII range (i.e. a byte >= 0x80). An empty `host` yields `false`.
///
/// This is a cheap pre-check callers can run before invoking `toAscii`.
/// Note that a `false` result only means no non-ASCII byte is present; it
/// does not by itself mean `toAscii` would leave the host untouched (for
/// example, `toAscii` lowercases ASCII letters).
pub fn needsAscii(host: []const u8) bool {
    var idx: usize = 0;
    while (idx < host.len) : (idx += 1) {
        // A set high bit means the byte is not plain ASCII.
        if ((host[idx] & 0x80) != 0) return true;
    }
    return false;
}
/// Convert a UTF-8 hostname to its ASCII (Punycode) form per UTS#46
/// non-transitional processing — the algorithm WHATWG URL invokes as
/// "domain to ASCII".
///
/// Returns a copy of the converted host allocated with `allocator`; the
/// caller owns it and must free it with the same allocator.
///
/// Errors:
///   - `error.Idna` if the FFI conversion reports failure (non-zero return),
///     or if it reports success but hands back a null buffer together with
///     a non-zero length.
///   - `error.OutOfMemory` if the copy cannot be allocated.
pub fn toAscii(allocator: Allocator, host: []const u8) ![]u8 {
    var out_len: usize = 0;
    var out_ptr: ?[*]u8 = null;
    if (lpurl_domain_to_ascii(host.ptr, host.len, &out_ptr, &out_len) != 0) {
        return error.Idna;
    }

    // The pointer is written by foreign code, so do not blindly unwrap it:
    // `.?` on null panics in safe builds and is undefined behavior in
    // ReleaseFast. A null buffer with zero length is treated as an empty
    // result; a null buffer with a non-zero length is a broken result.
    const ptr = out_ptr orelse {
        if (out_len != 0) return error.Idna;
        return allocator.dupe(u8, "");
    };

    // Give the FFI buffer back once the copy below is done, whether or not
    // the allocation succeeds.
    defer lpurl_free(ptr, out_len);
    return allocator.dupe(u8, ptr[0..out_len]);
}
const testing = @import("../testing.zig");
test "idna: ASCII passthrough" {
    const host = "example.com";

    // No byte is >= 0x80, so the pre-check reports false.
    try testing.expectEqual(false, needsAscii(host));

    // A lowercase, pure-ASCII host comes back as the same text.
    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString(host, ascii);
}
test "idna: non-ASCII to punycode" {
    const host = "räksmörgås.se";

    // The accented letters are multi-byte UTF-8, so the pre-check fires.
    try testing.expectEqual(true, needsAscii(host));

    // Only the non-ASCII label is Punycode-encoded; the TLD is untouched.
    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString("xn--rksmrgs-5wao1o.se", ascii);
}
test "idna: German sharp s with non-transitional processing" {
    // Under UTS#46 non-transitional processing ß is kept (and therefore
    // Punycode-encoded) instead of being mapped to "ss".
    const host = "faß.de";
    const expected = "xn--fa-hia.de";

    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString(expected, ascii);
}
test "idna: needsAscii" {
    // Hosts made only of bytes below 0x80 (including the empty host and an
    // already-encoded Punycode host) do not trigger the pre-check.
    const empty_host = "";
    const punycode_host = "xn--fa-hia.de";
    try testing.expectEqual(false, needsAscii(empty_host));
    try testing.expectEqual(false, needsAscii(punycode_host));

    // Any byte >= 0x80 triggers it, whether or not it is valid UTF-8.
    const utf8_host = "faß.de";
    const raw_high_byte = "\xff";
    try testing.expectEqual(true, needsAscii(utf8_host));
    try testing.expectEqual(true, needsAscii(raw_high_byte));
}
test "idna: UTS#46 lowercases ASCII" {
    // Pure-ASCII input is still normalized: uppercase letters are folded.
    const host = "EXAMPLE.COM";
    const expected = "example.com";

    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString(expected, ascii);
}
test "idna: already-punycode is idempotent" {
    // Feeding an already-encoded host through the conversion must yield
    // the very same text.
    const host = "xn--rksmrgs-5wao1o.se";

    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString(host, ascii);
}
test "idna: mixed ASCII and non-ASCII labels" {
    // Only the label containing non-ASCII characters is encoded; the
    // remaining ASCII labels pass through as they are.
    const host = "münchen.example.com";
    const expected = "xn--mnchen-3ya.example.com";

    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString(expected, ascii);
}
test "idna: multi-label CJK" {
    // A CJK label followed by an ASCII TLD.
    const host = "日本.jp";
    const expected = "xn--wgv71a.jp";

    const ascii = try toAscii(testing.allocator, host);
    defer testing.allocator.free(ascii);
    try testing.expectString(expected, ascii);
}
test "idna: invalid domain returns error" {
    // U+FFFD (REPLACEMENT CHARACTER) is disallowed under UTS#46, so the
    // conversion is expected to fail rather than produce a host.
    const result = toAscii(testing.allocator, "\u{FFFD}.com");
    try testing.expectError(error.Idna, result);
}