Merge pull request #2318 from lightpanda-io/atob_btoa

Fix atob/btoa
This commit is contained in:
Karl Seguin
2026-05-04 08:29:11 +08:00
committed by GitHub
15 changed files with 194 additions and 27 deletions

View File

@@ -13,7 +13,7 @@ inputs:
zig-v8:
description: 'zig v8 version to install'
required: false
default: 'v0.4.2'
default: 'v0.4.3'
v8:
description: 'v8 version to install'
required: false

View File

@@ -3,7 +3,7 @@ FROM debian:stable-slim
ARG MINISIG=0.12
ARG ZIG_MINISIG=RWSGOq2NVecA2UPNdBUZykf1CCb147pkmdtYxgb3Ti+JO/wCYvhbAb/U
ARG V8=14.0.365.4
ARG ZIG_V8=v0.4.2
ARG ZIG_V8=v0.4.3
ARG TARGETPLATFORM
RUN apt-get update -yq && \

View File

@@ -5,8 +5,8 @@
.minimum_zig_version = "0.15.2",
.dependencies = .{
.v8 = .{
.url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.4.2.tar.gz",
.hash = "v8-0.0.0-xddH672HBABNrbtyNk9o4QXxQJTlpjiCscmdEQuMvKnR",
.url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.4.3.tar.gz",
.hash = "v8-0.0.0-xddH61GNBABFJ11FJ8KDYXITyjKh4jQ54taEenYek2xJ",
},
// .v8 = .{ .path = "../zig-v8-fork" },
.brotli = .{

View File

@@ -846,7 +846,23 @@ fn getArgs(comptime F: type, comptime offset: usize, local: *const Local, info:
@field(args, tupleFieldName(field_index)) = null;
} else {
const js_val = info.getArg(@intCast(i), local);
@field(args, tupleFieldName(field_index)) = local.jsValueToZig(param.type.?, js_val) catch {
// Only fold errors we don't recognize into InvalidArgument; let
// domain-meaningful ones (e.g. InvalidCharacterError from a
// String.OneByte param) propagate so handleError can map them
// to the right DOMException. Compared by name because the per-
// type instantiation of jsValueToZig may not include such errors
// in its inferred error set.
@field(args, tupleFieldName(field_index)) = local.jsValueToZig(param.type.?, js_val) catch |err| {
const DOMException = @import("../webapi/DOMException.zig");
if (DOMException.fromError(err) != null) {
// I don't love this. But we have [a few] cases when trying to
// map a JS Value that we have a specific DOMException to throw.
// Ideally we should only do this if dom_exception = true in the
// bridge definition. But we don't have access to that here.
// Instead, we just rely on the fact that local.jsValueToZig
// only throws a DOMException-known error when it should.
return err;
}
return error.InvalidArgument;
};
}

View File

@@ -73,6 +73,10 @@ pub fn initStringHandle(self: Isolate, str: []const u8) *const v8.String {
return v8.v8__String__NewFromUtf8(self.handle, str.ptr, v8.kNormal, @as(c_int, @intCast(str.len))).?;
}
/// Creates a V8 string handle from `bytes` through `v8__String__NewFromOneByte`,
/// i.e. without UTF-8 decoding of the input.
/// The byte count is narrowed to `c_int` with `@intCast`, and the optional
/// handle returned by V8 is unwrapped with `.?`.
pub fn initOneByteStringHandle(self: Isolate, bytes: []const u8) *const v8.String {
    const byte_count: c_int = @intCast(bytes.len);
    const maybe_handle = v8.v8__String__NewFromOneByte(self.handle, bytes.ptr, v8.kNormal, byte_count);
    return maybe_handle.?;
}
pub fn createError(self: Isolate, msg: []const u8) *const v8.Value {
const message = self.initStringHandle(msg);
return v8.v8__Exception__Error(message).?;

View File

@@ -62,6 +62,17 @@ pub fn newString(self: *const Local, str: []const u8) js.String {
};
}
// Builds a JS string in which every input byte 0..255 becomes the JS code
// unit of the same value; the bytes are never decoded as UTF-8. Reach for
// this when returning binary data (e.g. atob output): `newString` would
// treat any byte 0x80..0xFF as malformed UTF-8 and substitute U+FFFD.
pub fn newOneByteString(self: *const Local, bytes: []const u8) js.String {
    const handle = self.isolate.initOneByteStringHandle(bytes);
    return .{ .local = self, .handle = handle };
}
pub fn newObject(self: *const Local) js.Object {
return .{
.local = self,
@@ -745,6 +756,15 @@ fn jsValueToStruct(self: *const Local, comptime T: type, js_val: js.Value) !?T {
};
},
js.String => return js_val.isString(),
js.String.OneByte => {
// Receives a "binary string": each JS code unit must fit in a byte
// (0..255). Throws InvalidCharacterError if any code unit is out
// of range, matching the WHATWG btoa spec — which is the main
// intended caller, but applicable to any binary-string input.
const js_str = js_val.isString() orelse return null;
if (!js_str.containsOnlyOneByte()) return error.InvalidCharacterError;
return .{ .bytes = try js_str.toOneByteSlice(self.call_arena) };
},
string.String => {
const js_str = js_val.isString() orelse return null;
return try js_str.toSSO(false);

View File

@@ -31,6 +31,22 @@ const String = @This();
local: *const js.Local,
handle: *const v8.String,
/// A byte slice that crosses the JS boundary as a "binary string" — each byte
/// 0..255 corresponds to a JS code unit 0..255 (Latin-1), with no UTF-8
/// decoding.
///
/// As a return type: use it from a Web API method whenever the contract is
/// "one byte per JS character" (atob, FileReader.readAsBinaryString, etc.).
/// The framework turns it into a V8 string via `String::NewFromOneByte`.
///
/// As a parameter type: the JS-to-Zig argument conversion rejects a string
/// holding any code unit above 0xFF with `error.InvalidCharacterError`, and
/// otherwise fills `bytes` with one byte per code unit.
pub const OneByte = struct {
    /// Raw bytes, one per JS code unit.
    bytes: []const u8,
};
/// Views this string as a generic `js.Value` bound to the same `local`; the
/// underlying handle is pointer-cast, not copied.
pub fn toValue(self: String) js.Value {
    const as_value: js.Value = .{ .local = self.local, .handle = @ptrCast(self.handle) };
    return as_value;
}
/// Delegates to `_toSlice`, passing `false` as its first argument and the
/// local's `call_arena` as the allocator.
pub fn toSlice(self: String) ![]u8 {
    const arena = self.local.call_arena;
    return self._toSlice(false, arena);
}
@@ -114,3 +130,32 @@ pub fn format(self: String, writer: *std.Io.Writer) !void {
/// Length of the string as reported by V8's `Utf8Length`, widened to `usize`.
pub fn len(self: String) usize {
    const isolate = self.local.isolate.handle;
    const utf8_len = v8.v8__String__Utf8Length(self.handle, isolate);
    return @intCast(utf8_len);
}
// Number of JS code units in the string, regardless of encoding — the same
// value JavaScript exposes as `s.length`. Prefer this over `len()` when
// sizing a buffer for one-byte / Latin-1 reads.
pub fn lenChars(self: String) usize {
    const code_units = v8.v8__String__Length(self.handle);
    return @intCast(code_units);
}
// Reports whether every code unit of the string fits in one byte (codepoint
// <= 0xFF, i.e. Latin-1). btoa relies on this to reject strings that hold
// codepoints outside the binary-string range.
pub fn containsOnlyOneByte(self: String) bool {
    const handle = self.handle;
    return v8.v8__String__ContainsOnlyOneByte(handle);
}
// Copies the string out as Latin-1 bytes: output byte i equals code unit i.
// The caller is expected to have checked `containsOnlyOneByte` first; if a
// code unit exceeds 0xFF, V8 silently keeps only its low byte.
// The returned buffer is allocated from `allocator` and holds one byte per
// code unit (empty for an empty string).
pub fn toOneByteSlice(self: String, allocator: Allocator) ![]u8 {
    const code_units: u32 = @intCast(v8.v8__String__Length(self.handle));
    const out = try allocator.alloc(u8, code_units);
    // Nothing to copy for an empty string; skip the V8 call entirely.
    if (code_units == 0) return out;

    v8.v8__String__WriteOneByte(self.handle, self.local.isolate.handle, 0, code_units, out.ptr);
    return out;
}

View File

@@ -234,6 +234,7 @@ pub fn simpleZigValueToJs(isolate: Isolate, value: anytype, comptime fail: bool,
.@"struct" => {
switch (@TypeOf(value)) {
string.String => return isolate.initStringHandle(value.str()),
String.OneByte => return @ptrCast(isolate.initOneByteStringHandle(value.bytes)),
ArrayBuffer => {
const values = value.values;
const len = values.len;

View File

@@ -40,6 +40,15 @@
}
</script>
<script id=messageEventPorts>
{
  // A freshly constructed MessageEvent must expose `ports` as an empty,
  // array-like value — never undefined.
  const messageEvent = new MessageEvent('message');
  testing.expectEqual(0, messageEvent.ports.length);

  const looksLikeArray =
    Array.isArray(messageEvent.ports) || typeof messageEvent.ports.length === 'number';
  testing.expectEqual(true, looksLikeArray);
}
</script>
<script id=messageEventInheritance>
{
const evt4 = new MessageEvent('custom');

View File

@@ -88,6 +88,73 @@
});
</script>
<script id=atob_binary_string>
  // atob returns a "binary string": every decoded byte (0-255) shows up as
  // one JS character whose codepoint equals that byte. Bytes with the high
  // bit set must NOT be run through UTF-8 decoding, which would turn them
  // into U+FFFD.

  // '/w==' is the encoding of the single byte 0xFF.
  const ff = atob('/w==');
  testing.expectEqual(1, ff.length);
  testing.expectEqual(0xff, ff.charCodeAt(0));

  // Bytes 0x00..0x07.
  const lo = atob('AAECAwQFBgc=');
  testing.expectEqual(8, lo.length);
  [0, 1, 2, 3, 4, 5, 6, 7].forEach((byte, idx) => {
    testing.expectEqual(byte, lo.charCodeAt(idx));
  });

  // 0xFC 0xFD 0xFE 0xFF — the bytes that used to come back as U+FFFD.
  const hi = atob('/P3+/w==');
  testing.expectEqual(4, hi.length);
  [0xfc, 0xfd, 0xfe, 0xff].forEach((byte, idx) => {
    testing.expectEqual(byte, hi.charCodeAt(idx));
  });

  // The 0x7F / 0x80 boundary.
  const boundary = atob('f4A=');
  testing.expectEqual(2, boundary.length);
  [0x7f, 0x80].forEach((byte, idx) => {
    testing.expectEqual(byte, boundary.charCodeAt(idx));
  });

  // The result must be usable to fill a Uint8Array, which is how recaptcha
  // and many libraries decode binary blobs.
  const bytes = new Uint8Array(hi.length);
  for (let pos = 0; pos < hi.length; pos++) {
    bytes[pos] = hi.charCodeAt(pos);
  }
  [0xfc, 0xfd, 0xfe, 0xff].forEach((byte, idx) => {
    testing.expectEqual(byte, bytes[idx]);
  });
</script>
<script id=btoa_binary_string>
  // btoa takes a "binary string": the codepoint (0-255) of each character
  // is one input byte. Any character above 0xFF must raise
  // InvalidCharacterError.
  [
    ['/w==', [0xff]],
    ['/P3+/w==', [0xfc, 0xfd, 0xfe, 0xff]],
    ['AAECAwQFBgc=', [0, 1, 2, 3, 4, 5, 6, 7]],
    ['f4A=', [0x7f, 0x80]],
  ].forEach(([expected, codes]) => {
    testing.expectEqual(expected, btoa(String.fromCharCode(...codes)));
  });

  // Codepoints above 0xFF cannot be represented in a binary string.
  [
    String.fromCharCode(0x100),
    'a' + String.fromCharCode(0x1ff) + 'b',
  ].forEach((input) => {
    testing.expectError('InvalidCharacterError', () => {
      btoa(input);
    });
  });
</script>
<script id=btoa_atob_binary_roundtrip>
  // Every byte value 0..255 must survive btoa followed by atob.
  let all = '';
  for (let code = 0; code <= 0xff; code++) {
    all += String.fromCharCode(code);
  }

  const encoded = btoa(all);
  const decoded = atob(encoded);

  testing.expectEqual(256, decoded.length);
  for (let code = 0; code <= 0xff; code++) {
    testing.expectEqual(code, decoded.charCodeAt(code));
  }
</script>
<script id=btoa_atob_roundtrip>
const testStrings = [
'Hello World!',

View File

@@ -59,6 +59,7 @@ const ReadyState = enum(u8) {
// Outcome of a completed read, tagged by the representation it is stored in.
// `readInternal` picks the variant from the requested read type.
const Result = union(enum) {
    // Raw data kept as a plain byte slice (used for `.text` reads).
    string: []const u8,
    // Data for `.binary_string` reads, wrapped so that it is handed to JS
    // one byte per code unit rather than as a plain byte slice.
    binary_string: js.String.OneByte,
    // Data for `.arraybuffer` reads.
    arraybuffer: js.ArrayBuffer,
};
@@ -209,7 +210,7 @@ fn readInternal(self: *FileReader, blob: *Blob, read_type: ReadType) !void {
// Process the data based on read type
self._result = switch (read_type) {
.arraybuffer => .{ .arraybuffer = .{ .values = data } },
.binary_string => .{ .string = data },
.binary_string => .{ .binary_string = .{ .bytes = data } },
.text => .{ .string = data },
.data_url => blk: {
// Create data URL with base64 encoding

View File

@@ -607,12 +607,13 @@ pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]cons
});
}
pub fn btoa(_: *const Window, input: []const u8, frame: *Frame) ![]const u8 {
return @import("encoding/base64.zig").encode(frame.call_arena, input);
/// Base64-encodes the bytes of a binary-string argument. The encoded output
/// is allocated from the frame's call arena.
pub fn btoa(_: *const Window, input: js.String.OneByte, frame: *Frame) ![]const u8 {
    const base64 = @import("encoding/base64.zig");
    return base64.encode(frame.call_arena, input.bytes);
}
pub fn atob(_: *const Window, input: []const u8, frame: *Frame) ![]const u8 {
return @import("encoding/base64.zig").decode(frame.call_arena, input);
/// Base64-decodes the argument and returns the decoded bytes as a one-byte
/// ("binary") string. The decoded buffer is allocated from the frame's call
/// arena; decode errors propagate to the caller.
pub fn atob(_: *const Window, input: js.String.OneByte, frame: *Frame) !js.String.OneByte {
    const base64 = @import("encoding/base64.zig");
    return .{ .bytes = try base64.decode(frame.call_arena, input.bytes) };
}
pub fn structuredClone(_: *const Window, value: js.Value) !js.Value {
@@ -1052,7 +1053,7 @@ pub const JsApi = struct {
pub const cancelIdleCallback = bridge.function(Window.cancelIdleCallback, .{});
pub const matchMedia = bridge.function(Window.matchMedia, .{});
pub const postMessage = bridge.function(Window.postMessage, .{});
pub const btoa = bridge.function(Window.btoa, .{});
pub const btoa = bridge.function(Window.btoa, .{ .dom_exception = true });
pub const atob = bridge.function(Window.atob, .{ .dom_exception = true });
pub const reportError = bridge.function(Window.reportError, .{});
pub const structuredClone = bridge.function(Window.structuredClone, .{});

View File

@@ -264,14 +264,13 @@ pub fn receiveMessage(self: *WorkerGlobalScope, data: JS.Value) !void {
});
}
pub fn btoa(_: *const WorkerGlobalScope, input: []const u8, exec: *JS.Execution) ![]const u8 {
const base64 = @import("encoding/base64.zig");
return base64.encode(exec.call_arena, input);
/// Base64-encodes the bytes of a binary-string argument. The encoded output
/// is allocated from the execution's call arena.
pub fn btoa(_: *const WorkerGlobalScope, input: JS.String.OneByte, exec: *JS.Execution) ![]const u8 {
    const base64 = @import("encoding/base64.zig");
    return base64.encode(exec.call_arena, input.bytes);
}
pub fn atob(_: *const WorkerGlobalScope, input: []const u8, exec: *JS.Execution) ![]const u8 {
const base64 = @import("encoding/base64.zig");
return base64.decode(exec.call_arena, input);
/// Base64-decodes the argument and returns the decoded bytes as a one-byte
/// ("binary") string. The decoded buffer is allocated from the execution's
/// call arena; decode errors propagate to the caller.
pub fn atob(_: *const WorkerGlobalScope, input: JS.String.OneByte, exec: *JS.Execution) !JS.String.OneByte {
    const base64 = @import("encoding/base64.zig");
    return .{ .bytes = try base64.decode(exec.call_arena, input.bytes) };
}
pub fn structuredClone(_: *const WorkerGlobalScope, value: JS.Value) !JS.Value {
@@ -449,7 +448,7 @@ pub const JsApi = struct {
pub const onrejectionhandled = bridge.accessor(WorkerGlobalScope.getOnRejectionHandled, WorkerGlobalScope.setOnRejectionHandled, .{});
pub const onunhandledrejection = bridge.accessor(WorkerGlobalScope.getOnUnhandledRejection, WorkerGlobalScope.setOnUnhandledRejection, .{});
pub const btoa = bridge.function(WorkerGlobalScope.btoa, .{});
pub const btoa = bridge.function(WorkerGlobalScope.btoa, .{ .dom_exception = true });
pub const atob = bridge.function(WorkerGlobalScope.atob, .{ .dom_exception = true });
pub const structuredClone = bridge.function(WorkerGlobalScope.structuredClone, .{});
pub const postMessage = bridge.function(WorkerGlobalScope.postMessage, .{});

View File

@@ -16,29 +16,27 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! Base64 encoding/decoding helpers for btoa/atob.
//! Used by both Window and WorkerGlobalScope.
//! Pure-byte base64 helpers for btoa/atob, shared by Window and
//! WorkerGlobalScope. The "binary string" semantics (each JS code unit
//! 0..255 = one byte) are handled at the JS boundary by those callers via
//! the one-byte string APIs — this module just deals in bytes.
const std = @import("std");
const Allocator = std.mem.Allocator;
/// Base64-encodes `input` with the standard alphabet and `=` padding (btoa).
/// The returned slice is allocated from `alloc`; allocation failure is the
/// only error.
pub fn encode(alloc: Allocator, input: []const u8) ![]const u8 {
    const encoder = std.base64.standard.Encoder;
    const out = try alloc.alloc(u8, encoder.calcSize(input.len));
    return encoder.encode(out, input);
}
/// Decodes base64 input (atob).
/// Implements forgiving base64 decode per WHATWG spec.
/// Forgiving base64 decode per WHATWG spec:
/// https://infra.spec.whatwg.org/#forgiving-base64-decode
pub fn decode(alloc: Allocator, input: []const u8) ![]const u8 {
const trimmed = std.mem.trim(u8, input, &std.ascii.whitespace);
// Forgiving base64 decode per WHATWG spec:
// https://infra.spec.whatwg.org/#forgiving-base64-decode
// Remove trailing padding to use standard_no_pad decoder
const unpadded = std.mem.trimRight(u8, trimmed, "=");
// Length % 4 == 1 is invalid (can't represent valid base64)
// Length % 4 == 1 is invalid (can't represent valid base64).
if (unpadded.len % 4 == 1) {
return error.InvalidCharacterError;
}

View File

@@ -23,6 +23,7 @@ const js = @import("../../js/js.zig");
const Page = @import("../../Page.zig");
const Event = @import("../Event.zig");
const MessagePort = @import("../MessagePort.zig");
const Window = @import("../Window.zig");
const String = lp.String;
@@ -116,6 +117,10 @@ pub fn getSource(self: *const MessageEvent) ?*Window {
return self._source;
}
/// Always returns an empty slice of ports; the event itself is not consulted.
pub fn getPorts(_: *const MessageEvent) []*MessagePort {
    const no_ports: []*MessagePort = &.{};
    return no_ports;
}
pub const JsApi = struct {
pub const bridge = js.Bridge(MessageEvent);
@@ -129,6 +134,7 @@ pub const JsApi = struct {
pub const data = bridge.accessor(MessageEvent.getData, null, .{});
pub const origin = bridge.accessor(MessageEvent.getOrigin, null, .{});
pub const source = bridge.accessor(MessageEvent.getSource, null, .{});
pub const ports = bridge.accessor(MessageEvent.getPorts, null, .{});
};
const testing = @import("../../../testing.zig");