From 21e9c5184cf56d895df58ca6b8e03366f937bc3e Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Wed, 29 Apr 2026 17:01:07 +0800 Subject: [PATCH 1/2] Fix atob/btoa Depends on https://github.com/lightpanda-io/zig-v8-fork/pull/175 This started as an investigation into errors when loading recaptcha. Turns out our atob/btoa is naive. Specifically, we take a `[]const u8` input which, via the bridge, goes through the v8 utf8 path. This can result in the input being fundamentally altered. This commit introduces a new discriminating type, js.String.OneByte, which uses String::WriteOneByte when going from js->zig and String::NewFromOneByte when going from zig->js, avoiding any changes to the raw data. Also adds a placeholder MessageEvent.ports getter (which showed up in debugging) --- build.zig.zon | 4 +- src/browser/js/Caller.zig | 18 +++++- src/browser/js/Isolate.zig | 4 ++ src/browser/js/Local.zig | 20 +++++++ src/browser/js/String.zig | 45 +++++++++++++++ src/browser/js/js.zig | 1 + src/browser/tests/event/message.html | 9 +++ src/browser/tests/window/window.html | 67 +++++++++++++++++++++++ src/browser/webapi/FileReader.zig | 3 +- src/browser/webapi/Window.zig | 11 ++-- src/browser/webapi/WorkerGlobalScope.zig | 13 ++--- src/browser/webapi/encoding/base64.zig | 16 +++--- src/browser/webapi/event/MessageEvent.zig | 6 ++ 13 files changed, 192 insertions(+), 25 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index ce289daa..90ae56ee 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -5,8 +5,8 @@ .minimum_zig_version = "0.15.2", .dependencies = .{ .v8 = .{ - .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.4.2.tar.gz", - .hash = "v8-0.0.0-xddH672HBABNrbtyNk9o4QXxQJTlpjiCscmdEQuMvKnR", + .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/6b9f3d5b419ff3bed9aa403b36883744d21057e5.tar.gz", + .hash = "v8-0.0.0-xddH61GNBABFJ11FJ8KDYXITyjKh4jQ54taEenYek2xJ", }, // .v8 = .{ .path = "../zig-v8-fork" }, .brotli = .{ diff --git 
a/src/browser/js/Caller.zig b/src/browser/js/Caller.zig index 2b29c036..798157df 100644 --- a/src/browser/js/Caller.zig +++ b/src/browser/js/Caller.zig @@ -846,7 +846,23 @@ fn getArgs(comptime F: type, comptime offset: usize, local: *const Local, info: @field(args, tupleFieldName(field_index)) = null; } else { const js_val = info.getArg(@intCast(i), local); - @field(args, tupleFieldName(field_index)) = local.jsValueToZig(param.type.?, js_val) catch { + // Only fold errors we don't recognize into InvalidArgument; let + // domain-meaningful ones (e.g. InvalidCharacterError from a + // String.OneByte param) propagate so handleError can map them + // to the right DOMException. Compared by name because the per- + // type instantiation of jsValueToZig may not include such errors + // in its inferred error set. + @field(args, tupleFieldName(field_index)) = local.jsValueToZig(param.type.?, js_val) catch |err| { + const DOMException = @import("../webapi/DOMException.zig"); + if (DOMException.fromError(err) != null) { + // I don't love this. But we have [a few] cases when trying to + // map a JS Value that we have a specific DOMException to throw. + // Ideally we should only do this if dom_exception = true in the + // bridge definition. But we don't have access to that here. + // Instead, we just rely on the fact that local.jsValueToZig + // only throws a DOMException-known error when it should. 
+ return err; + } return error.InvalidArgument; }; } diff --git a/src/browser/js/Isolate.zig b/src/browser/js/Isolate.zig index 08df142a..fcb8eead 100644 --- a/src/browser/js/Isolate.zig +++ b/src/browser/js/Isolate.zig @@ -73,6 +73,10 @@ pub fn initStringHandle(self: Isolate, str: []const u8) *const v8.String { return v8.v8__String__NewFromUtf8(self.handle, str.ptr, v8.kNormal, @as(c_int, @intCast(str.len))).?; } +pub fn initOneByteStringHandle(self: Isolate, bytes: []const u8) *const v8.String { + return v8.v8__String__NewFromOneByte(self.handle, bytes.ptr, v8.kNormal, @as(c_int, @intCast(bytes.len))).?; +} + pub fn createError(self: Isolate, msg: []const u8) *const v8.Value { const message = self.initStringHandle(msg); return v8.v8__Exception__Error(message).?; diff --git a/src/browser/js/Local.zig b/src/browser/js/Local.zig index 0a7b120f..9a09cbcc 100644 --- a/src/browser/js/Local.zig +++ b/src/browser/js/Local.zig @@ -62,6 +62,17 @@ pub fn newString(self: *const Local, str: []const u8) js.String { }; } +// Creates a JS string by mapping each input byte 0..255 directly to a JS +// code unit, with no UTF-8 decoding. Use this when handing back binary data +// (e.g. atob output) — passing those bytes through `newString` would treat +// any byte 0x80..0xFF as malformed UTF-8 and replace it with U+FFFD. +pub fn newOneByteString(self: *const Local, bytes: []const u8) js.String { + return .{ + .local = self, + .handle = self.isolate.initOneByteStringHandle(bytes), + }; +} + pub fn newObject(self: *const Local) js.Object { return .{ .local = self, @@ -745,6 +756,15 @@ fn jsValueToStruct(self: *const Local, comptime T: type, js_val: js.Value) !?T { }; }, js.String => return js_val.isString(), + js.String.OneByte => { + // Receives a "binary string": each JS code unit must fit in a byte + // (0..255). 
Throws InvalidCharacterError if any code unit is out + // of range, matching the WHATWG btoa spec — which is the main + // intended caller, but applicable to any binary-string input. + const js_str = js_val.isString() orelse return null; + if (!js_str.containsOnlyOneByte()) return error.InvalidCharacterError; + return .{ .bytes = try js_str.toOneByteSlice(self.call_arena) }; + }, string.String => { const js_str = js_val.isString() orelse return null; return try js_str.toSSO(false); diff --git a/src/browser/js/String.zig b/src/browser/js/String.zig index 5c51863f..c405b0e1 100644 --- a/src/browser/js/String.zig +++ b/src/browser/js/String.zig @@ -31,6 +31,22 @@ const String = @This(); local: *const js.Local, handle: *const v8.String, +// A byte slice that should be handed to JS as a "binary string" — each byte +// 0..255 becomes a JS code unit 0..255 (Latin-1), with no UTF-8 decoding. +// Return this from a Web API method whenever the contract is "one byte per +// JS character" (atob, FileReader.readAsBinaryString, etc.). The framework +// turns it into a V8 string via `String::NewFromOneByte`. +pub const OneByte = struct { + bytes: []const u8, +}; + +pub fn toValue(self: String) js.Value { + return .{ + .local = self.local, + .handle = @ptrCast(self.handle), + }; +} + pub fn toSlice(self: String) ![]u8 { return self._toSlice(false, self.local.call_arena); } @@ -114,3 +130,32 @@ pub fn format(self: String, writer: *std.Io.Writer) !void { pub fn len(self: String) usize { return @intCast(v8.v8__String__Utf8Length(self.handle, self.local.isolate.handle)); } + +// JS-level character (code unit) count, independent of encoding. Equivalent +// to `s.length` in JavaScript. Use this — not `len()` — when allocating a +// buffer for one-byte / Latin-1 reads. +pub fn lenChars(self: String) usize { + return @intCast(v8.v8__String__Length(self.handle)); +} + +// True iff every code unit in the string fits in a single byte (codepoint +// <= 0xFF, i.e. Latin-1). 
Used by btoa to reject strings with codepoints +// outside the binary-string range. +pub fn containsOnlyOneByte(self: String) bool { + return v8.v8__String__ContainsOnlyOneByte(self.handle); +} + +// Read the string as Latin-1 bytes — each output byte equals the +// corresponding code unit. Caller must have already established (via +// `containsOnlyOneByte`) that no code unit exceeds 0xFF; otherwise V8 +// silently truncates to the low byte. +pub fn toOneByteSlice(self: String, allocator: Allocator) ![]u8 { + const handle = self.handle; + const isolate = self.local.isolate.handle; + const length: u32 = @intCast(v8.v8__String__Length(handle)); + const buf = try allocator.alloc(u8, length); + if (length > 0) { + v8.v8__String__WriteOneByte(handle, isolate, 0, length, buf.ptr); + } + return buf; +} diff --git a/src/browser/js/js.zig b/src/browser/js/js.zig index cda8f9d5..28a1fb51 100644 --- a/src/browser/js/js.zig +++ b/src/browser/js/js.zig @@ -234,6 +234,7 @@ pub fn simpleZigValueToJs(isolate: Isolate, value: anytype, comptime fail: bool, .@"struct" => { switch (@TypeOf(value)) { string.String => return isolate.initStringHandle(value.str()), + String.OneByte => return @ptrCast(isolate.initOneByteStringHandle(value.bytes)), ArrayBuffer => { const values = value.values; const len = values.len; diff --git a/src/browser/tests/event/message.html b/src/browser/tests/event/message.html index ad1f0596..88be4650 100644 --- a/src/browser/tests/event/message.html +++ b/src/browser/tests/event/message.html @@ -40,6 +40,15 @@ } + + + + + + + +