diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index 34927f6f..136c06ab 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -13,7 +13,7 @@ inputs: zig-v8: description: 'zig v8 version to install' required: false - default: 'v0.4.2' + default: 'v0.4.3' v8: description: 'v8 version to install' required: false diff --git a/Dockerfile b/Dockerfile index 75c3e3fb..4f387993 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM debian:stable-slim ARG MINISIG=0.12 ARG ZIG_MINISIG=RWSGOq2NVecA2UPNdBUZykf1CCb147pkmdtYxgb3Ti+JO/wCYvhbAb/U ARG V8=14.0.365.4 -ARG ZIG_V8=v0.4.2 +ARG ZIG_V8=v0.4.3 ARG TARGETPLATFORM RUN apt-get update -yq && \ diff --git a/build.zig.zon b/build.zig.zon index 54c4ed95..02377e35 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -5,8 +5,8 @@ .minimum_zig_version = "0.15.2", .dependencies = .{ .v8 = .{ - .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.4.2.tar.gz", - .hash = "v8-0.0.0-xddH672HBABNrbtyNk9o4QXxQJTlpjiCscmdEQuMvKnR", + .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.4.3.tar.gz", + .hash = "v8-0.0.0-xddH61GNBABFJ11FJ8KDYXITyjKh4jQ54taEenYek2xJ", }, // .v8 = .{ .path = "../zig-v8-fork" }, .brotli = .{ diff --git a/src/browser/js/Caller.zig b/src/browser/js/Caller.zig index 2b29c036..798157df 100644 --- a/src/browser/js/Caller.zig +++ b/src/browser/js/Caller.zig @@ -846,7 +846,23 @@ fn getArgs(comptime F: type, comptime offset: usize, local: *const Local, info: @field(args, tupleFieldName(field_index)) = null; } else { const js_val = info.getArg(@intCast(i), local); - @field(args, tupleFieldName(field_index)) = local.jsValueToZig(param.type.?, js_val) catch { + // Only fold errors we don't recognize into InvalidArgument; let + // domain-meaningful ones (e.g. InvalidCharacterError from a + // String.OneByte param) propagate so handleError can map them + // to the right DOMException. Compared by name because the per- + // type instantiation of jsValueToZig may not include such errors + // in its inferred error set. + @field(args, tupleFieldName(field_index)) = local.jsValueToZig(param.type.?, js_val) catch |err| { + const DOMException = @import("../webapi/DOMException.zig"); + if (DOMException.fromError(err) != null) { + // I don't love this. But we have [a few] cases when trying to + // map a JS Value that we have a specific DOMException to throw. + // Ideally we should only do this if dom_exception = true in the + // bridge definition. But we don't have access to that here. + // Instead, we just rely on the fact that local.jsValueToZig + // only throws a DOMException-known error when it should. + return err; + } return error.InvalidArgument; }; } diff --git a/src/browser/js/Isolate.zig b/src/browser/js/Isolate.zig index 08df142a..fcb8eead 100644 --- a/src/browser/js/Isolate.zig +++ b/src/browser/js/Isolate.zig @@ -73,6 +73,10 @@ pub fn initStringHandle(self: Isolate, str: []const u8) *const v8.String { return v8.v8__String__NewFromUtf8(self.handle, str.ptr, v8.kNormal, @as(c_int, @intCast(str.len))).?; } +pub fn initOneByteStringHandle(self: Isolate, bytes: []const u8) *const v8.String { + return v8.v8__String__NewFromOneByte(self.handle, bytes.ptr, v8.kNormal, @as(c_int, @intCast(bytes.len))).?; +} + pub fn createError(self: Isolate, msg: []const u8) *const v8.Value { const message = self.initStringHandle(msg); return v8.v8__Exception__Error(message).?; diff --git a/src/browser/js/Local.zig b/src/browser/js/Local.zig index 0a7b120f..9a09cbcc 100644 --- a/src/browser/js/Local.zig +++ b/src/browser/js/Local.zig @@ -62,6 +62,17 @@ pub fn newString(self: *const Local, str: []const u8) js.String { }; } +// Creates a JS string by mapping each input byte 0..255 directly to a JS +// code unit, with no UTF-8 decoding. Use this when handing back binary data +// (e.g. atob output) — passing those bytes through `newString` would treat +// any byte 0x80..0xFF as malformed UTF-8 and replace it with U+FFFD. +pub fn newOneByteString(self: *const Local, bytes: []const u8) js.String { + return .{ + .local = self, + .handle = self.isolate.initOneByteStringHandle(bytes), + }; +} + pub fn newObject(self: *const Local) js.Object { return .{ .local = self, @@ -745,6 +756,15 @@ fn jsValueToStruct(self: *const Local, comptime T: type, js_val: js.Value) !?T { }; }, js.String => return js_val.isString(), + js.String.OneByte => { + // Receives a "binary string": each JS code unit must fit in a byte + // (0..255). Throws InvalidCharacterError if any code unit is out + // of range, matching the WHATWG btoa spec — which is the main + // intended caller, but applicable to any binary-string input. + const js_str = js_val.isString() orelse return null; + if (!js_str.containsOnlyOneByte()) return error.InvalidCharacterError; + return .{ .bytes = try js_str.toOneByteSlice(self.call_arena) }; + }, string.String => { const js_str = js_val.isString() orelse return null; return try js_str.toSSO(false); diff --git a/src/browser/js/String.zig b/src/browser/js/String.zig index 5c51863f..c405b0e1 100644 --- a/src/browser/js/String.zig +++ b/src/browser/js/String.zig @@ -31,6 +31,22 @@ const String = @This(); local: *const js.Local, handle: *const v8.String, +// A byte slice that should be handed to JS as a "binary string" — each byte +// 0..255 becomes a JS code unit 0..255 (Latin-1), with no UTF-8 decoding. +// Return this from a Web API method whenever the contract is "one byte per +// JS character" (atob, FileReader.readAsBinaryString, etc.). The framework +// turns it into a V8 string via `String::NewFromOneByte`. +pub const OneByte = struct { + bytes: []const u8, +}; + +pub fn toValue(self: String) js.Value { + return .{ + .local = self.local, + .handle = @ptrCast(self.handle), + }; +} + pub fn toSlice(self: String) ![]u8 { return self._toSlice(false, self.local.call_arena); } @@ -114,3 +130,32 @@ pub fn format(self: String, writer: *std.Io.Writer) !void { pub fn len(self: String) usize { return @intCast(v8.v8__String__Utf8Length(self.handle, self.local.isolate.handle)); } + +// JS-level character (code unit) count, independent of encoding. Equivalent +// to `s.length` in JavaScript. Use this — not `len()` — when allocating a +// buffer for one-byte / Latin-1 reads. +pub fn lenChars(self: String) usize { + return @intCast(v8.v8__String__Length(self.handle)); +} + +// True iff every code unit in the string fits in a single byte (codepoint +// <= 0xFF, i.e. Latin-1). Used by btoa to reject strings with codepoints +// outside the binary-string range. +pub fn containsOnlyOneByte(self: String) bool { + return v8.v8__String__ContainsOnlyOneByte(self.handle); +} + +// Read the string as Latin-1 bytes — each output byte equals the +// corresponding code unit. Caller must have already established (via +// `containsOnlyOneByte`) that no code unit exceeds 0xFF; otherwise V8 +// silently truncates to the low byte. +pub fn toOneByteSlice(self: String, allocator: Allocator) ![]u8 { + const handle = self.handle; + const isolate = self.local.isolate.handle; + const length: u32 = @intCast(v8.v8__String__Length(handle)); + const buf = try allocator.alloc(u8, length); + if (length > 0) { + v8.v8__String__WriteOneByte(handle, isolate, 0, length, buf.ptr); + } + return buf; +} diff --git a/src/browser/js/js.zig b/src/browser/js/js.zig index cda8f9d5..28a1fb51 100644 --- a/src/browser/js/js.zig +++ b/src/browser/js/js.zig @@ -234,6 +234,7 @@ pub fn simpleZigValueToJs(isolate: Isolate, value: anytype, comptime fail: bool, .@"struct" => { switch (@TypeOf(value)) { string.String => return isolate.initStringHandle(value.str()), + String.OneByte => return @ptrCast(isolate.initOneByteStringHandle(value.bytes)), ArrayBuffer => { const values = value.values; const len = values.len; diff --git a/src/browser/tests/event/message.html b/src/browser/tests/event/message.html index ad1f0596..88be4650 100644 --- a/src/browser/tests/event/message.html +++ b/src/browser/tests/event/message.html @@ -40,6 +40,15 @@ } + + + + + + + +