From 14aec37652e54c5aaca8cc5efedc28baab4f86fd Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 5 Jun 2026 20:44:19 +0800 Subject: [PATCH 1/2] Fix atob/btoa Passes WPT /html/webappapis/atob/base64.html Two changes 1 - Use the forgiving decoder already in data_url 2 - Coerce input (3 => "3") The 2nd change was more interesting. These take a js.String.OneByte as an optimization, which doesn't coerce. To preserve this optimization a union was used with a `raw: []const u8` fallback (and our bridge always coerces to a `[]const u8`) --- src/browser/data_url.zig | 78 +---------------- src/browser/webapi/Window.zig | 11 +-- src/browser/webapi/WorkerGlobalScope.zig | 9 +- src/browser/webapi/encoding/base64.zig | 105 ++++++++++++++++++++--- 4 files changed, 109 insertions(+), 94 deletions(-) diff --git a/src/browser/data_url.zig b/src/browser/data_url.zig index 4c39d1b1..dae889bb 100644 --- a/src/browser/data_url.zig +++ b/src/browser/data_url.zig @@ -19,6 +19,7 @@ const std = @import("std"); const URL = @import("URL.zig"); +const base64 = @import("webapi/encoding/base64.zig"); const Allocator = std.mem.Allocator; @@ -63,81 +64,10 @@ pub fn parse(arena: Allocator, url: []const u8) !Parsed { return .{ .content_type = content_type, .body = body }; } -fn base64Decode(arena: Allocator, input: []const u8) ![]u8 { +fn base64Decode(arena: Allocator, input: []const u8) ![]const u8 { // Forgiving-base64 decode — https://infra.spec.whatwg.org/#forgiving-base64-decode. - // std's decoders reject non-canonical trailing bits (e.g. "ab"), which - // forgiving-base64 tolerates, so decode by hand after validating padding. - const buf = try arena.alloc(u8, input.len); - var n: usize = 0; - for (input) |c| switch (c) { - ' ', '\t', '\n', '\r', std.ascii.control_code.ff => {}, - else => { - buf[n] = c; - n += 1; - }, - }; - var src = buf[0..n]; - - // Only a multiple-of-4 length may carry (and shed) up to two "=" of padding. - if (src.len % 4 == 0) { - if (std.mem.endsWith(u8, src, "==")) { - src = src[0 .. src.len - 2]; - } else if (std.mem.endsWith(u8, src, "=")) { - src = src[0 .. src.len - 1]; - } - } - if (src.len % 4 == 1) return error.InvalidBase64; - // Any "=" still present is misplaced padding. - if (std.mem.indexOfScalar(u8, src, '=') != null) return error.InvalidBase64; - - const out_len = src.len / 4 * 3 + switch (src.len % 4) { - 0 => @as(usize, 0), - 2 => 1, - 3 => 2, - else => unreachable, - }; - const out = try arena.alloc(u8, out_len); - - var oi: usize = 0; - var i: usize = 0; - while (i + 4 <= src.len) : (i += 4) { - const a = try b64Val(src[i]); - const b = try b64Val(src[i + 1]); - const c = try b64Val(src[i + 2]); - const d = try b64Val(src[i + 3]); - out[oi] = (a << 2) | (b >> 4); - out[oi + 1] = (b << 4) | (c >> 2); - out[oi + 2] = (c << 6) | d; - oi += 3; - } - switch (src.len - i) { - 0 => {}, - 2 => { - const a = try b64Val(src[i]); - const b = try b64Val(src[i + 1]); - out[oi] = (a << 2) | (b >> 4); - }, - 3 => { - const a = try b64Val(src[i]); - const b = try b64Val(src[i + 1]); - const c = try b64Val(src[i + 2]); - out[oi] = (a << 2) | (b >> 4); - out[oi + 1] = (b << 4) | (c >> 2); - }, - else => unreachable, - } - return out; -} - -fn b64Val(c: u8) !u8 { - return switch (c) { - 'A'...'Z' => c - 'A', - 'a'...'z' => c - 'a' + 26, - '0'...'9' => c - '0' + 52, - '+' => 62, - '/' => 63, - else => error.InvalidBase64, - }; + // Shared with atob via the encoding helper; remap to this module's error name. + return base64.decode(arena, .{.raw = input}) catch return error.InvalidBase64; } const testing = @import("../testing.zig"); diff --git a/src/browser/webapi/Window.zig b/src/browser/webapi/Window.zig index c32a7eb1..fb268b64 100644 --- a/src/browser/webapi/Window.zig +++ b/src/browser/webapi/Window.zig @@ -652,13 +652,14 @@ pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]cons }); } -pub fn btoa(_: *const Window, input: js.String.OneByte, frame: *Frame) ![]const u8 { - return @import("encoding/base64.zig").encode(frame.call_arena, input.bytes); +const base64 = @import("encoding/base64.zig"); +pub fn btoa(_: *const Window, input: base64.BinInput, frame: *Frame) ![]const u8 { + return base64.encode(frame.call_arena, input); } -pub fn atob(_: *const Window, input: js.String.OneByte, frame: *Frame) !js.String.OneByte { - const bytes = try @import("encoding/base64.zig").decode(frame.call_arena, input.bytes); - return .{ .bytes = bytes }; +pub fn atob(_: *const Window, input: base64.BinInput, frame: *Frame) !js.String.OneByte { + const decoded = try base64.decode(frame.call_arena, input); + return .{ .bytes = decoded }; } pub fn structuredClone(_: *const Window, value: js.Value) !js.Value { diff --git a/src/browser/webapi/WorkerGlobalScope.zig b/src/browser/webapi/WorkerGlobalScope.zig index 2955777b..62e96419 100644 --- a/src/browser/webapi/WorkerGlobalScope.zig +++ b/src/browser/webapi/WorkerGlobalScope.zig @@ -384,12 +384,13 @@ pub fn drainPendingMessages(self: *WorkerGlobalScope) void { self._pending_messages.clearRetainingCapacity(); } -pub fn btoa(_: *const WorkerGlobalScope, input: JS.String.OneByte, exec: *JS.Execution) ![]const u8 { - return @import("encoding/base64.zig").encode(exec.call_arena, input.bytes); +const base64 = @import("encoding/base64.zig"); +pub fn btoa(_: *const WorkerGlobalScope, input: base64.BinInput, exec: *JS.Execution) ![]const u8 { + return base64.encode(exec.call_arena, input); } -pub fn atob(_: *const WorkerGlobalScope, input: JS.String.OneByte, exec: *JS.Execution) !JS.String.OneByte { - const bytes = try @import("encoding/base64.zig").decode(exec.call_arena, input.bytes); +pub fn atob(_: *const WorkerGlobalScope, input: base64.BinInput, exec: *JS.Execution) !JS.String.OneByte { + const bytes = try base64.decode(exec.call_arena, input); return .{ .bytes = bytes }; } diff --git a/src/browser/webapi/encoding/base64.zig b/src/browser/webapi/encoding/base64.zig index 51e3b228..f04653d2 100644 --- a/src/browser/webapi/encoding/base64.zig +++ b/src/browser/webapi/encoding/base64.zig @@ -22,9 +22,25 @@ //! just deals in bytes. const std = @import("std"); +const js = @import("../../js/js.zig"); + const Allocator = std.mem.Allocator; -pub fn encode(alloc: Allocator, input: []const u8) ![]const u8 { +pub const BinInput = union(enum) { + // order matters + js_string: js.String.OneByte, + raw: []const u8, + + fn bytes(self: BinInput) []const u8 { + return switch (self) { + .js_string => |v| v.bytes, + .raw => |v| v, + }; + } +}; + +pub fn encode(alloc: Allocator, in: BinInput) ![]const u8 { + const input = in.bytes(); const encoded_len = std.base64.standard.Encoder.calcSize(input.len); const encoded = try alloc.alloc(u8, encoded_len); return std.base64.standard.Encoder.encode(encoded, input); @@ -32,17 +48,84 @@ pub fn encode(alloc: Allocator, input: []const u8) ![]const u8 { /// Forgiving base64 decode per WHATWG spec: /// https://infra.spec.whatwg.org/#forgiving-base64-decode -pub fn decode(alloc: Allocator, input: []const u8) ![]const u8 { - const trimmed = std.mem.trim(u8, input, &std.ascii.whitespace); - const unpadded = std.mem.trimRight(u8, trimmed, "="); +/// +/// std's decoders reject non-canonical trailing bits (e.g. "ab") and only trim +/// padding from the ends, neither of which match forgiving-base64 — so decode by +/// hand: strip *all* ASCII whitespace, validate padding, tolerate trailing bits. +pub fn decode(alloc: Allocator, in: BinInput) ![]const u8 { + const input = in.bytes(); - // Length % 4 == 1 is invalid (can't represent valid base64). - if (unpadded.len % 4 == 1) { - return error.InvalidCharacterError; + // Step 1: remove all ASCII whitespace (tab, LF, FF, CR, space) from anywhere. + const buf = try alloc.alloc(u8, input.len); + var n: usize = 0; + for (input) |c| switch (c) { + ' ', '\t', '\n', '\r', std.ascii.control_code.ff => {}, + else => { + buf[n] = c; + n += 1; + }, + }; + var src = buf[0..n]; + + // Step 2: only a multiple-of-4 length may carry (and shed) up to two "=". + if (src.len % 4 == 0) { + if (std.mem.endsWith(u8, src, "==")) { + src = src[0 .. src.len - 2]; + } else if (std.mem.endsWith(u8, src, "=")) { + src = src[0 .. src.len - 1]; + } } + // Step 3: a length % 4 == 1 can't represent valid base64. + if (src.len % 4 == 1) return error.InvalidCharacterError; + // Any "=" still present is misplaced padding. + if (std.mem.indexOfScalar(u8, src, '=') != null) return error.InvalidCharacterError; - const decoded_len = std.base64.standard_no_pad.Decoder.calcSizeForSlice(unpadded) catch return error.InvalidCharacterError; - const decoded = try alloc.alloc(u8, decoded_len); - std.base64.standard_no_pad.Decoder.decode(decoded, unpadded) catch return error.InvalidCharacterError; - return decoded; + const out_len = src.len / 4 * 3 + switch (src.len % 4) { + 0 => @as(usize, 0), + 2 => 1, + 3 => 2, + else => unreachable, + }; + const out = try alloc.alloc(u8, out_len); + + var oi: usize = 0; + var i: usize = 0; + while (i + 4 <= src.len) : (i += 4) { + const a = try b64Val(src[i]); + const b = try b64Val(src[i + 1]); + const c = try b64Val(src[i + 2]); + const d = try b64Val(src[i + 3]); + out[oi] = (a << 2) | (b >> 4); + out[oi + 1] = (b << 4) | (c >> 2); + out[oi + 2] = (c << 6) | d; + oi += 3; + } + switch (src.len - i) { + 0 => {}, + 2 => { + const a = try b64Val(src[i]); + const b = try b64Val(src[i + 1]); + out[oi] = (a << 2) | (b >> 4); + }, + 3 => { + const a = try b64Val(src[i]); + const b = try b64Val(src[i + 1]); + const c = try b64Val(src[i + 2]); + out[oi] = (a << 2) | (b >> 4); + out[oi + 1] = (b << 4) | (c >> 2); + }, + else => unreachable, + } + return out; +} + +fn b64Val(c: u8) !u8 { + return switch (c) { + 'A'...'Z' => c - 'A', + 'a'...'z' => c - 'a' + 26, + '0'...'9' => c - '0' + 52, + '+' => 62, + '/' => 63, + else => error.InvalidCharacterError, + }; } From b0ebbf7b3558f81921d869918c4910e0110f85a6 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 5 Jun 2026 20:46:48 +0800 Subject: [PATCH 2/2] zig fmt --- src/browser/data_url.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/data_url.zig b/src/browser/data_url.zig index dae889bb..74e56257 100644 --- a/src/browser/data_url.zig +++ b/src/browser/data_url.zig @@ -67,7 +67,7 @@ pub fn parse(arena: Allocator, url: []const u8) !Parsed { fn base64Decode(arena: Allocator, input: []const u8) ![]const u8 { // Forgiving-base64 decode — https://infra.spec.whatwg.org/#forgiving-base64-decode. // Shared with atob via the encoding helper; remap to this module's error name. - return base64.decode(arena, .{.raw = input}) catch return error.InvalidBase64; + return base64.decode(arena, .{ .raw = input }) catch return error.InvalidBase64; } const testing = @import("../testing.zig");