Fix atob/btoa

Passes WPT /html/webappapis/atob/base64.html

Two changes:
1. Use the forgiving decoder already in data_url.
2. Coerce input (3 => "3").

The second change was more interesting. atob and btoa take a js.String.OneByte
as an optimization, but that type doesn't coerce its input. To preserve the
optimization, a union is used with a `raw: []const u8` fallback (our bridge
always coerces to a `[]const u8`).
This commit is contained in:
Karl Seguin
2026-06-05 20:44:19 +08:00
parent 23120885be
commit 14aec37652
4 changed files with 109 additions and 94 deletions

View File

@@ -19,6 +19,7 @@
const std = @import("std");
const URL = @import("URL.zig");
const base64 = @import("webapi/encoding/base64.zig");
const Allocator = std.mem.Allocator;
@@ -63,81 +64,10 @@ pub fn parse(arena: Allocator, url: []const u8) !Parsed {
return .{ .content_type = content_type, .body = body };
}
fn base64Decode(arena: Allocator, input: []const u8) ![]u8 {
fn base64Decode(arena: Allocator, input: []const u8) ![]const u8 {
// Forgiving-base64 decode — https://infra.spec.whatwg.org/#forgiving-base64-decode.
// std's decoders reject non-canonical trailing bits (e.g. "ab"), which
// forgiving-base64 tolerates, so decode by hand after validating padding.
const buf = try arena.alloc(u8, input.len);
var n: usize = 0;
for (input) |c| switch (c) {
' ', '\t', '\n', '\r', std.ascii.control_code.ff => {},
else => {
buf[n] = c;
n += 1;
},
};
var src = buf[0..n];
// Only a multiple-of-4 length may carry (and shed) up to two "=" of padding.
if (src.len % 4 == 0) {
if (std.mem.endsWith(u8, src, "==")) {
src = src[0 .. src.len - 2];
} else if (std.mem.endsWith(u8, src, "=")) {
src = src[0 .. src.len - 1];
}
}
if (src.len % 4 == 1) return error.InvalidBase64;
// Any "=" still present is misplaced padding.
if (std.mem.indexOfScalar(u8, src, '=') != null) return error.InvalidBase64;
const out_len = src.len / 4 * 3 + switch (src.len % 4) {
0 => @as(usize, 0),
2 => 1,
3 => 2,
else => unreachable,
};
const out = try arena.alloc(u8, out_len);
var oi: usize = 0;
var i: usize = 0;
while (i + 4 <= src.len) : (i += 4) {
const a = try b64Val(src[i]);
const b = try b64Val(src[i + 1]);
const c = try b64Val(src[i + 2]);
const d = try b64Val(src[i + 3]);
out[oi] = (a << 2) | (b >> 4);
out[oi + 1] = (b << 4) | (c >> 2);
out[oi + 2] = (c << 6) | d;
oi += 3;
}
switch (src.len - i) {
0 => {},
2 => {
const a = try b64Val(src[i]);
const b = try b64Val(src[i + 1]);
out[oi] = (a << 2) | (b >> 4);
},
3 => {
const a = try b64Val(src[i]);
const b = try b64Val(src[i + 1]);
const c = try b64Val(src[i + 2]);
out[oi] = (a << 2) | (b >> 4);
out[oi + 1] = (b << 4) | (c >> 2);
},
else => unreachable,
}
return out;
}
fn b64Val(c: u8) !u8 {
return switch (c) {
'A'...'Z' => c - 'A',
'a'...'z' => c - 'a' + 26,
'0'...'9' => c - '0' + 52,
'+' => 62,
'/' => 63,
else => error.InvalidBase64,
};
// Shared with atob via the encoding helper; remap to this module's error name.
return base64.decode(arena, .{.raw = input}) catch return error.InvalidBase64;
}
const testing = @import("../testing.zig");

View File

@@ -652,13 +652,14 @@ pub fn postMessage(self: *Window, message: js.Value.Temp, target_origin: ?[]cons
});
}
pub fn btoa(_: *const Window, input: js.String.OneByte, frame: *Frame) ![]const u8 {
return @import("encoding/base64.zig").encode(frame.call_arena, input.bytes);
const base64 = @import("encoding/base64.zig");
/// Base64-encodes `input` and returns the encoded text.
/// The result is allocated from `frame.call_arena`; allocation failures
/// from `base64.encode` are propagated to the caller.
pub fn btoa(_: *const Window, input: base64.BinInput, frame: *Frame) ![]const u8 {
    const arena = frame.call_arena;
    return base64.encode(arena, input);
}
pub fn atob(_: *const Window, input: js.String.OneByte, frame: *Frame) !js.String.OneByte {
const bytes = try @import("encoding/base64.zig").decode(frame.call_arena, input.bytes);
return .{ .bytes = bytes };
/// Decodes base64 `input` and wraps the resulting bytes in a
/// `js.String.OneByte`. The bytes are allocated from `frame.call_arena`;
/// any error returned by `base64.decode` is propagated unchanged.
pub fn atob(_: *const Window, input: base64.BinInput, frame: *Frame) !js.String.OneByte {
    const arena = frame.call_arena;
    return .{ .bytes = try base64.decode(arena, input) };
}
pub fn structuredClone(_: *const Window, value: js.Value) !js.Value {

View File

@@ -384,12 +384,13 @@ pub fn drainPendingMessages(self: *WorkerGlobalScope) void {
self._pending_messages.clearRetainingCapacity();
}
pub fn btoa(_: *const WorkerGlobalScope, input: JS.String.OneByte, exec: *JS.Execution) ![]const u8 {
return @import("encoding/base64.zig").encode(exec.call_arena, input.bytes);
const base64 = @import("encoding/base64.zig");
/// Base64-encodes `input` and returns the encoded text.
/// The result is allocated from `exec.call_arena`; allocation failures
/// from `base64.encode` are propagated to the caller.
pub fn btoa(_: *const WorkerGlobalScope, input: base64.BinInput, exec: *JS.Execution) ![]const u8 {
    const arena = exec.call_arena;
    return base64.encode(arena, input);
}
pub fn atob(_: *const WorkerGlobalScope, input: JS.String.OneByte, exec: *JS.Execution) !JS.String.OneByte {
const bytes = try @import("encoding/base64.zig").decode(exec.call_arena, input.bytes);
/// Decodes base64 `input` and wraps the resulting bytes in a
/// `JS.String.OneByte`. The bytes are allocated from `exec.call_arena`;
/// any error returned by `base64.decode` is propagated unchanged.
pub fn atob(_: *const WorkerGlobalScope, input: base64.BinInput, exec: *JS.Execution) !JS.String.OneByte {
    const arena = exec.call_arena;
    return .{ .bytes = try base64.decode(arena, input) };
}

View File

@@ -22,9 +22,25 @@
//! just deals in bytes.
const std = @import("std");
const js = @import("../../js/js.zig");
const Allocator = std.mem.Allocator;
pub fn encode(alloc: Allocator, input: []const u8) ![]const u8 {
/// Byte input accepted by `encode` and `decode`: either a JS one-byte string
/// or a plain byte slice.
pub const BinInput = union(enum) {
    // Variant order matters: keep `js_string` first and `raw` last.
    // NOTE(review): presumably the JS bridge tries variants in declaration
    // order, so the non-coercing one-byte string must be attempted before the
    // always-coercing `[]const u8` fallback — confirm against the bridge.
    js_string: js.String.OneByte,
    raw: []const u8,

    /// Returns the underlying bytes regardless of which variant is active.
    fn bytes(self: BinInput) []const u8 {
        switch (self) {
            .js_string => |one_byte| return one_byte.bytes,
            .raw => |slice| return slice,
        }
    }
};
pub fn encode(alloc: Allocator, in: BinInput) ![]const u8 {
const input = in.bytes();
const encoded_len = std.base64.standard.Encoder.calcSize(input.len);
const encoded = try alloc.alloc(u8, encoded_len);
return std.base64.standard.Encoder.encode(encoded, input);
@@ -32,17 +48,84 @@ pub fn encode(alloc: Allocator, input: []const u8) ![]const u8 {
/// Forgiving base64 decode per WHATWG spec:
/// https://infra.spec.whatwg.org/#forgiving-base64-decode
pub fn decode(alloc: Allocator, input: []const u8) ![]const u8 {
const trimmed = std.mem.trim(u8, input, &std.ascii.whitespace);
const unpadded = std.mem.trimRight(u8, trimmed, "=");
///
/// std's decoders reject non-canonical trailing bits (e.g. "ab") and only trim
/// padding from the ends, neither of which match forgiving-base64 — so decode by
/// hand: strip *all* ASCII whitespace, validate padding, tolerate trailing bits.
pub fn decode(alloc: Allocator, in: BinInput) ![]const u8 {
const input = in.bytes();
// Length % 4 == 1 is invalid (can't represent valid base64).
if (unpadded.len % 4 == 1) {
return error.InvalidCharacterError;
// Step 1: remove all ASCII whitespace (tab, LF, FF, CR, space) from anywhere.
const buf = try alloc.alloc(u8, input.len);
var n: usize = 0;
for (input) |c| switch (c) {
' ', '\t', '\n', '\r', std.ascii.control_code.ff => {},
else => {
buf[n] = c;
n += 1;
},
};
var src = buf[0..n];
// Step 2: only a multiple-of-4 length may carry (and shed) up to two "=".
if (src.len % 4 == 0) {
if (std.mem.endsWith(u8, src, "==")) {
src = src[0 .. src.len - 2];
} else if (std.mem.endsWith(u8, src, "=")) {
src = src[0 .. src.len - 1];
}
}
// Step 3: a length % 4 == 1 can't represent valid base64.
if (src.len % 4 == 1) return error.InvalidCharacterError;
// Any "=" still present is misplaced padding.
if (std.mem.indexOfScalar(u8, src, '=') != null) return error.InvalidCharacterError;
const decoded_len = std.base64.standard_no_pad.Decoder.calcSizeForSlice(unpadded) catch return error.InvalidCharacterError;
const decoded = try alloc.alloc(u8, decoded_len);
std.base64.standard_no_pad.Decoder.decode(decoded, unpadded) catch return error.InvalidCharacterError;
return decoded;
const out_len = src.len / 4 * 3 + switch (src.len % 4) {
0 => @as(usize, 0),
2 => 1,
3 => 2,
else => unreachable,
};
const out = try alloc.alloc(u8, out_len);
var oi: usize = 0;
var i: usize = 0;
while (i + 4 <= src.len) : (i += 4) {
const a = try b64Val(src[i]);
const b = try b64Val(src[i + 1]);
const c = try b64Val(src[i + 2]);
const d = try b64Val(src[i + 3]);
out[oi] = (a << 2) | (b >> 4);
out[oi + 1] = (b << 4) | (c >> 2);
out[oi + 2] = (c << 6) | d;
oi += 3;
}
switch (src.len - i) {
0 => {},
2 => {
const a = try b64Val(src[i]);
const b = try b64Val(src[i + 1]);
out[oi] = (a << 2) | (b >> 4);
},
3 => {
const a = try b64Val(src[i]);
const b = try b64Val(src[i + 1]);
const c = try b64Val(src[i + 2]);
out[oi] = (a << 2) | (b >> 4);
out[oi + 1] = (b << 4) | (c >> 2);
},
else => unreachable,
}
return out;
}
/// Maps one character of the standard base64 alphabet to its 6-bit value
/// (A-Z => 0..25, a-z => 26..51, 0-9 => 52..61, '+' => 62, '/' => 63).
/// Any other byte yields `error.InvalidCharacterError`.
fn b64Val(c: u8) !u8 {
    if (c >= 'A' and c <= 'Z') return c - 'A';
    if (c >= 'a' and c <= 'z') return c - 'a' + 26;
    if (c >= '0' and c <= '9') return c - '0' + 52;
    if (c == '+') return 62;
    if (c == '/') return 63;
    return error.InvalidCharacterError;
}