mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
Merge pull request #2308 from navidemad/fix-a26-textarea-crlf
forms: normalize CR/LF to CRLF in form-data set encoding
This commit is contained in:
@@ -259,7 +259,12 @@ fn urlEncodeValueUtf8(value: []const u8, comptime mode: URLEncodeMode, writer: *
|
||||
return writer.writeAll(value);
|
||||
}
|
||||
|
||||
for (value) |b| {
|
||||
var i: usize = 0;
|
||||
while (i < value.len) : (i += 1) {
|
||||
const b = value[i];
|
||||
if (comptime mode == .form) {
|
||||
if (try writeFormLineEnd(value, &i, b, writer)) continue;
|
||||
}
|
||||
if (urlEncodeUnreserved(b, mode)) {
|
||||
try writer.writeByte(b);
|
||||
} else if (b == ' ') {
|
||||
@@ -272,7 +277,12 @@ fn urlEncodeValueUtf8(value: []const u8, comptime mode: URLEncodeMode, writer: *
|
||||
|
||||
/// Percent-encode a legacy-encoded value - must also encode & and ; to preserve NCRs.
|
||||
fn urlEncodeValueLegacy(value: []const u8, comptime mode: URLEncodeMode, writer: *std.Io.Writer) !void {
|
||||
for (value) |b| {
|
||||
var i: usize = 0;
|
||||
while (i < value.len) : (i += 1) {
|
||||
const b = value[i];
|
||||
if (comptime mode == .form) {
|
||||
if (try writeFormLineEnd(value, &i, b, writer)) continue;
|
||||
}
|
||||
if (urlEncodeUnreserved(b, mode)) {
|
||||
try writer.writeByte(b);
|
||||
} else if (b == ' ') {
|
||||
@@ -286,6 +296,25 @@ fn urlEncodeValueLegacy(value: []const u8, comptime mode: URLEncodeMode, writer:
|
||||
}
|
||||
}
|
||||
|
||||
// Line-ending normalization for the HTML form-data set encoding algorithm.
// A CR not followed by LF, and an LF not preceded by CR, are each replaced by
// the CR+LF pair before percent-encoding.
//
// When `b` is CR or LF this writes "%0D%0A" to `writer` and returns true so
// the caller can skip its regular per-byte encoding. For a CR immediately
// followed by LF, `i` is also moved onto that LF so the pair is emitted once
// rather than twice. Any other byte writes nothing and returns false.
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#url-encoded-form-data
fn writeFormLineEnd(value: []const u8, i: *usize, b: u8, writer: *std.Io.Writer) !bool {
    const is_cr = b == '\r';
    if (!is_cr and b != '\n') return false;

    try writer.writeAll("%0D%0A");

    if (is_cr) {
        // Swallow the LF half of an existing CRLF; the caller's loop
        // increment then steps past it.
        const next = i.* + 1;
        if (next < value.len and value[next] == '\n') i.* = next;
    }
    return true;
}
|
||||
|
||||
fn urlEncodeShouldEscape(value: []const u8, comptime mode: URLEncodeMode) bool {
|
||||
for (value) |b| {
|
||||
if (!urlEncodeUnreserved(b, mode)) {
|
||||
@@ -409,3 +438,90 @@ test "KeyValueList: urlEncode Big5 unmappable character" {
|
||||
// 炣 percent-encoded is %26%2328835%3B
|
||||
try testing.expectString("q=%26%2328835%3B", buf.written());
|
||||
}
|
||||
|
||||
// HTML form-data set encoding algorithm: line endings in entry names and values
|
||||
// are normalized to CRLF — every stray LF (not preceded by CR) and every stray
|
||||
// CR (not followed by LF) is replaced with CR+LF before percent-encoding. The
|
||||
// normalization applies only to .form mode; URLSearchParams (.query) follows
|
||||
// the URL standard's serializer, which doesn't normalize.
|
||||
// https://html.spec.whatwg.org/multipage/form-control-infrastructure.html#url-encoded-form-data
|
||||
test "KeyValueList: urlEncode .form normalizes stray LF to CRLF" {
    const arena = testing.arena_allocator;

    // Value with two bare LFs and no CR anywhere.
    var entries = KeyValueList.init();
    try entries.append(arena, "msg", "line1\nline2\nline3");

    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.form, null, "UTF-8", &out.writer);

    // Each bare LF is serialized as a percent-encoded CR+LF pair.
    try testing.expectString("msg=line1%0D%0Aline2%0D%0Aline3", out.written());
}
|
||||
|
||||
test "KeyValueList: urlEncode .form normalizes stray CR to CRLF" {
    const arena = testing.arena_allocator;

    // Value with a single bare CR (no LF after it).
    var entries = KeyValueList.init();
    try entries.append(arena, "msg", "line1\rline2");

    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.form, null, "UTF-8", &out.writer);

    // The bare CR is serialized as a percent-encoded CR+LF pair.
    try testing.expectString("msg=line1%0D%0Aline2", out.written());
}
|
||||
|
||||
test "KeyValueList: urlEncode .form preserves existing CRLF" {
    const arena = testing.arena_allocator;

    // Value that already uses a well-formed CR+LF pair.
    var entries = KeyValueList.init();
    try entries.append(arena, "msg", "line1\r\nline2");

    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.form, null, "UTF-8", &out.writer);

    // Exactly one encoded pair is expected: the existing CRLF is not doubled.
    try testing.expectString("msg=line1%0D%0Aline2", out.written());
}
|
||||
|
||||
test "KeyValueList: urlEncode .form handles mixed line endings" {
    const arena = testing.arena_allocator;

    // Input holds, in order: a CR LF pair, a bare LF, and a bare CR.
    var entries = KeyValueList.init();
    try entries.append(arena, "msg", "a\r\nb\nc\rd");

    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.form, null, "UTF-8", &out.writer);

    // All three line endings serialize to the same encoded CR+LF sequence.
    try testing.expectString("msg=a%0D%0Ab%0D%0Ac%0D%0Ad", out.written());
}
|
||||
|
||||
test "KeyValueList: urlEncode .form normalizes line endings in entry names" {
    const arena = testing.arena_allocator;

    // The bare LF sits in the entry name, not the value.
    var entries = KeyValueList.init();
    try entries.append(arena, "n\nm", "v");

    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.form, null, "UTF-8", &out.writer);

    // Names get the same CR+LF normalization as values.
    try testing.expectString("n%0D%0Am=v", out.written());
}
|
||||
|
||||
test "KeyValueList: urlEncode .form normalizes legacy charsets too" {
    const arena = testing.arena_allocator;

    var entries = KeyValueList.init();
    try entries.append(arena, "msg", "a\nb");

    // Unlike the UTF-8 tests, this call passes an allocator and a non-UTF-8
    // charset label ("GBK").
    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.form, arena, "GBK", &out.writer);

    // The bare LF is still serialized as an encoded CR+LF pair.
    try testing.expectString("msg=a%0D%0Ab", out.written());
}
|
||||
|
||||
test "KeyValueList: urlEncode .query does NOT normalize line endings" {
    // CRLF normalization belongs to the HTML form-data set encoding wrapper
    // only. The URL standard's application/x-www-form-urlencoded serializer
    // (the one URLSearchParams uses) leaves line endings as they are.
    // https://url.spec.whatwg.org/#concept-urlencoded-serializer
    const arena = testing.arena_allocator;

    // One bare LF followed by one bare CR.
    var entries = KeyValueList.init();
    try entries.append(arena, "msg", "a\nb\rc");

    var out = std.Io.Writer.Allocating.init(arena);
    try entries.urlEncode(.query, null, "UTF-8", &out.writer);

    // Each byte is percent-encoded on its own; no CR+LF pairs are introduced.
    try testing.expectString("msg=a%0Ab%0Dc", out.written());
}
|
||||
|
||||
Reference in New Issue
Block a user