From bc4fcdf8b9d114b9b0184346f88ff1a7de30b632 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Mon, 27 Apr 2026 08:33:05 +0200 Subject: [PATCH 1/2] selector: decode CSS escape sequences inside quoted attribute values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Parser.attributeValue` walked quoted attribute values with `indexOfScalarPos` to locate the closing quote and returned the raw byte slice between the quotes — backslash escapes were neither honored as escape boundaries nor decoded. As a result, `[data-x="abc\\def"]` matched against the literal 8-byte string `abc\\def` instead of the 7-byte string `abc\def` the author intended, and `[data-x="foo\"bar"]` truncated the value at the escaped inner quote. Walk the string char-by-char respecting backslash escapes per CSS Syntax Level 3 §4.3.5 (consume-string-token), reusing the existing `parseEscape` helper that already powers `parseIdentifier` for `#id` / `.class` selectors. Decode `\\`, `\"`/`\'`, and `\<hex>` (1–6 hex digits with optional whitespace terminator), drop `\<newline>` line continuations, and surface bare newlines / trailing backslashes as `InvalidAttributeSelector`. Closes #2268 --- .../element/attribute_value_escapes.html | 31 ++++ src/browser/webapi/selector/Parser.zig | 136 +++++++++++++++++- 2 files changed, 161 insertions(+), 6 deletions(-) create mode 100644 src/browser/tests/element/attribute_value_escapes.html diff --git a/src/browser/tests/element/attribute_value_escapes.html b/src/browser/tests/element/attribute_value_escapes.html new file mode 100644 index 00000000..8ccefbd9 --- /dev/null +++ b/src/browser/tests/element/attribute_value_escapes.html @@ -0,0 +1,31 @@ + + + +

backslash

+

embedded quote

+ + diff --git a/src/browser/webapi/selector/Parser.zig b/src/browser/webapi/selector/Parser.zig index 947b0211..c24144c9 100644 --- a/src/browser/webapi/selector/Parser.zig +++ b/src/browser/webapi/selector/Parser.zig @@ -921,7 +921,7 @@ fn attribute(self: *Parser, arena: Allocator) !Selector.Attribute { const matcher_type = try self.attributeMatcher(); _ = self.skipSpaces(); - const value_raw = try self.attributeValue(); + const value_raw = try self.attributeValue(arena); const value = try arena.dupe(u8, value_raw); _ = self.skipSpaces(); @@ -1002,7 +1002,7 @@ fn attributeMatcher(self: *Parser) !std.meta.FieldEnum(Selector.AttributeMatcher }; } -fn attributeValue(self: *Parser) ![]const u8 { +fn attributeValue(self: *Parser, arena: Allocator) ![]const u8 { const input = self.input; if (input.len == 0) { return error.InvalidAttributeSelector; @@ -1010,10 +1010,41 @@ fn attributeValue(self: *Parser) ![]const u8 { const quote = input[0]; if (quote == '"' or quote == '\'') { - const end = std.mem.indexOfScalarPos(u8, input, 1, quote) orelse return error.InvalidAttributeSelector; - const value = input[1..end]; - self.input = input[end + 1 ..]; - return value; + // Walk the string respecting backslash escapes per CSS Syntax Level 3 §4.3.5. + // Decode \\ \" \' and \, treat \ as a line continuation, + // and stop at the matching unescaped closing quote. + // https://drafts.csswg.org/css-syntax/#consume-string-token + var result: std.ArrayList(u8) = .empty; + var i: usize = 1; + while (i < input.len and input[i] != quote) { + const b = input[i]; + if (b == '\\') { + if (i + 1 >= input.len) { + // Backslash at EOF inside a string is a parse error per spec; + // surface it as a missing closing quote. + return error.InvalidAttributeSelector; + } + const after = input[i + 1]; + if (after == '\n') { + // Escaped newline inside a string is a line continuation: drop both. 
+ i += 2; + continue; + } + const escape = try parseEscape(input[i + 1 ..], arena); + try result.appendSlice(arena, escape.bytes); + i += 1 + escape.consumed; + continue; + } + if (b == '\n') { + // Bare newline terminates a string token (parse error). + return error.InvalidAttributeSelector; + } + try result.append(arena, b); + i += 1; + } + if (i >= input.len) return error.InvalidAttributeSelector; + self.input = input[i + 1 ..]; + return result.items; } var i: usize = 0; @@ -1546,3 +1577,96 @@ test "Selector: Parser.parseNthPattern" { try testing.expectEqual(" )", parser.input); } } + +test "Selector: Parser.attributeValue" { + defer testing.reset(); + const arena = testing.arena_allocator; + + // Unquoted identifier value (unchanged path). + { + var parser = Parser{ .input = "abc]" }; + try testing.expectEqual("abc", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Plain double-quoted value with no escapes. + { + var parser = Parser{ .input = "\"abc\"]" }; + try testing.expectEqual("abc", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Plain single-quoted value with no escapes. + { + var parser = Parser{ .input = "'abc']" }; + try testing.expectEqual("abc", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Escaped backslash inside a double-quoted value: "abc\\def" -> abc\def. + { + var parser = Parser{ .input = "\"abc\\\\def\"]" }; + try testing.expectEqual("abc\\def", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Escaped quote inside a double-quoted value: "foo\"bar" -> foo"bar. + { + var parser = Parser{ .input = "\"foo\\\"bar\"]" }; + try testing.expectEqual("foo\"bar", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Escaped single quote inside a single-quoted value: 'foo\'bar' -> foo'bar. 
+ { + var parser = Parser{ .input = "'foo\\'bar']" }; + try testing.expectEqual("foo'bar", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Hex escape with explicit space terminator: "\41 B" -> "AB" (space is consumed). + { + var parser = Parser{ .input = "\"\\41 B\"]" }; + try testing.expectEqual("AB", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Hex escape consumes up to 6 hex digits with no delimiter: "\41B" -> "Л" (U+041B). + { + var parser = Parser{ .input = "\"\\41B\"]" }; + try testing.expectEqual("\u{041B}", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Hex escape decoding to a multi-byte UTF-8 sequence: "\1F3A8" -> "🎨". + { + var parser = Parser{ .input = "\"\\1F3A8\"]" }; + try testing.expectEqual("🎨", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Escaped newline inside a string is a line continuation (drops the newline). + { + var parser = Parser{ .input = "\"foo\\\nbar\"]" }; + try testing.expectEqual("foobar", try parser.attributeValue(arena)); + try testing.expectEqual("]", parser.input); + } + + // Missing closing quote. + { + var parser = Parser{ .input = "\"abc" }; + try testing.expectError(error.InvalidAttributeSelector, parser.attributeValue(arena)); + } + + // Unescaped newline inside a string terminates with a parse error. + { + var parser = Parser{ .input = "\"abc\ndef\"]" }; + try testing.expectError(error.InvalidAttributeSelector, parser.attributeValue(arena)); + } + + // Trailing backslash before EOF is a parse error. 
+ { + var parser = Parser{ .input = "\"abc\\" }; + try testing.expectError(error.InvalidAttributeSelector, parser.attributeValue(arena)); + } +} From a5e5639a14ec22170c09627b9cf1a62d8f897bac Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Mon, 27 Apr 2026 21:41:13 +0200 Subject: [PATCH 2/2] selector: own attributeValue result in-function, avoid double dupe Per review feedback. attributeValue now guarantees an arena-owned return: the quoted path already allocates into the arena via the ArrayList, so we just dupe in the unquoted path and drop the redundant arena.dupe at the call site. Pre-size the ArrayList to input.len since escapes only shrink, skipping grow-from-zero reallocs. --- src/browser/webapi/selector/Parser.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/browser/webapi/selector/Parser.zig b/src/browser/webapi/selector/Parser.zig index c24144c9..d327e65f 100644 --- a/src/browser/webapi/selector/Parser.zig +++ b/src/browser/webapi/selector/Parser.zig @@ -921,8 +921,7 @@ fn attribute(self: *Parser, arena: Allocator) !Selector.Attribute { const matcher_type = try self.attributeMatcher(); _ = self.skipSpaces(); - const value_raw = try self.attributeValue(arena); - const value = try arena.dupe(u8, value_raw); + const value = try self.attributeValue(arena); _ = self.skipSpaces(); // Parse optional case-sensitivity flag @@ -1014,7 +1013,7 @@ fn attributeValue(self: *Parser, arena: Allocator) ![]const u8 { // Decode \\ \" \' and \, treat \ as a line continuation, // and stop at the matching unescaped closing quote. 
// https://drafts.csswg.org/css-syntax/#consume-string-token - var result: std.ArrayList(u8) = .empty; + var result = try std.ArrayList(u8).initCapacity(arena, input.len); var i: usize = 1; while (i < input.len and input[i] != quote) { const b = input[i]; @@ -1063,7 +1062,7 @@ fn attributeValue(self: *Parser, arena: Allocator) ![]const u8 { const value = input[0..i]; self.input = input[i..]; - return value; + return arena.dupe(u8, value); } fn asUint(comptime string: anytype) std.meta.Int(