mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
url: add component encode set and use in tools
This commit is contained in:
@@ -269,9 +269,21 @@ pub fn ensureEncoded(allocator: Allocator, url_in: [:0]const u8, encoding: []con
|
||||
return buf.items[0 .. buf.items.len - 1 :0];
|
||||
}
|
||||
|
||||
/// Percent-encode set selector, passed at comptime to the percent-encoding
/// helpers to pick which characters must be escaped for a given URL location.
const EncodeSet = enum {
    path,
    query,
    query_legacy,
    userinfo,
    fragment,
};
|
||||
/// Names the percent-encode set that the URL encoding helpers apply.
///
/// `path`, `query`, `query_legacy`, `userinfo` and `fragment` correspond to
/// the RFC 3986 / WHATWG URL Standard sets of the same name. They expect
/// input that is already structured for that location (for example
/// `key=val&key=val` for `query`) and escape only the characters that are
/// not permitted there.
///
/// `component` is the strict set: anything outside the RFC 3986 unreserved
/// characters is escaped, sub-delims (`& = + ! * ' ( ) , $ ;`) included.
/// Choose it when an arbitrary string is embedded as one URI component,
/// such as a query-parameter value, so that reserved characters in the
/// input cannot alter the structure of the URL.
pub const EncodeSet = enum {
    path,
    query,
    query_legacy,
    userinfo,
    fragment,
    component,
};
|
||||
|
||||
fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 {
|
||||
pub fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime encode_set: EncodeSet) ![]const u8 {
|
||||
// Check if encoding is needed
|
||||
var needs_encoding = false;
|
||||
for (segment) |c| {
|
||||
@@ -290,8 +302,11 @@ fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime enco
|
||||
while (i < segment.len) : (i += 1) {
|
||||
const c = segment[i];
|
||||
|
||||
// Check if this is an already-encoded sequence (%XX)
|
||||
if (c == '%' and i + 2 < segment.len) {
|
||||
// For URL-canonicalization sets, preserve existing %XX sequences so
|
||||
// already-encoded inputs round-trip cleanly. The `component` set treats
|
||||
// input as opaque and re-encodes `%` itself, since the caller is
|
||||
// embedding raw user data and a literal '%' must not be misread.
|
||||
if (encode_set != .component and c == '%' and i + 2 < segment.len) {
|
||||
const end = i + 2;
|
||||
const h1 = segment[i + 1];
|
||||
const h2 = segment[end];
|
||||
@@ -362,13 +377,14 @@ fn shouldPercentEncode(c: u8, comptime encode_set: EncodeSet) bool {
|
||||
return switch (c) {
|
||||
// Unreserved characters (RFC 3986)
|
||||
'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => false,
|
||||
// sub-delims allowed in path/query but some must be encoded in userinfo/query_legacy
|
||||
'!', '$', '\'', '(', ')', '*', '+', ',' => false,
|
||||
// sub-delims allowed in path/query but some must be encoded in userinfo/query_legacy/component
|
||||
'!', '$', '\'', '(', ')', '*', '+', ',' => encode_set == .component,
|
||||
// '&' and ';' must be encoded for legacy encoding (to preserve NCRs like &#nnnnn;)
|
||||
'&', ';' => encode_set == .userinfo or encode_set == .query_legacy,
|
||||
'=' => encode_set == .userinfo,
|
||||
// Separators: userinfo must encode these
|
||||
'/', ':', '@' => encode_set == .userinfo,
|
||||
// and for component encoding (so a value can't break out into a new param)
|
||||
'&', ';' => encode_set == .userinfo or encode_set == .query_legacy or encode_set == .component,
|
||||
'=' => encode_set == .userinfo or encode_set == .component,
|
||||
// Separators: userinfo and component must encode these
|
||||
'/', ':', '@' => encode_set == .userinfo or encode_set == .component,
|
||||
// '?' is allowed in queries only
|
||||
'?' => encode_set != .query and encode_set != .query_legacy,
|
||||
// '#' is allowed in fragments only
|
||||
@@ -1210,6 +1226,37 @@ test "URL: ensureEncoded" {
|
||||
}
|
||||
}
|
||||
|
||||
test "URL: percentEncodeSegment component passes unreserved chars through" {
    defer testing.reset();

    // Letters, digits and `-._~` are expected to come back untouched even
    // under the strict `.component` set.
    const encoded = try percentEncodeSegment(
        testing.arena_allocator,
        "abcXYZ012-._~",
        .component,
    );
    try testing.expectString("abcXYZ012-._~", encoded);
}
|
||||
|
||||
test "URL: percentEncodeSegment component encodes spaces, sub-delims and reserved chars" {
    defer testing.reset();

    // Space, '&' and '=' would alter a query string's structure if left raw.
    const structural = try percentEncodeSegment(
        testing.arena_allocator,
        "hello world&q=1",
        .component,
    );
    try testing.expectString("hello%20world%26q%3D1", structural);

    // Sub-delims '+', '!' and '*' are escaped as well under `.component`.
    const sub_delims = try percentEncodeSegment(
        testing.arena_allocator,
        "a+b!c*d",
        .component,
    );
    try testing.expectString("a%2Bb%21c%2Ad", sub_delims);
}
|
||||
|
||||
test "URL: percentEncodeSegment component encodes UTF-8 bytes" {
    defer testing.reset();

    // 'é' is the two-byte UTF-8 sequence 0xC3 0xA9; each byte is escaped.
    const encoded = try percentEncodeSegment(
        testing.arena_allocator,
        "café",
        .component,
    );
    try testing.expectString("caf%C3%A9", encoded);
}
|
||||
|
||||
test "URL: percentEncodeSegment component re-encodes literal '%' in raw input" {
    defer testing.reset();

    // A trailing '%' with nothing after it must be escaped as %25.
    const bare_percent = try percentEncodeSegment(
        testing.arena_allocator,
        "100%",
        .component,
    );
    try testing.expectString("100%25", bare_percent);

    // "%2A" looks like an existing escape, but `.component` input is opaque
    // user data, so the '%' itself is still escaped.
    const escape_lookalike = try percentEncodeSegment(
        testing.arena_allocator,
        "100%2A",
        .component,
    );
    try testing.expectString("100%252A", escape_lookalike);
}
|
||||
|
||||
test "URL: resolve with encoding" {
|
||||
defer testing.reset();
|
||||
|
||||
|
||||
@@ -417,7 +417,7 @@ fn execSearch(session: *lp.Session, arena: std.mem.Allocator, registry: *CDPNode
|
||||
const args = try parseArgsOrErr(SearchParams, arena, arguments) orelse return ToolError.InvalidParams;
|
||||
if (args.query.len == 0) return ToolError.InvalidParams;
|
||||
|
||||
const encoded = percentEncodeQuery(arena, args.query) catch return ToolError.OutOfMemory;
|
||||
const encoded = lp.URL.percentEncodeSegment(arena, args.query, .component) catch return ToolError.OutOfMemory;
|
||||
const google_url = std.fmt.allocPrintSentinel(
|
||||
arena,
|
||||
"https://www.google.com/search?q={s}&hl=en&gl=us",
|
||||
@@ -459,21 +459,6 @@ fn renderFrameMarkdown(arena: std.mem.Allocator, frame: *lp.Frame) ToolError![]c
|
||||
return aw.written();
|
||||
}
|
||||
|
||||
/// Percent-encodes `input` for use as a single query-parameter value.
///
/// Bytes in the RFC 3986 unreserved set (`A-Z a-z 0-9 - . _ ~`) are copied
/// verbatim; every other byte, including each byte of a multi-byte UTF-8
/// sequence, is written as `%XX` with uppercase hex digits.
///
/// The result is allocated from `arena`. Returns `error.OutOfMemory` if an
/// allocation fails.
fn percentEncodeQuery(arena: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
    const upper_hex = "0123456789ABCDEF";

    var encoded: std.ArrayList(u8) = .empty;
    for (input) |byte| {
        const is_unreserved = switch (byte) {
            'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => true,
            else => false,
        };
        if (is_unreserved) {
            try encoded.append(arena, byte);
            continue;
        }

        // High nibble first, then low nibble: 0xC3 -> "%C3".
        const escape = [3]u8{ '%', upper_hex[byte >> 4], upper_hex[byte & 0x0F] };
        try encoded.appendSlice(arena, &escape);
    }
    return encoded.toOwnedSlice(arena);
}
|
||||
|
||||
fn execMarkdown(session: *lp.Session, arena: std.mem.Allocator, registry: *CDPNode.Registry, arguments: ?std.json.Value) ToolError![]const u8 {
|
||||
const args = try parseArgsOrDefault(UrlParams, arena, arguments);
|
||||
const page = try ensurePage(session, registry, args.url, args.timeout, args.waitUntil);
|
||||
@@ -1014,31 +999,6 @@ test "substituteEnvVars missing var kept literal" {
|
||||
try std.testing.expectEqualStrings("$UNLIKELY_VAR_12345", r);
|
||||
}
|
||||
|
||||
test "percentEncodeQuery passes unreserved chars through" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const allocator = arena_state.allocator();

    // Letters, digits and `-._~` must be returned unchanged.
    const encoded = try percentEncodeQuery(allocator, "abcXYZ012-._~");
    try std.testing.expectEqualStrings("abcXYZ012-._~", encoded);
}
|
||||
|
||||
test "percentEncodeQuery encodes spaces and reserved chars" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const allocator = arena_state.allocator();

    // Space, '&' and '=' are each replaced by their %XX escape.
    const encoded = try percentEncodeQuery(allocator, "hello world&q=1");
    try std.testing.expectEqualStrings("hello%20world%26q%3D1", encoded);
}
|
||||
|
||||
test "percentEncodeQuery encodes UTF-8 bytes" {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const allocator = arena_state.allocator();

    // In "café" the 'é' is the byte pair 0xC3 0xA9; both bytes are escaped
    // while the ASCII letters c, a, f pass through.
    const encoded = try percentEncodeQuery(allocator, "café");
    try std.testing.expectEqualStrings("caf%C3%A9", encoded);
}
|
||||
|
||||
test "substituteEnvVars bare dollar" {
|
||||
var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
Reference in New Issue
Block a user