Add internal requests and use them for RobotsLayer

This commit is contained in:
Muki Kiboigo
2026-04-29 14:23:05 -07:00
parent 7d4342846f
commit e9599d87ec
3 changed files with 46 additions and 32 deletions

View File

@@ -199,12 +199,12 @@ pub fn init(allocator: Allocator, network: *Network) !*Client {
var next = client.layer();
next = layerWith(&client.interception_layer, next);
if (network.config.obeyRobots()) {
next = layerWith(&client.robots_layer, next);
}
next = layerWith(&client.interception_layer, next);
if (network.config.httpCacheDir() != null) {
next = layerWith(&client.cache_layer, next);
}
@@ -404,10 +404,12 @@ pub fn request(self: *Client, in_req: Request) !void {
const arena = try self.network.app.arena_pool.acquire(.small, "Request.arena");
req.params.arena = arena;
req.params.notification.dispatch(
.http_request_start,
&.{ .request = &req },
);
if (!req.params.internal) {
req.params.notification.dispatch(
.http_request_start,
&.{ .request = &req },
);
}
return self.entry_layer.request(self, req) catch |err| {
req.error_callback(req.ctx, err);
@@ -882,6 +884,7 @@ pub const RequestParams = struct {
credentials: ?[:0]const u8 = null,
notification: *Notification,
timeout_ms: u32 = 0,
internal: bool = false,
const ResourceType = enum {
document,

View File

@@ -54,6 +54,10 @@ pub fn layer(self: *InterceptionLayer) Layer {
fn request(ptr: *anyopaque, client: *Client, in_req: Request) anyerror!void {
const self: *InterceptionLayer = @ptrCast(@alignCast(ptr));
if (in_req.params.internal) {
return try self.next.request(client, in_req);
}
const intercept_ctx = try in_req.params.arena.create(InterceptContext);
intercept_ctx.* = .{
.client = client,

View File

@@ -57,6 +57,10 @@ fn request(ptr: *anyopaque, client: *Client, req: Request) anyerror!void {
const arena = req.params.arena;
const robots_url = try URL.getRobotsUrl(arena, req.params.url);
if (req.params.internal) {
return self.next.request(client, req);
}
if (client.network.robot_store.get(robots_url)) |robot_entry| {
switch (robot_entry) {
.present => |robots| {
@@ -104,28 +108,33 @@ fn fetchRobotsThenRequest(
const headers = try client.newHeaders();
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
try self.next.request(client, .{
.ctx = robots_ctx,
.params = .{
// We have to do this ourselves because we are not going through the top level `request`.
.arena = new_arena,
.request_id = client.incrReqId(),
.url = robots_url,
.method = .GET,
.headers = headers,
.frame_id = req.params.frame_id,
.loader_id = req.params.loader_id,
.cookie_jar = req.params.cookie_jar,
.cookie_origin = req.params.cookie_origin,
.notification = req.params.notification,
.resource_type = .fetch,
try self.next.request(
client,
.{
.ctx = robots_ctx,
.params = .{
// We have to do these ourselves since we don't go through request.
.arena = new_arena,
.request_id = client.nextReqId(),
.url = robots_url,
.method = .GET,
.headers = headers,
.frame_id = req.params.frame_id,
.loader_id = req.params.loader_id,
.cookie_jar = req.params.cookie_jar,
.cookie_origin = req.params.cookie_origin,
.notification = req.params.notification,
.resource_type = .fetch,
.internal = true,
},
.header_callback = RobotsContext.headerCallback,
.data_callback = RobotsContext.dataCallback,
.done_callback = RobotsContext.doneCallback,
.error_callback = RobotsContext.errorCallback,
.shutdown_callback = RobotsContext.shutdownCallback,
},
.header_callback = RobotsContext.headerCallback,
.data_callback = RobotsContext.dataCallback,
.done_callback = RobotsContext.doneCallback,
.error_callback = RobotsContext.errorCallback,
.shutdown_callback = RobotsContext.shutdownCallback,
});
);
}
try entry.value_ptr.append(self.allocator, req);
@@ -169,11 +178,6 @@ const RobotsContext = struct {
buffer: std.ArrayListUnmanaged(u8),
status: u16 = 0,
/// Releases this context: frees `buffer` through the owning layer's
/// allocator, then destroys the context itself with that same allocator.
/// The context is destroyed here, so `self` must not be used afterwards.
fn deinit(self: *RobotsContext) void {
self.buffer.deinit(self.layer.allocator);
// Destroy `self` last: the line above still reads `self.layer`.
self.layer.allocator.destroy(self);
}
fn headerCallback(response: Response) anyerror!bool {
const self: *RobotsContext = @ptrCast(@alignCast(response.ctx));
switch (response.inner) {
@@ -198,6 +202,7 @@ const RobotsContext = struct {
fn doneCallback(ctx_ptr: *anyopaque) anyerror!void {
const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
const l = self.layer;
const client = self.client;
const robots_url = self.robots_url;
@@ -241,6 +246,7 @@ const RobotsContext = struct {
fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
const l = self.layer;
const client = self.client;
const robots_url = self.robots_url;
@@ -251,6 +257,7 @@ const RobotsContext = struct {
fn shutdownCallback(ctx_ptr: *anyopaque) void {
const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
const l = self.layer;
const client = self.client;
const robots_url = self.robots_url;