mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
use layers for Cache, Robots and WebBotAuth
This commit is contained in:
@@ -27,7 +27,6 @@ const CookieJar = @import("webapi/storage/Cookie.zig").Jar;
|
||||
const http = @import("../network/http.zig");
|
||||
const Network = @import("../network/Network.zig");
|
||||
const Robots = @import("../network/Robots.zig");
|
||||
const Cache = @import("../network/cache/Cache.zig");
|
||||
const timestamp = @import("../datetime.zig").timestamp;
|
||||
|
||||
const log = lp.log;
|
||||
@@ -40,8 +39,11 @@ pub const Method = http.Method;
|
||||
pub const Headers = http.Headers;
|
||||
pub const ResponseHead = http.ResponseHead;
|
||||
pub const HeaderIterator = http.HeaderIterator;
|
||||
const CacheMetadata = Cache.CachedMetadata;
|
||||
const CachedResponse = Cache.CachedResponse;
|
||||
const CachedResponse = @import("../network/cache/Cache.zig").CachedResponse;
|
||||
|
||||
pub const CacheLayer = @import("../network/layer/CacheLayer.zig");
|
||||
pub const RobotsLayer = @import("../network/layer/RobotsLayer.zig");
|
||||
pub const WebBotAuthLayer = @import("../network/layer/WebBotAuthLayer.zig");
|
||||
|
||||
// This is loosely tied to a browser Page. Loading all the <scripts>, doing
|
||||
// XHR requests, and loading imports all happens through here. Since the app
|
||||
@@ -101,10 +103,6 @@ allocator: Allocator,
|
||||
|
||||
network: *Network,
|
||||
|
||||
// Queue of requests that depend on a robots.txt.
|
||||
// Allows us to fetch the robots.txt just once.
|
||||
pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empty,
|
||||
|
||||
// Once we have a handle/easy to process a request with, we create a Transfer
|
||||
// which contains the Request as well as any state we need to process the
|
||||
// request. These will come and go with each request.
|
||||
@@ -134,6 +132,37 @@ cdp_client: ?CDPClient = null,
|
||||
|
||||
max_response_size: usize,
|
||||
|
||||
cache_layer: CacheLayer,
|
||||
robots_layer: RobotsLayer,
|
||||
web_bot_auth_layer: WebBotAuthLayer,
|
||||
entry_layer: Layer,
|
||||
|
||||
/// Shared state passed along with every request as it travels through the
/// layer chain.
pub const Context = struct {
    network: *Network,

    /// Creates a new header set initialised from the configured
    /// `user_agent_header`.
    pub fn newHeaders(self: Context) !http.Headers {
        const user_agent = self.network.config.http_headers.user_agent_header;
        return http.Headers.init(user_agent);
    }
};
|
||||
|
||||
/// Type-erased handle to one stage of the request pipeline: an opaque
/// pointer to the implementation plus the table of functions that operate
/// on it.
pub const Layer = struct {
    ptr: *anyopaque,
    vtable: *const VTable,

    pub const VTable = struct {
        request: *const fn (*anyopaque, Context, Request) anyerror!void,
    };

    /// Dispatches `req` to the implementation behind this handle.
    pub fn request(self: Layer, ctx: Context, req: Request) !void {
        const handler = self.vtable.request;
        return handler(self.ptr, ctx, req);
    }
};
|
||||
|
||||
/// Chains `self` in front of `next`: stores `next` in the layer's `next`
/// field and returns the type-erased handle for `self`.
///
/// `self` must be a pointer to a type that has a `next: Layer` field and a
/// `layer()` method.
fn layerWith(self: anytype, next: Layer) Layer {
    const target = self;
    target.next = next;
    return target.layer();
}
|
||||
|
||||
// libcurl can monitor arbitrary sockets, this lets us use libcurl to poll
|
||||
// both HTTP data as well as messages from a CDP connection.
|
||||
// Furthermore, we have some tension between blocking scripts and request
|
||||
@@ -175,8 +204,29 @@ pub fn init(allocator: Allocator, network: *Network) !*Client {
|
||||
.tls_verify = network.config.tlsVerifyHost(),
|
||||
.obey_robots = network.config.obeyRobots(),
|
||||
.max_response_size = network.config.httpMaxResponseSize() orelse std.math.maxInt(u32),
|
||||
|
||||
.cache_layer = .{},
|
||||
.robots_layer = .{ .allocator = allocator },
|
||||
.web_bot_auth_layer = .{},
|
||||
.entry_layer = undefined,
|
||||
};
|
||||
|
||||
var next = client.layer();
|
||||
|
||||
if (network.config.webBotAuth() != null) {
|
||||
next = layerWith(&client.web_bot_auth_layer, next);
|
||||
}
|
||||
|
||||
if (network.config.obeyRobots()) {
|
||||
next = layerWith(&client.robots_layer, next);
|
||||
}
|
||||
|
||||
if (network.config.httpCacheDir() != null) {
|
||||
next = layerWith(&client.cache_layer, next);
|
||||
}
|
||||
|
||||
client.entry_layer = next;
|
||||
|
||||
return client;
|
||||
}
|
||||
|
||||
@@ -185,17 +235,20 @@ pub fn deinit(self: *Client) void {
|
||||
self.handles.deinit();
|
||||
|
||||
self.transfer_pool.deinit();
|
||||
|
||||
var robots_iter = self.pending_robots_queue.iterator();
|
||||
while (robots_iter.next()) |entry| {
|
||||
entry.value_ptr.deinit(self.allocator);
|
||||
}
|
||||
self.pending_robots_queue.deinit(self.allocator);
|
||||
|
||||
self.clearUserAgentOverride();
|
||||
|
||||
self.robots_layer.deinit(self.allocator);
|
||||
|
||||
self.allocator.destroy(self);
|
||||
}
|
||||
|
||||
/// Returns the client itself as a `Layer` whose `request` entry is
/// `_request`.
pub fn layer(self: *Client) Layer {
    // Container-level constant so the vtable address has static lifetime.
    const Static = struct {
        const vtable: Layer.VTable = .{ .request = _request };
    };
    return .{
        .ptr = self,
        .vtable = &Static.vtable,
    };
}
|
||||
|
||||
// Set a user agent override. Both the raw UA string and the pre-formatted
|
||||
// "User-Agent: <ua>" header string are allocated from self.allocator.
|
||||
pub fn setUserAgentOverride(self: *Client, ua: []const u8) !void {
|
||||
@@ -350,102 +403,12 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus {
|
||||
}
|
||||
|
||||
pub fn request(self: *Client, req: Request) !void {
|
||||
if (self.obey_robots == false) {
|
||||
return self.processRequest(req);
|
||||
}
|
||||
|
||||
const robots_url = try URL.getRobotsUrl(self.allocator, req.url);
|
||||
errdefer self.allocator.free(robots_url);
|
||||
|
||||
// If we have this robots cached, we can take a fast path.
|
||||
if (self.network.robot_store.get(robots_url)) |robot_entry| {
|
||||
defer self.allocator.free(robots_url);
|
||||
|
||||
switch (robot_entry) {
|
||||
// If we have a found robots entry, we check it.
|
||||
.present => |robots| {
|
||||
const path = URL.getPathname(req.url);
|
||||
if (!robots.isAllowed(path)) {
|
||||
req.error_callback(req.ctx, error.RobotsBlocked);
|
||||
return;
|
||||
}
|
||||
},
|
||||
// Otherwise, we assume we won't find it again.
|
||||
.absent => {},
|
||||
}
|
||||
|
||||
return self.processRequest(req);
|
||||
}
|
||||
return self.fetchRobotsThenProcessRequest(robots_url, req);
|
||||
const ctx = Context{ .network = self.network };
|
||||
return self.entry_layer.request(ctx, req);
|
||||
}
|
||||
|
||||
fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
|
||||
const response = Response.fromCached(req.ctx, cached);
|
||||
defer switch (cached.data) {
|
||||
.buffer => |_| {},
|
||||
.file => |f| f.file.close(),
|
||||
};
|
||||
|
||||
if (req.start_callback) |cb| {
|
||||
try cb(response);
|
||||
}
|
||||
|
||||
const proceed = try req.header_callback(response);
|
||||
if (!proceed) {
|
||||
req.error_callback(req.ctx, error.Abort);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (cached.data) {
|
||||
.buffer => |data| {
|
||||
if (data.len > 0) {
|
||||
try req.data_callback(response, data);
|
||||
}
|
||||
},
|
||||
.file => |f| {
|
||||
const file = f.file;
|
||||
|
||||
var buf: [1024]u8 = undefined;
|
||||
var file_reader = file.reader(&buf);
|
||||
try file_reader.seekTo(f.offset);
|
||||
const reader = &file_reader.interface;
|
||||
|
||||
var read_buf: [1024]u8 = undefined;
|
||||
var remaining = f.len;
|
||||
|
||||
while (remaining > 0) {
|
||||
const read_len = @min(read_buf.len, remaining);
|
||||
const n = try reader.readSliceShort(read_buf[0..read_len]);
|
||||
if (n == 0) break;
|
||||
remaining -= n;
|
||||
try req.data_callback(response, read_buf[0..n]);
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
try req.done_callback(req.ctx);
|
||||
}
|
||||
|
||||
fn processRequest(self: *Client, req: Request) !void {
|
||||
if (self.network.cache) |*cache| {
|
||||
if (req.method == .GET) {
|
||||
// cache is only used to read the meta data
|
||||
const arena = try self.network.app.arena_pool.acquire(.small, "HttpClient.cache");
|
||||
defer self.network.app.arena_pool.release(arena);
|
||||
|
||||
var iter = req.headers.iterator();
|
||||
const req_header_list = try iter.collect(arena);
|
||||
|
||||
if (cache.get(arena, .{
|
||||
.url = req.url,
|
||||
.timestamp = std.time.timestamp(),
|
||||
.request_headers = req_header_list.items,
|
||||
})) |cached| {
|
||||
defer req.headers.deinit();
|
||||
return serveFromCache(req, &cached);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn _request(ptr: *anyopaque, _: Context, req: Request) !void {
|
||||
const self: *Client = @ptrCast(@alignCast(ptr));
|
||||
|
||||
const transfer = try self.makeTransfer(req);
|
||||
|
||||
@@ -479,176 +442,6 @@ fn processRequest(self: *Client, req: Request) !void {
|
||||
}
|
||||
}
|
||||
|
||||
const RobotsRequestContext = struct {
|
||||
client: *Client,
|
||||
req: Request,
|
||||
robots_url: [:0]const u8,
|
||||
buffer: std.ArrayList(u8),
|
||||
status: u16 = 0,
|
||||
|
||||
pub fn deinit(self: *RobotsRequestContext) void {
|
||||
self.client.allocator.free(self.robots_url);
|
||||
self.buffer.deinit(self.client.allocator);
|
||||
self.client.allocator.destroy(self);
|
||||
}
|
||||
};
|
||||
|
||||
fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: Request) !void {
|
||||
const entry = try self.pending_robots_queue.getOrPut(self.allocator, robots_url);
|
||||
|
||||
if (!entry.found_existing) {
|
||||
errdefer self.allocator.free(robots_url);
|
||||
|
||||
// If we aren't already fetching this robots,
|
||||
// we want to create a new queue for it and add this request into it.
|
||||
entry.value_ptr.* = .empty;
|
||||
|
||||
const ctx = try self.allocator.create(RobotsRequestContext);
|
||||
errdefer self.allocator.destroy(ctx);
|
||||
ctx.* = .{ .client = self, .req = req, .robots_url = robots_url, .buffer = .empty };
|
||||
const headers = try self.newHeaders();
|
||||
|
||||
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
|
||||
try self.processRequest(.{
|
||||
.ctx = ctx,
|
||||
.url = robots_url,
|
||||
.method = .GET,
|
||||
.headers = headers,
|
||||
.blocking = false,
|
||||
.frame_id = req.frame_id,
|
||||
.loader_id = req.loader_id,
|
||||
.cookie_jar = req.cookie_jar,
|
||||
.cookie_origin = req.cookie_origin,
|
||||
.notification = req.notification,
|
||||
.resource_type = .fetch,
|
||||
.header_callback = robotsHeaderCallback,
|
||||
.data_callback = robotsDataCallback,
|
||||
.done_callback = robotsDoneCallback,
|
||||
.error_callback = robotsErrorCallback,
|
||||
.shutdown_callback = robotsShutdownCallback,
|
||||
});
|
||||
} else {
|
||||
// Not using our own robots URL, only using the one from the first request.
|
||||
self.allocator.free(robots_url);
|
||||
}
|
||||
|
||||
try entry.value_ptr.append(self.allocator, req);
|
||||
}
|
||||
|
||||
fn robotsHeaderCallback(response: Response) !bool {
|
||||
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx));
|
||||
// Robots callbacks only happen on real live requests.
|
||||
const transfer = response.inner.transfer;
|
||||
|
||||
if (transfer.response_header) |hdr| {
|
||||
log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url });
|
||||
ctx.status = hdr.status;
|
||||
}
|
||||
|
||||
if (transfer.getContentLength()) |cl| {
|
||||
try ctx.buffer.ensureTotalCapacity(ctx.client.allocator, cl);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
fn robotsDataCallback(response: Response, data: []const u8) !void {
|
||||
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx));
|
||||
try ctx.buffer.appendSlice(ctx.client.allocator, data);
|
||||
}
|
||||
|
||||
fn robotsDoneCallback(ctx_ptr: *anyopaque) !void {
|
||||
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
|
||||
defer ctx.deinit();
|
||||
|
||||
var allowed = true;
|
||||
|
||||
switch (ctx.status) {
|
||||
200 => {
|
||||
if (ctx.buffer.items.len > 0) {
|
||||
const robots: ?Robots = ctx.client.network.robot_store.robotsFromBytes(
|
||||
ctx.client.getUserAgent(),
|
||||
ctx.buffer.items,
|
||||
) catch blk: {
|
||||
log.warn(.browser, "failed to parse robots", .{ .robots_url = ctx.robots_url });
|
||||
// If we fail to parse, we just insert it as absent and ignore.
|
||||
try ctx.client.network.robot_store.putAbsent(ctx.robots_url);
|
||||
break :blk null;
|
||||
};
|
||||
|
||||
if (robots) |r| {
|
||||
try ctx.client.network.robot_store.put(ctx.robots_url, r);
|
||||
const path = URL.getPathname(ctx.req.url);
|
||||
allowed = r.isAllowed(path);
|
||||
}
|
||||
}
|
||||
},
|
||||
404 => {
|
||||
log.debug(.http, "robots not found", .{ .url = ctx.robots_url });
|
||||
// If we get a 404, we just insert it as absent.
|
||||
try ctx.client.network.robot_store.putAbsent(ctx.robots_url);
|
||||
},
|
||||
else => {
|
||||
log.debug(.http, "unexpected status on robots", .{ .url = ctx.robots_url, .status = ctx.status });
|
||||
// If we get an unexpected status, we just insert as absent.
|
||||
try ctx.client.network.robot_store.putAbsent(ctx.robots_url);
|
||||
},
|
||||
}
|
||||
|
||||
var queued = ctx.client.pending_robots_queue.fetchRemove(
|
||||
ctx.robots_url,
|
||||
) orelse @panic("Client.robotsDoneCallbacke empty queue");
|
||||
defer queued.value.deinit(ctx.client.allocator);
|
||||
|
||||
for (queued.value.items) |queued_req| {
|
||||
if (!allowed) {
|
||||
log.warn(.http, "blocked by robots", .{ .url = queued_req.url });
|
||||
queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
|
||||
} else {
|
||||
ctx.client.processRequest(queued_req) catch |e| {
|
||||
queued_req.error_callback(queued_req.ctx, e);
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn robotsErrorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
|
||||
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
|
||||
defer ctx.deinit();
|
||||
|
||||
log.warn(.http, "robots fetch failed", .{ .err = err });
|
||||
|
||||
var queued = ctx.client.pending_robots_queue.fetchRemove(
|
||||
ctx.robots_url,
|
||||
) orelse @panic("Client.robotsErrorCallback empty queue");
|
||||
defer queued.value.deinit(ctx.client.allocator);
|
||||
|
||||
// On error, allow all queued requests to proceed
|
||||
for (queued.value.items) |queued_req| {
|
||||
ctx.client.processRequest(queued_req) catch |e| {
|
||||
queued_req.error_callback(queued_req.ctx, e);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn robotsShutdownCallback(ctx_ptr: *anyopaque) void {
|
||||
const ctx: *RobotsRequestContext = @ptrCast(@alignCast(ctx_ptr));
|
||||
defer ctx.deinit();
|
||||
|
||||
log.debug(.http, "robots fetch shutdown", .{});
|
||||
|
||||
var queued = ctx.client.pending_robots_queue.fetchRemove(
|
||||
ctx.robots_url,
|
||||
) orelse @panic("Client.robotsErrorCallback empty queue");
|
||||
defer queued.value.deinit(ctx.client.allocator);
|
||||
|
||||
for (queued.value.items) |queued_req| {
|
||||
if (queued_req.shutdown_callback) |shutdown_cb| {
|
||||
shutdown_cb(queued_req.ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {
|
||||
// The request was intercepted and is blocking. This is messy, but our
|
||||
// callers, the ScriptManager -> Page, don't have a great way to stop the
|
||||
@@ -1028,13 +821,6 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
|
||||
}
|
||||
}
|
||||
|
||||
if (transfer._pending_cache_metadata) |metadata| {
|
||||
const cache = &self.network.cache.?;
|
||||
cache.put(metadata.*, body) catch |err| {
|
||||
log.warn(.cache, "cache put failed", .{ .err = err });
|
||||
};
|
||||
}
|
||||
|
||||
// release conn ASAP so that it's available; some done_callbacks
|
||||
// will load more resources.
|
||||
transfer.releaseConn();
|
||||
@@ -1155,6 +941,13 @@ fn ensureNoActiveConnection(self: *const Client) !void {
|
||||
}
|
||||
|
||||
pub const Request = struct {
|
||||
pub const StartCallback = *const fn (response: Response) anyerror!void;
|
||||
pub const HeaderCallback = *const fn (response: Response) anyerror!bool;
|
||||
pub const DataCallback = *const fn (response: Response, data: []const u8) anyerror!void;
|
||||
pub const DoneCallback = *const fn (ctx: *anyopaque) anyerror!void;
|
||||
pub const ErrorCallback = *const fn (ctx: *anyopaque, err: anyerror) void;
|
||||
pub const ShutdownCallback = *const fn (ctx: *anyopaque) void;
|
||||
|
||||
frame_id: u32,
|
||||
loader_id: u32,
|
||||
method: Method,
|
||||
@@ -1178,12 +971,12 @@ pub const Request = struct {
|
||||
// arbitrary data that can be associated with this request
|
||||
ctx: *anyopaque = undefined,
|
||||
|
||||
start_callback: ?*const fn (response: Response) anyerror!void = null,
|
||||
header_callback: *const fn (response: Response) anyerror!bool,
|
||||
data_callback: *const fn (response: Response, data: []const u8) anyerror!void,
|
||||
done_callback: *const fn (ctx: *anyopaque) anyerror!void,
|
||||
error_callback: *const fn (ctx: *anyopaque, err: anyerror) void,
|
||||
shutdown_callback: ?*const fn (ctx: *anyopaque) void = null,
|
||||
start_callback: ?StartCallback = null,
|
||||
header_callback: HeaderCallback,
|
||||
data_callback: DataCallback,
|
||||
done_callback: DoneCallback,
|
||||
error_callback: ErrorCallback,
|
||||
shutdown_callback: ?ShutdownCallback = null,
|
||||
|
||||
const ResourceType = enum {
|
||||
document,
|
||||
@@ -1204,6 +997,10 @@ pub const Request = struct {
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/// Releases the header list carried by this request.
pub fn deinit(self: *const Request) void {
    const headers = self.headers;
    headers.deinit();
}
|
||||
};
|
||||
|
||||
pub const Response = struct {
|
||||
@@ -1290,7 +1087,6 @@ pub const Transfer = struct {
|
||||
// total bytes received in the response, including the response status line,
|
||||
// the headers, and the [encoded] body.
|
||||
bytes_received: usize = 0,
|
||||
_pending_cache_metadata: ?*CacheMetadata = null,
|
||||
|
||||
start_time: u64,
|
||||
aborted: bool = false,
|
||||
@@ -1442,12 +1238,6 @@ pub const Transfer = struct {
|
||||
try conn.secretHeaders(&header_list, &client.network.config.http_headers);
|
||||
try conn.setHeaders(&header_list);
|
||||
|
||||
// If we have WebBotAuth, sign our request.
|
||||
if (client.network.web_bot_auth) |*wba| {
|
||||
const authority = URL.getHost(req.url);
|
||||
try wba.signRequest(self.arena.allocator(), &header_list, authority);
|
||||
}
|
||||
|
||||
// Add cookies from cookie jar.
|
||||
if (try self.getCookieString()) |cookies| {
|
||||
try conn.setCookies(@ptrCast(cookies.ptr));
|
||||
@@ -1693,56 +1483,6 @@ pub const Transfer = struct {
|
||||
return err;
|
||||
};
|
||||
|
||||
if (transfer.client.network.cache != null and transfer.req.method == .GET) {
|
||||
const rh = &transfer.response_header.?;
|
||||
const allocator = transfer.arena.allocator();
|
||||
|
||||
const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null;
|
||||
|
||||
const maybe_cm = try Cache.tryCache(
|
||||
allocator,
|
||||
std.time.timestamp(),
|
||||
transfer.url,
|
||||
rh.status,
|
||||
rh.contentType(),
|
||||
if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null,
|
||||
vary,
|
||||
if (conn.getResponseHeader("age", 0)) |h| h.value else null,
|
||||
conn.getResponseHeader("set-cookie", 0) != null,
|
||||
conn.getResponseHeader("authorization", 0) != null,
|
||||
);
|
||||
|
||||
if (maybe_cm) |cm| {
|
||||
var iter = transfer.responseHeaderIterator();
|
||||
var header_list = try iter.collect(allocator);
|
||||
const end_of_response = header_list.items.len;
|
||||
|
||||
if (vary) |vary_str| {
|
||||
var req_it = transfer.req.headers.iterator();
|
||||
|
||||
while (req_it.next()) |hdr| {
|
||||
var vary_iter = std.mem.splitScalar(u8, vary_str, ',');
|
||||
|
||||
while (vary_iter.next()) |part| {
|
||||
const name = std.mem.trim(u8, part, &std.ascii.whitespace);
|
||||
if (std.ascii.eqlIgnoreCase(hdr.name, name)) {
|
||||
try header_list.append(allocator, .{
|
||||
.name = try allocator.dupe(u8, hdr.name),
|
||||
.value = try allocator.dupe(u8, hdr.value),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const metadata = try transfer.arena.allocator().create(CacheMetadata);
|
||||
metadata.* = cm;
|
||||
metadata.headers = header_list.items[0..end_of_response];
|
||||
metadata.vary_headers = header_list.items[end_of_response..];
|
||||
transfer._pending_cache_metadata = metadata;
|
||||
}
|
||||
}
|
||||
|
||||
return proceed and transfer.aborted == false;
|
||||
}
|
||||
|
||||
|
||||
239
src/network/layer/CacheLayer.zig
Normal file
239
src/network/layer/CacheLayer.zig
Normal file
@@ -0,0 +1,239 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
|
||||
const http = @import("../http.zig");
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Context = @import("../../browser/HttpClient.zig").Context;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
|
||||
const Cache = @import("../cache/Cache.zig");
|
||||
const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata;
|
||||
const CachedResponse = @import("../cache/Cache.zig").CachedResponse;
|
||||
const Forward = @import("Forward.zig");
|
||||
|
||||
const CacheLayer = @This();
|
||||
|
||||
next: Layer = undefined,
|
||||
|
||||
/// Returns this cache layer as a type-erased `Layer` handle.
pub fn layer(self: *CacheLayer) Layer {
    // Container-level constant so the vtable address has static lifetime.
    const Static = struct {
        const vtable: Layer.VTable = .{ .request = request };
    };
    return .{
        .ptr = self,
        .vtable = &Static.vtable,
    };
}
|
||||
|
||||
/// Layer entry point.
///
/// Non-GET requests are passed straight to the next layer. For GET requests
/// the cache is consulted first: on a hit the response is replayed through
/// the request's callbacks and the request is released; on a miss the
/// request is forwarded downstream with its callbacks wrapped by a
/// `CacheContext`, which can store the response once it completes.
fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void {
    const self: *CacheLayer = @ptrCast(@alignCast(ptr));
    const network = ctx.network;

    if (req.method != .GET) {
        return self.next.request(ctx, req);
    }

    const arena = try network.app.arena_pool.acquire(.small, "CacheLayer");
    // Handles every error exit below. Success paths release the arena
    // explicitly (cache hit) or hand it to CacheContext (cache miss), so the
    // arena is released exactly once.
    errdefer network.app.arena_pool.release(arena);

    var iter = req.headers.iterator();
    const req_header_list = try iter.collect(arena);

    if (network.cache.?.get(arena, .{
        .url = req.url,
        .timestamp = std.time.timestamp(),
        .request_headers = req_header_list.items,
    })) |cached| {
        defer req.deinit();
        try serveFromCache(req, &cached);
        // Do not use `defer` for this release: if serveFromCache fails, the
        // errdefer above already releases the arena, and a second release
        // of the same arena would follow.
        network.app.arena_pool.release(arena);
        return;
    }

    // Cache miss: the context lives in the arena and is released by
    // CacheContext's done/error/shutdown callback.
    const cache_ctx = try arena.create(CacheContext);
    cache_ctx.* = .{
        .arena = arena,
        .context = ctx,
        .forward = Forward.fromRequest(req),
        .req_url = req.url,
        .req_headers = req.headers,
    };

    const wrapped = cache_ctx.forward.wrapRequest(
        req,
        cache_ctx,
        .{
            .start = CacheContext.startCallback,
            .header = CacheContext.headerCallback,
            .done = CacheContext.doneCallback,
            .shutdown = CacheContext.shutdownCallback,
            .err = CacheContext.errorCallback,
        },
    );

    // NOTE(review): if the next layer can both invoke the wrapped error
    // callback and return an error, the arena would be released by both the
    // callback and the errdefer above — confirm against the downstream layers.
    return self.next.request(ctx, wrapped);
}
|
||||
|
||||
/// Replays a cached response through the request's callbacks in the order
/// start (when set), header, data, done.
///
/// When the header callback declines to proceed, the error callback is
/// invoked with `error.Abort` and nothing else is delivered. For a
/// file-backed entry the file is closed when this function returns, on both
/// the success and the error path.
fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
    const response = Response.fromCached(req.ctx, cached);

    // Only file-backed entries hold a handle that needs closing.
    defer switch (cached.data) {
        .file => |entry| entry.file.close(),
        .buffer => {},
    };

    if (req.start_callback) |on_start| try on_start(response);

    if (!(try req.header_callback(response))) {
        req.error_callback(req.ctx, error.Abort);
        return;
    }

    switch (cached.data) {
        .buffer => |bytes| if (bytes.len > 0) try req.data_callback(response, bytes),
        .file => |entry| {
            var io_buf: [1024]u8 = undefined;
            var file_reader = entry.file.reader(&io_buf);
            try file_reader.seekTo(entry.offset);
            const reader = &file_reader.interface;

            // Deliver the body in chunks of at most 1024 bytes.
            var chunk: [1024]u8 = undefined;
            var left = entry.len;
            while (left > 0) {
                const want = @min(chunk.len, left);
                const got = try reader.readSliceShort(chunk[0..want]);
                // A zero-length read stops delivery with whatever was sent.
                if (got == 0) break;
                left -= got;
                try req.data_callback(response, chunk[0..got]);
            }
        },
    }

    try req.done_callback(req.ctx);
}
|
||||
|
||||
/// Per-request state for a cache miss. It stands in for the caller's request
/// context while the request is in flight, relays every callback to the
/// caller's original callbacks through `forward`, and stores the response in
/// the cache when the response headers allow it.
const CacheContext = struct {
    /// Arena this context uses for its allocations. Each terminal callback
    /// (done, error, shutdown) releases it back to the arena pool.
    arena: std.mem.Allocator,
    context: Context,
    /// Transfer serving the request, recorded by the start and header
    /// callbacks; its `_stream_buffer` is what gets written to the cache.
    transfer: ?*Transfer = null,
    /// The caller's original context and callbacks.
    forward: Forward,
    req_url: [:0]const u8,
    req_headers: http.Headers,
    /// Set by the header callback when `Cache.tryCache` returns metadata.
    pending_metadata: ?*CachedMetadata = null,

    /// Records the transfer, then relays the start event.
    fn startCallback(response: Response) anyerror!void {
        const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
        self.transfer = response.inner.transfer;
        return self.forward.forwardStart(response);
    }

    /// Asks `Cache.tryCache` whether the response can be cached and, if so,
    /// builds the metadata (response headers plus the request headers named
    /// by `Vary`) that `doneCallback` stores. Then relays the header event.
    fn headerCallback(response: Response) anyerror!bool {
        const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
        const allocator = self.arena;

        const transfer = response.inner.transfer;
        // Record the transfer here too, so doneCallback does not depend on
        // the start callback having run.
        self.transfer = transfer;

        const rh = &transfer.response_header.?;
        const conn = transfer._conn.?;

        const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null;

        const maybe_cm = try Cache.tryCache(
            allocator,
            std.time.timestamp(),
            transfer.url,
            rh.status,
            rh.contentType(),
            if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null,
            vary,
            if (conn.getResponseHeader("age", 0)) |h| h.value else null,
            conn.getResponseHeader("set-cookie", 0) != null,
            conn.getResponseHeader("authorization", 0) != null,
        );

        if (maybe_cm) |cm| {
            var iter = transfer.responseHeaderIterator();
            var header_list = try iter.collect(allocator);
            // Everything appended after this index is a request header
            // selected by the Vary list.
            const end_of_response = header_list.items.len;

            if (vary) |vary_str| {
                var req_it = self.req_headers.iterator();
                while (req_it.next()) |hdr| {
                    var vary_iter = std.mem.splitScalar(u8, vary_str, ',');
                    while (vary_iter.next()) |part| {
                        const name = std.mem.trim(u8, part, &std.ascii.whitespace);
                        if (std.ascii.eqlIgnoreCase(hdr.name, name)) {
                            try header_list.append(allocator, .{
                                .name = try allocator.dupe(u8, hdr.name),
                                .value = try allocator.dupe(u8, hdr.value),
                            });
                        }
                    }
                }
            }

            const metadata = try allocator.create(CachedMetadata);
            metadata.* = cm;
            // Slice only after the last append: appending may move the items.
            metadata.headers = header_list.items[0..end_of_response];
            metadata.vary_headers = header_list.items[end_of_response..];
            self.pending_metadata = metadata;
        }

        return self.forward.forwardHeader(response);
    }

    /// Stores the completed response when metadata is pending, relays the
    /// done event, then releases the arena.
    fn doneCallback(ctx: *anyopaque) anyerror!void {
        const self: *CacheContext = @ptrCast(@alignCast(ctx));
        defer self.context.network.app.arena_pool.release(self.arena);

        if (self.pending_metadata) |metadata| store: {
            // headerCallback records the transfer before it sets the
            // metadata, so this fallback is defensive only. A request with
            // nothing to cache never needs the transfer at all.
            const transfer = self.transfer orelse {
                log.warn(.http, "cache put skipped", .{ .url = self.req_url });
                break :store;
            };
            const cache = &self.context.network.cache.?;

            log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata });
            if (cache.put(metadata.*, transfer._stream_buffer.items)) |_| {
                // Logged only on success, so the log never reports a write
                // that failed.
                log.debug(.browser, "http.cache.put", .{ .url = self.req_url });
            } else |err| {
                log.warn(.http, "cache put failed", .{ .err = err });
            }
        }

        return self.forward.forwardDone();
    }

    /// Relays the shutdown event, then releases the arena.
    fn shutdownCallback(ctx: *anyopaque) void {
        const self: *CacheContext = @ptrCast(@alignCast(ctx));
        defer self.context.network.app.arena_pool.release(self.arena);
        self.forward.forwardShutdown();
    }

    /// Relays the error, then releases the arena.
    fn errorCallback(ctx: *anyopaque, e: anyerror) void {
        const self: *CacheContext = @ptrCast(@alignCast(ctx));
        defer self.context.network.app.arena_pool.release(self.arena);
        self.forward.forwardErr(e);
    }
};
|
||||
134
src/network/layer/Forward.zig
Normal file
134
src/network/layer/Forward.zig
Normal file
@@ -0,0 +1,134 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
|
||||
const Forward = @This();
|
||||
|
||||
ctx: *anyopaque,
|
||||
start: ?Request.StartCallback,
|
||||
header: Request.HeaderCallback,
|
||||
data: Request.DataCallback,
|
||||
done: Request.DoneCallback,
|
||||
err: Request.ErrorCallback,
|
||||
shutdown: ?Request.ShutdownCallback,
|
||||
|
||||
/// Captures the context pointer and all callbacks of `req` so they can still
/// be invoked after the request's own fields have been replaced.
pub fn fromRequest(req: Request) Forward {
    const saved: Forward = .{
        .ctx = req.ctx,
        .start = req.start_callback,
        .header = req.header_callback,
        .data = req.data_callback,
        .done = req.done_callback,
        .err = req.error_callback,
        .shutdown = req.shutdown_callback,
    };
    return saved;
}
|
||||
|
||||
/// Callbacks a wrapping layer wants to handle itself. Every entry left
/// `null` makes `wrapRequest` fall back to a generated passthrough callback.
pub const Overrides = struct {
    start: ?Request.StartCallback = null,
    header: ?Request.HeaderCallback = null,
    data: ?Request.DataCallback = null,
    done: ?Request.DoneCallback = null,
    err: ?Request.ErrorCallback = null,
    shutdown: ?Request.ShutdownCallback = null,
};
|
||||
|
||||
/// Returns a copy of `req` whose context is `new_ctx` and whose callbacks
/// are taken from `overrides`, with generated passthrough callbacks filling
/// the gaps.
///
/// `new_ctx` must be a pointer to a struct that has a field named `forward`;
/// the passthrough callbacks relay through that field. The optional start
/// and shutdown callbacks stay `null` when neither an override nor an
/// original callback (as recorded in `self`) exists.
pub fn wrapRequest(
    self: *Forward,
    req: Request,
    new_ctx: anytype,
    overrides: Overrides,
) Request {
    const Passthrough = makePassthrough(@TypeOf(new_ctx.*), "forward");

    var out = req;
    out.ctx = new_ctx;

    // Mandatory callbacks: override first, passthrough otherwise.
    out.header_callback = overrides.header orelse Passthrough.header;
    out.data_callback = overrides.data orelse Passthrough.data;
    out.done_callback = overrides.done orelse Passthrough.done;
    out.error_callback = overrides.err orelse Passthrough.err;

    // Optional callbacks: only install a passthrough if there is an original
    // callback to pass through to.
    out.start_callback = overrides.start orelse if (self.start != null) Passthrough.start else null;
    out.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) Passthrough.shutdown else null;

    return out;
}
|
||||
|
||||
/// Generates the default callbacks for a wrapper context of type `T`.
///
/// Each callback casts the opaque context pointer back to `*T` and relays
/// the event through the value stored in the field named `field`, without
/// adding any behavior of its own.
fn makePassthrough(comptime T: type, comptime field: []const u8) type {
    return struct {
        pub fn start(response: Response) anyerror!void {
            const owner: *T = @ptrCast(@alignCast(response.ctx));
            const relay = @field(owner, field);
            return relay.forwardStart(response);
        }

        pub fn header(response: Response) anyerror!bool {
            const owner: *T = @ptrCast(@alignCast(response.ctx));
            const relay = @field(owner, field);
            return relay.forwardHeader(response);
        }

        pub fn data(response: Response, chunk: []const u8) anyerror!void {
            const owner: *T = @ptrCast(@alignCast(response.ctx));
            const relay = @field(owner, field);
            return relay.forwardData(response, chunk);
        }

        pub fn done(ctx_ptr: *anyopaque) anyerror!void {
            const owner: *T = @ptrCast(@alignCast(ctx_ptr));
            const relay = @field(owner, field);
            return relay.forwardDone();
        }

        pub fn err(ctx_ptr: *anyopaque, e: anyerror) void {
            const owner: *T = @ptrCast(@alignCast(ctx_ptr));
            const relay = @field(owner, field);
            relay.forwardErr(e);
        }

        pub fn shutdown(ctx_ptr: *anyopaque) void {
            const owner: *T = @ptrCast(@alignCast(ctx_ptr));
            const relay = @field(owner, field);
            relay.forwardShutdown();
        }
    };
}
|
||||
|
||||
/// Invokes the captured start callback, if there is one, with a copy of
/// `response` whose context is the captured context. Does nothing when no
/// start callback was captured.
pub fn forwardStart(self: Forward, response: Response) anyerror!void {
    const cb = self.start orelse return;

    var for_owner = response;
    for_owner.ctx = self.ctx;
    return cb(for_owner);
}
|
||||
|
||||
/// Invokes the captured header callback with a copy of `response` whose
/// context is the captured context, and returns that callback's result.
pub fn forwardHeader(self: Forward, response: Response) anyerror!bool {
    var for_owner = response;
    for_owner.ctx = self.ctx;

    const on_header = self.header;
    return on_header(for_owner);
}
|
||||
|
||||
/// Invokes the captured data callback with `chunk` and a copy of `response`
/// whose context is the captured context.
pub fn forwardData(self: Forward, response: Response, chunk: []const u8) anyerror!void {
    var for_owner = response;
    for_owner.ctx = self.ctx;

    const on_data = self.data;
    return on_data(for_owner, chunk);
}
|
||||
|
||||
/// Invokes the captured done callback with the captured context.
pub fn forwardDone(self: Forward) anyerror!void {
    const on_done = self.done;
    return on_done(self.ctx);
}
|
||||
|
||||
/// Invokes the captured error callback with the captured context and `e`.
pub fn forwardErr(self: Forward, e: anyerror) void {
    const on_error = self.err;
    on_error(self.ctx, e);
}
|
||||
|
||||
/// Invokes the captured shutdown callback with the captured context. Does
/// nothing when no shutdown callback was captured.
pub fn forwardShutdown(self: Forward) void {
    const cb = self.shutdown orelse return;
    cb(self.ctx);
}
|
||||
264
src/network/layer/RobotsLayer.zig
Normal file
264
src/network/layer/RobotsLayer.zig
Normal file
@@ -0,0 +1,264 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
|
||||
const URL = @import("../../browser/URL.zig");
|
||||
const Robots = @import("../Robots.zig");
|
||||
const Context = @import("../../browser/HttpClient.zig").Context;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
const Forward = @import("Forward.zig");
|
||||
|
||||
const RobotsLayer = @This();
|
||||
|
||||
next: Layer = undefined,
|
||||
allocator: std.mem.Allocator,
|
||||
pending: std.StringHashMapUnmanaged(std.ArrayListUnmanaged(Request)) = .empty,
|
||||
|
||||
/// Returns the type-erased `Layer` handle for this robots layer; its vtable
/// routes `request` to this file's `request` function.
pub fn layer(self: *RobotsLayer) Layer {
    return .{ .ptr = self, .vtable = &.{ .request = request } };
}
|
||||
|
||||
/// Frees every pending request list and then the pending map itself, using
/// `allocator`.
///
/// Only the list storage is freed here; the queued requests themselves are not
/// deinitialized by this function.
pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void {
    var lists = self.pending.valueIterator();
    while (lists.next()) |list| list.deinit(allocator);

    self.pending.deinit(allocator);
}
|
||||
|
||||
/// Layer entry point: applies robots.txt rules to `req` before passing it on.
///
/// - If the robots store already has an entry for the request's robots.txt
///   URL, the request is either forwarded to the next layer or rejected by
///   calling its error callback with `error.RobotsBlocked`.
/// - Otherwise the request is handed to `fetchRobotsThenRequest`, which queues
///   it until the robots.txt fetch completes.
///
/// Arena ownership: the scratch arena acquired here is released exactly once.
/// It is released in this function on the store-hit path and when building the
/// robots URL fails; on the store-miss path ownership moves to
/// `fetchRobotsThenRequest`, which releases it on its own error paths.
fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void {
    const self: *RobotsLayer = @ptrCast(@alignCast(ptr));

    const arena = try ctx.network.app.arena_pool.acquire(.small, "RobotsLayer");

    // No function-wide errdefer here: it would fire in addition to the
    // releases below (and in the callee) and release the arena twice.
    const robots_url = URL.getRobotsUrl(arena, req.url) catch |err| {
        ctx.network.app.arena_pool.release(arena);
        return err;
    };

    const robot_entry = ctx.network.robot_store.get(robots_url) orelse {
        // Store miss: the arena (which backs robots_url) now belongs to the
        // fetch path.
        return self.fetchRobotsThenRequest(ctx, arena, robots_url, req);
    };

    // Store hit: robots_url was only needed for the lookup.
    ctx.network.app.arena_pool.release(arena);

    switch (robot_entry) {
        .present => |robots| {
            const path = URL.getPathname(req.url);

            if (!robots.isAllowed(path)) {
                log.warn(.http, "blocked by robots", .{ .url = req.url });
                req.error_callback(req.ctx, error.RobotsBlocked);
                return;
            }
        },
        .absent => {},
    }

    return self.next.request(ctx, req);
}
|
||||
|
||||
/// Queues `req` until the robots.txt at `robots_url` has been fetched,
/// starting that fetch if none is in flight for the URL.
///
/// Takes ownership of `arena` (which backs `robots_url`):
/// - If a fetch is already pending, `req` is appended to the existing queue and
///   the arena is released immediately; the map keeps its original key.
/// - Otherwise the arena is kept alive for the fetch's `RobotsContext`, whose
///   callbacks release it.
///
/// On error the arena is released exactly once, the pending entry created here
/// (if any) is removed, and `req` is not left in any queue.
fn fetchRobotsThenRequest(
    self: *RobotsLayer,
    ctx: Context,
    arena: std.mem.Allocator,
    robots_url: [:0]const u8,
    req: Request,
) !void {
    const entry = self.pending.getOrPut(self.allocator, robots_url) catch |err| {
        ctx.network.app.arena_pool.release(arena);
        return err;
    };

    if (entry.found_existing) {
        // A single defer covers both the success and the failure of append,
        // so the arena cannot be released twice.
        defer ctx.network.app.arena_pool.release(arena);
        return entry.value_ptr.append(self.allocator, req);
    }

    entry.value_ptr.* = .empty;

    // Error cleanup runs in reverse order: drop the pending entry first (its
    // key lives in the arena), then release the arena.
    errdefer ctx.network.app.arena_pool.release(arena);
    errdefer {
        if (self.pending.fetchRemove(robots_url)) |kv| {
            var orphaned = kv.value;
            orphaned.deinit(self.allocator);
        }
    }

    // Queue the request BEFORE dispatching the fetch. Once the fetch is
    // dispatched its callbacks own the arena and may flush the queue, so no
    // fallible step that touches the queue or the arena may follow it.
    try entry.value_ptr.append(self.allocator, req);

    const robots_ctx = try arena.create(RobotsContext);
    robots_ctx.* = .{
        .layer = self,
        .ctx = ctx,
        .arena = arena,
        .robots_url = robots_url,
        .buffer = .empty,
    };

    const headers = try ctx.newHeaders();
    log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });

    try self.next.request(ctx, .{
        .ctx = robots_ctx,
        .url = robots_url,
        .method = .GET,
        .headers = headers,
        .blocking = false,
        .frame_id = req.frame_id,
        .loader_id = req.loader_id,
        .cookie_jar = req.cookie_jar,
        .cookie_origin = req.cookie_origin,
        .notification = req.notification,
        .resource_type = .fetch,
        .header_callback = RobotsContext.headerCallback,
        .data_callback = RobotsContext.dataCallback,
        .done_callback = RobotsContext.doneCallback,
        .error_callback = RobotsContext.errorCallback,
        .shutdown_callback = RobotsContext.shutdownCallback,
    });
}
|
||||
|
||||
/// Removes the queue registered for `robots_url` and resolves every request
/// in it with the single verdict `allowed`.
///
/// - Allowed requests are passed to the next layer; if that fails, the request
///   gets the failure through its error callback and is deinitialized.
/// - Disallowed requests get `error.RobotsBlocked` through their error callback
///   and are deinitialized.
///
/// Panics if no queue exists for `robots_url`.
fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allowed: bool) void {
    var removed = self.pending.fetchRemove(robots_url) orelse
        @panic("RobotsLayer.flushPending: missing queue");
    defer removed.value.deinit(self.allocator);

    for (removed.value.items) |waiting| {
        if (allowed) {
            self.next.request(ctx, waiting) catch |e| {
                defer waiting.deinit();
                waiting.error_callback(waiting.ctx, e);
            };
            continue;
        }

        log.warn(.http, "blocked by robots", .{ .url = waiting.url });
        defer waiting.deinit();
        waiting.error_callback(waiting.ctx, error.RobotsBlocked);
    }
}
|
||||
|
||||
/// Removes the queue registered for `robots_url` and, for every request in
/// it, calls the request's shutdown callback (when it has one) and then
/// deinitializes the request.
///
/// Panics if no queue exists for `robots_url`.
fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void {
    var removed = self.pending.fetchRemove(robots_url) orelse
        @panic("RobotsLayer.flushPendingShutdown: missing queue");
    defer removed.value.deinit(self.allocator);

    for (removed.value.items) |waiting| {
        defer waiting.deinit();
        const on_shutdown = waiting.shutdown_callback orelse continue;
        on_shutdown(waiting.ctx);
    }
}
|
||||
|
||||
/// State of one in-flight robots.txt fetch.
///
/// The context itself and its `buffer` are allocated from `arena`, so
/// releasing the arena is the only cleanup needed. Each terminal callback
/// (done, error, shutdown) resolves the layer's pending queue for `robots_url`
/// and then releases the arena.
const RobotsContext = struct {
    layer: *RobotsLayer,
    arena: std.mem.Allocator,
    ctx: Context,
    robots_url: [:0]const u8,
    buffer: std.ArrayListUnmanaged(u8),
    // Stays 0 until a transfer response header has been seen.
    status: u16 = 0,

    /// Releases the arena that backs this context, its buffer and
    /// `robots_url`. `self` must not be used afterwards.
    fn deinit(self: *RobotsContext) void {
        // Copy out of `self` first: `self` lives inside the arena.
        const ctx = self.ctx;
        const arena = self.arena;
        ctx.network.app.arena_pool.release(arena);
    }

    /// Records the HTTP status and pre-sizes the body buffer when the
    /// content length is known. Always asks the transfer to continue.
    fn headerCallback(response: Response) anyerror!bool {
        const self: *RobotsContext = @ptrCast(@alignCast(response.ctx));
        switch (response.inner) {
            .transfer => |t| {
                if (t.response_header) |hdr| {
                    log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = self.robots_url });
                    self.status = hdr.status;
                }
                if (t.getContentLength()) |cl| {
                    try self.buffer.ensureTotalCapacity(self.arena, cl);
                }
            },
            // NOTE(review): no status is recorded for a cached response, so
            // `status` stays 0 and doneCallback takes its "unexpected status"
            // branch - confirm this is intended.
            .cached => {},
        }
        return true;
    }

    /// Appends a body chunk to the buffer.
    fn dataCallback(response: Response, data: []const u8) anyerror!void {
        const self: *RobotsContext = @ptrCast(@alignCast(response.ctx));
        try self.buffer.appendSlice(self.arena, data);
    }

    /// Records in the robot store that `robots_url` has no usable robots.txt.
    /// A store failure is logged instead of propagated: the result is simply
    /// not cached, and the queued requests must still be resolved.
    fn rememberAbsent(self: *RobotsContext) void {
        self.ctx.network.robot_store.putAbsent(self.robots_url) catch |e| {
            log.warn(.http, "robots store failed", .{ .url = self.robots_url, .err = e });
        };
    }

    /// Interprets the fetched response and updates the robot store.
    /// Returns the parsed rules when there are rules to enforce, or null when
    /// every queued request should be allowed.
    fn resolveRules(self: *RobotsContext) ?Robots {
        const network = self.ctx.network;
        const robots_url = self.robots_url;

        switch (self.status) {
            200 => {
                if (self.buffer.items.len == 0) return null;

                const parsed: ?Robots = network.robot_store.robotsFromBytes(
                    network.config.http_headers.user_agent,
                    self.buffer.items,
                ) catch blk: {
                    log.warn(.browser, "failed to parse robots", .{ .robots_url = robots_url });
                    self.rememberAbsent();
                    break :blk null;
                };

                const rules = parsed orelse return null;
                network.robot_store.put(robots_url, rules) catch |e| {
                    log.warn(.http, "robots store failed", .{ .url = robots_url, .err = e });
                };
                return rules;
            },
            404 => {
                log.debug(.http, "robots not found", .{ .url = robots_url });
                self.rememberAbsent();
                return null;
            },
            else => {
                log.debug(.http, "unexpected status on robots", .{
                    .url = robots_url,
                    .status = self.status,
                });
                self.rememberAbsent();
                return null;
            },
        }
    }

    /// Terminal callback for a completed fetch: stores the outcome, then
    /// resolves every queued request.
    ///
    /// Each queued request is checked against the rules using its OWN path;
    /// requests queued behind the same robots.txt may target different paths,
    /// so one request's verdict must not be reused for the others.
    ///
    /// This function always flushes the queue before returning and does not
    /// return an error: returning early would leave a pending entry whose key
    /// points into the released arena.
    fn doneCallback(ctx_ptr: *anyopaque) anyerror!void {
        const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
        const l = self.layer;
        const ctx = self.ctx;
        const robots_url = self.robots_url;
        // Registered first so it runs last: robots_url must outlive the flush.
        defer self.deinit();

        const rules = self.resolveRules() orelse {
            l.flushPending(ctx, robots_url, true);
            return;
        };

        var queued = l.pending.fetchRemove(robots_url) orelse
            @panic("RobotsLayer.doneCallback: missing queue");
        defer queued.value.deinit(l.allocator);

        for (queued.value.items) |queued_req| {
            if (rules.isAllowed(URL.getPathname(queued_req.url))) {
                l.next.request(ctx, queued_req) catch |e| {
                    defer queued_req.deinit();
                    queued_req.error_callback(queued_req.ctx, e);
                };
            } else {
                log.warn(.http, "blocked by robots", .{ .url = queued_req.url });
                defer queued_req.deinit();
                queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
            }
        }
    }

    /// Terminal callback for a failed fetch: logs the failure and lets every
    /// queued request through.
    fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
        const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
        const l = self.layer;
        const ctx = self.ctx;
        const robots_url = self.robots_url;
        defer self.deinit();

        log.warn(.http, "robots fetch failed", .{ .err = err });
        l.flushPending(ctx, robots_url, true);
    }

    /// Terminal callback for shutdown: notifies and drops every queued
    /// request without forwarding it.
    fn shutdownCallback(ctx_ptr: *anyopaque) void {
        const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
        const l = self.layer;
        const robots_url = self.robots_url;
        defer self.deinit();

        log.debug(.http, "robots fetch shutdown", .{});
        l.flushPendingShutdown(robots_url);
    }
};
|
||||
52
src/network/layer/WebBotAuthLayer.zig
Normal file
52
src/network/layer/WebBotAuthLayer.zig
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
|
||||
const URL = @import("../../browser/URL.zig");
|
||||
const WebBotAuth = @import("../WebBotAuth.zig");
|
||||
const Context = @import("../../browser/HttpClient.zig").Context;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
|
||||
const WebBotAuthLayer = @This();
|
||||
|
||||
next: Layer = undefined,
|
||||
|
||||
/// Returns the type-erased `Layer` handle for this layer; its vtable routes
/// `request` to this file's `request` function.
pub fn layer(self: *WebBotAuthLayer) Layer {
    return .{
        .ptr = self,
        .vtable = &.{
            .request = request,
        },
    };
}
|
||||
|
||||
/// Layer entry point: signs a copy of `req` for the host of its URL and
/// passes the signed copy to the next layer.
///
/// Panics when the network has no `web_bot_auth` configured. A scratch arena
/// is acquired for the signing call and released when this function returns.
fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void {
    const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr));

    const wba = ctx.network.web_bot_auth orelse @panic("WebBotAuthLayer shouldn't be active without WebBotAuth");

    const arena = try ctx.network.app.arena_pool.acquire(.small, "WebBotAuthLayer");
    defer ctx.network.app.arena_pool.release(arena);

    // Sign a copy so the headers of the caller's request value stay untouched.
    var signed = req;
    try wba.signRequest(arena, &signed.headers, URL.getHost(req.url));

    return self.next.request(ctx, signed);
}
|
||||
Reference in New Issue
Block a user