Merge branch 'main' into agent

This commit is contained in:
Adrià Arrufat
2026-04-10 09:32:43 +02:00
57 changed files with 1008 additions and 120 deletions

View File

@@ -51,8 +51,11 @@ jobs:
- uses: ./.github/actions/install
- name: v8 snapshot
run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast snapshot_creator -- src/snapshot.bin
- name: zig build release
run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=x86_64
run: zig build -Dsnapshot_path=../../snapshot.bin -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=x86_64
- name: upload artifact
uses: actions/upload-artifact@v7

View File

@@ -70,7 +70,7 @@
gcc.cc.lib
crtFiles
# Libaries
# Libraries
expat.dev
glib.dev
glibc.dev

View File

@@ -55,15 +55,15 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
.arena_pool = undefined,
};
app.network = try Network.init(allocator, app, config);
errdefer app.network.deinit();
app.platform = try Platform.init();
errdefer app.platform.deinit();
app.snapshot = try Snapshot.load();
errdefer app.snapshot.deinit();
app.network = try Network.init(allocator, app, config);
errdefer app.network.deinit();
app.app_dir_path = getAndMakeAppDir(allocator);
app.telemetry = try Telemetry.init(app, config.mode);

View File

@@ -180,6 +180,20 @@ pub fn webBotAuth(self: *const Config) ?WebBotAuthConfig {
};
}
/// Reports whether the `block_private_networks` flag is set in the common
/// options of the active mode. Only the serve, fetch and mcp modes are
/// handled; reaching this with any other mode is declared unreachable.
pub fn blockPrivateNetworks(self: *const Config) bool {
    switch (self.mode) {
        inline .serve, .fetch, .mcp => |mode_opts| return mode_opts.common.block_private_networks,
        else => unreachable,
    }
}
/// Returns the `block_cidrs` value stored in the common options of the active
/// mode, or null when none was set. Only the serve, fetch and mcp modes are
/// handled; reaching this with any other mode is declared unreachable.
pub fn blockCidrs(self: *const Config) ?[]const u8 {
    switch (self.mode) {
        inline .serve, .fetch, .mcp => |mode_opts| return mode_opts.common.block_cidrs,
        else => unreachable,
    }
}
pub fn maxConnections(self: *const Config) u16 {
return switch (self.mode) {
.serve => |opts| opts.cdp_max_connections,
@@ -288,6 +302,9 @@ pub const Common = struct {
web_bot_auth_key_file: ?[]const u8 = null,
web_bot_auth_keyid: ?[]const u8 = null,
web_bot_auth_domain: ?[]const u8 = null,
block_private_networks: bool = false,
block_cidrs: ?[]const u8 = null,
};
/// Pre-formatted HTTP headers for reuse across Http and Client.
@@ -350,6 +367,21 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
\\ we make requests towards.
\\ Defaults to false.
\\
\\--block-private-networks
\\ Blocks HTTP requests to private/internal IP addresses
\\ after DNS resolution. Useful for sandboxing, multi-tenant
\\ deployments, and preventing access to internal infrastructure
\\ regardless of what triggers the request (JavaScript, HTML
\\ resources, redirects, etc.).
\\ Defaults to false.
\\
\\--block-cidrs
\\ Additional CIDR ranges to block, comma-separated.
\\ Prefix with '-' to allow (exempt from blocking).
\\ e.g. --block-cidrs 169.254.169.254/32,fd00:ec2::254/128
\\ e.g. --block-cidrs 10.0.0.0/8,-10.0.0.42/32
\\ Can be used standalone or combined with --block-private-networks.
\\
\\--http-proxy The HTTP proxy to use for all HTTP requests.
\\ A username:password can be included for basic authentication.
\\ Defaults to none.
@@ -706,7 +738,7 @@ fn parseServeArgs(
}
log.fatal(.app, "unknown argument", .{ .mode = "serve", .arg = opt });
return error.UnkownOption;
return error.UnknownOption;
}
return serve;
@@ -737,7 +769,7 @@ fn parseMcpArgs(
}
log.fatal(.mcp, "unknown argument", .{ .mode = "mcp", .arg = opt });
return error.UnkownOption;
return error.UnknownOption;
}
return result;
@@ -879,7 +911,7 @@ fn parseFetchArgs(
if (std.mem.startsWith(u8, opt, "--")) {
log.fatal(.app, "unknown argument", .{ .mode = "fetch", .arg = opt });
return error.UnkownOption;
return error.UnknownOption;
}
if (url != null) {
@@ -1236,5 +1268,19 @@ fn parseCommonArg(
return true;
}
if (std.mem.eql(u8, "--block-private-networks", opt)) {
common.block_private_networks = true;
return true;
}
if (std.mem.eql(u8, "--block-cidrs", opt)) {
const str = args.next() orelse {
log.fatal(.app, "missing argument value", .{ .arg = "--block-cidrs" });
return error.InvalidArgument;
};
common.block_cidrs = try allocator.dupe(u8, str);
return true;
}
return false;
}

View File

@@ -380,7 +380,7 @@ pub fn destroy(self: *Factory, value: anytype) void {
// We should always destroy from the leaf down.
if (@hasDecl(S, "_prototype_root")) {
// A Event{._type == .generic} (or any other similar types)
// _should_ be destoyed directly. The _type = .generic is a pseudo
// _should_ be destroyed directly. The _type = .generic is a pseudo
// child
if (S != Event or value._type != .generic) {
log.fatal(.bug, "factory.destroy.event", .{ .type = @typeName(S) });

View File

@@ -98,7 +98,7 @@ pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empt
// Once we have a handle/easy to process a request with, we create a Transfer
// which contains the Request as well as any state we need to process the
// request. These wil come and go with each request.
// request. These will come and go with each request.
transfer_pool: std.heap.MemoryPool(Transfer),
// The current proxy. CDP can change it, changeProxy(null) restores
@@ -635,7 +635,7 @@ fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool {
}
// Above, request will not process if there's an interception request. In such
// cases, the interecptor is expected to call resume to continue the transfer
// cases, the interceptor is expected to call resume to continue the transfer
// or transfer.abort() to abort it.
fn process(self: *Client, transfer: *Transfer) !void {
// libcurl doesn't allow recursive calls, if we're in a `perform()` operation
@@ -772,7 +772,7 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer
// cleaning things up. That's why the above code is in a block. If anything
// fails BEFORE `curl_multi_add_handle` succeeds, the we still need to do
// cleanup. But if things fail after `curl_multi_add_handle`, we expect
// perfom to pickup the failure and cleanup.
// perform to pickup the failure and cleanup.
self.trackConn(conn) catch |err| {
transfer._conn = null;
transfer.deinit();
@@ -859,7 +859,7 @@ fn processOneMessage(self: *Client, msg: http.Handles.MultiMessage, transfer: *T
}
transfer._intercept_state = .pending;
// Wether or not this is a blocking request, we're not going
// Whether or not this is a blocking request, we're not going
// to process it now. We can end the transfer, which will
// release the easy handle back into the pool. The transfer
// is still valid/alive (just has no handle).

View File

@@ -97,7 +97,7 @@ _parse_mode: enum { document, fragment, document_write } = .document,
// identity (a given attribute should return the same *Attribute), so we do
// a lookup here. We don't store this in the Element or Attribute.List.Entry
// because that would require additional space per element / Attribute.List.Entry
// even thoug we'll create very few (if any) actual *Attributes.
// even though we'll create very few (if any) actual *Attributes.
_attribute_lookup: std.AutoHashMapUnmanaged(usize, *Element.Attribute) = .empty,
// Same as _attribute_lookup, but instead of individual attributes, this is for
@@ -480,7 +480,7 @@ pub fn navigate(self: *Page, request_url: [:0]const u8, opts: NavigateOpts) !voi
// even though this might be the same _data_ as `default_location`, we
// have to do this to make sure window.location is at a unique _address_.
// If we don't do this, mulitple window._location will have the same
// If we don't do this, multiple window._location will have the same
// address and thus be mapped to the same v8::Object in the identity map.
self.window._location = try Location.init(self.url, self);
@@ -690,7 +690,7 @@ fn scheduleNavigationWithArena(originator: *Page, arena: Allocator, request_url:
});
// This is a micro-optimization. Terminate any inflight request as early
// as we can. This will be more propery shutdown when we process the
// as we can. This will be more properly shutdown when we process the
// scheduled navigation.
if (target.parent == null) {
session.browser.http_client.abort();
@@ -1174,7 +1174,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *IFrame) !void {
iframe._window = page_frame.window;
errdefer iframe._window = null;
// on first load, dispatch frame_created evnet
// on first load, dispatch frame_created event
self._session.notification.dispatch(.page_frame_created, &.{
.frame_id = frame_id,
.parent_id = self._frame_id,
@@ -3213,7 +3213,7 @@ const IdleNotification = union(enum) {
init,
// timestamp where the state was first triggered. If the state stays
// true (e.g. 0 nework activity for NetworkIdle, or <= 2 for NetworkAlmostIdle)
// true (e.g. 0 network activity for NetworkIdle, or <= 2 for NetworkAlmostIdle)
// for 500ms, it'll send the notification and transition to .done. If
// the state doesn't stay true, it'll revert to .init.
triggered: u64,
@@ -3475,7 +3475,10 @@ pub fn handleClick(self: *Page, target: *Node) !void {
pub fn triggerKeyboard(self: *Page, keyboard_event: *KeyboardEvent) !void {
const event = keyboard_event.asEvent();
const element = self.window._document._active_element orelse return;
const element = self.window._document._active_element orelse {
event.deinit(self._session);
return;
};
if (comptime IS_DEBUG) {
log.debug(.page, "page keydown", .{

View File

@@ -71,7 +71,7 @@ allocator: Allocator,
// source is placed here (keyed by the full url) for some point in the future
// when v8 asks for it.
// The type is confusing (too confusing? move to a union). Starts off as `null`
// then transitions to either an error (from errorCalback) or the completed
// then transitions to either an error (from errorCallback) or the completed
// buffer from doneCallback
imported_modules: std.StringHashMapUnmanaged(ImportedModule),

View File

@@ -236,6 +236,8 @@ pub fn releaseOrigin(self: *Session, origin: *js.Origin) void {
/// Reset page_arena and factory for a clean slate.
/// Called when root page is removed.
fn resetPageResources(self: *Session) void {
defer self.browser.env.memoryPressureNotification(.moderate);
self.identity.deinit();
self.identity = .{};
@@ -294,7 +296,6 @@ pub fn replacePage(self: *Session) !*Page {
current.deinit(true);
self.resetPageResources();
self.browser.env.memoryPressureNotification(.moderate);
self.page = @as(Page, undefined);
const page = &self.page.?;

View File

@@ -667,7 +667,7 @@ const VisibilityProperties = struct {
opacity_zero: ?bool = null,
pointer_events_none: ?bool = null,
// returne true if any field in VisibilityProperties is not null
// return true if any field in VisibilityProperties is not null
fn isRelevant(self: VisibilityProperties) bool {
return self.display_none != null or
self.visibility_hidden != null or

View File

@@ -40,7 +40,7 @@ pub const Opts = struct {
// Skip shadow DOM entirely (innerHTML/outerHTML)
skip,
// Dump everyhting (like "view source")
// Dump everything (like "view source")
complete,
// Resolve slot elements (like what actually gets rendered)

View File

@@ -39,9 +39,22 @@ prev_local: ?*const js.Local,
prev_context: *Context,
// Takes the raw v8 isolate and extracts the context from it.
pub fn init(self: *Caller, v8_isolate: *v8.Isolate) void {
const ctx, const v8_context = Context.fromIsolate(.{ .handle = v8_isolate });
// Returns false if the context has been destroyed (e.g., navigated-away iframe),
// in which case a JS exception has been thrown and the caller should return immediately.
pub fn init(self: *Caller, v8_isolate: *v8.Isolate) bool {
const ctx, const v8_context = Context.fromIsolate(.{ .handle = v8_isolate }) orelse {
throwDetachedError(v8_isolate);
return false;
};
initWithContext(self, ctx, v8_context);
return true;
}
/// Throws a JS `Error` on `isolate` stating that execution was attempted in a
/// detached context. The return value of the throw call is discarded.
fn throwDetachedError(isolate: *v8.Isolate) void {
    const text = "Cannot execute in detached context (e.g., navigated-away iframe)";
    // Build the V8 string first, then wrap it in an Error and throw in one step.
    const js_text = v8.v8__String__NewFromUtf8(isolate, text.ptr, v8.kNormal, @intCast(text.len));
    _ = v8.v8__Isolate__ThrowException(isolate, v8.v8__Exception__Error(js_text));
}
fn initWithContext(self: *Caller, ctx: *Context, v8_context: *const v8.Context) void {
@@ -60,9 +73,9 @@ fn initWithContext(self: *Caller, ctx: *Context, v8_context: *const v8.Context)
ctx.local = &self.local;
}
pub fn initFromHandle(self: *Caller, handle: ?*const v8.FunctionCallbackInfo) void {
pub fn initFromHandle(self: *Caller, handle: ?*const v8.FunctionCallbackInfo) bool {
const isolate = v8.v8__FunctionCallbackInfo__GetIsolate(handle).?;
self.init(isolate);
return self.init(isolate);
}
pub fn deinit(self: *Caller) void {
@@ -348,7 +361,7 @@ fn handleError(comptime T: type, comptime F: type, local: *const Local, err: any
error.InvalidArgument => isolate.createTypeError("invalid argument"),
error.TypeError => isolate.createTypeError(""),
error.OutOfMemory => isolate.createError("out of memory"),
error.IllegalConstructor => isolate.createError("Illegal Contructor"),
error.IllegalConstructor => isolate.createError("Illegal Constructor"),
else => blk: {
if (comptime opts.dom_exception) {
const DOMException = @import("../webapi/DOMException.zig");
@@ -538,7 +551,10 @@ pub const Function = struct {
pub fn call(comptime T: type, info_handle: *const v8.FunctionCallbackInfo, func: anytype, comptime opts: Opts) void {
const v8_isolate = v8.v8__FunctionCallbackInfo__GetIsolate(info_handle).?;
const ctx, const v8_context = Context.fromIsolate(.{ .handle = v8_isolate });
const ctx, const v8_context = Context.fromIsolate(.{ .handle = v8_isolate }) orelse {
throwDetachedError(v8_isolate);
return;
};
const info = FunctionCallbackInfo{ .handle = info_handle };
var hs: js.HandleScope = undefined;
@@ -617,7 +633,7 @@ pub const Function = struct {
if (v8.v8__Object__GetInternalField(js_this, idx)) |cached| {
// means we can't cache undefined, since we can't tell the
// difference between "it isn't in the cache" and "it's
// in the cache with a valud of undefined"
// in the cache with a value of undefined"
if (!v8.v8__Value__IsUndefined(cached)) {
return_value.set(cached);
return true;

View File

@@ -114,7 +114,7 @@ scheduler: Scheduler,
unknown_properties: (if (IS_DEBUG) std.StringHashMapUnmanaged(UnknownPropertyStat) else void) = if (IS_DEBUG) .{} else {},
const ModuleEntry = struct {
// Can be null if we're asynchrously loading the module, in
// Can be null if we're asynchronously loading the module, in
// which case resolver_promise cannot be null.
module: ?js.Module.Global = null,
@@ -138,7 +138,8 @@ pub fn fromC(c_context: *const v8.Context) ?*Context {
/// Returns the Context and v8::Context for the given isolate.
/// If the current context is from a destroyed Context (e.g., navigated-away iframe),
/// falls back to the incumbent context (the calling context).
pub fn fromIsolate(isolate: js.Isolate) struct { *Context, *const v8.Context } {
/// Returns null if neither context has a valid Context struct (both were destroyed).
pub fn fromIsolate(isolate: js.Isolate) ?struct { *Context, *const v8.Context } {
const v8_context = v8.v8__Isolate__GetCurrentContext(isolate.handle).?;
if (fromC(v8_context)) |ctx| {
return .{ ctx, v8_context };
@@ -146,7 +147,8 @@ pub fn fromIsolate(isolate: js.Isolate) struct { *Context, *const v8.Context } {
// The current context's Context struct has been freed (e.g., iframe navigated away).
// Fall back to the incumbent context (the calling context).
const v8_incumbent = v8.v8__Isolate__GetIncumbentContext(isolate.handle).?;
return .{ fromC(v8_incumbent).?, v8_incumbent };
const ctx = fromC(v8_incumbent) orelse return null;
return .{ ctx, v8_incumbent };
}
pub fn deinit(self: *Context) void {
@@ -742,7 +744,7 @@ fn _dynamicModuleCallback(self: *Context, specifier: [:0]const u8, referrer: []c
// since we're going to be doing all the work.
entry.resolver_promise = try promise.persist();
// But we can skip direclty to `resolveDynamicModule` which is
// But we can skip directly to `resolveDynamicModule` which is
// what the above callback will eventually do.
self.resolveDynamicModule(state, entry.*, local);
return promise;
@@ -806,7 +808,9 @@ fn resolveDynamicModule(self: *Context, state: *DynamicModuleResolveState, modul
const then_callback = newFunctionWithData(local, struct {
pub fn callback(callback_handle: ?*const v8.FunctionCallbackInfo) callconv(.c) void {
var c: Caller = undefined;
c.initFromHandle(callback_handle);
if (!c.initFromHandle(callback_handle)) {
return;
}
defer c.deinit();
const info = Caller.FunctionCallbackInfo{ .handle = callback_handle.? };
@@ -830,7 +834,7 @@ fn resolveDynamicModule(self: *Context, state: *DynamicModuleResolveState, modul
const catch_callback = newFunctionWithData(local, struct {
pub fn callback(callback_handle: ?*const v8.FunctionCallbackInfo) callconv(.c) void {
var c: Caller = undefined;
c.initFromHandle(callback_handle);
if (!c.initFromHandle(callback_handle)) return;
defer c.deinit();
const info = Caller.FunctionCallbackInfo{ .handle = callback_handle.? };

View File

@@ -519,7 +519,7 @@ fn promiseRejectCallback(message_handle: v8.PromiseRejectMessage) callconv(.c) v
const promise_handle = v8.v8__PromiseRejectMessage__GetPromise(&message_handle).?;
const v8_isolate = v8.v8__Object__GetIsolate(@ptrCast(promise_handle)).?;
const isolate = js.Isolate{ .handle = v8_isolate };
const ctx, const v8_context = Context.fromIsolate(isolate);
const ctx, const v8_context = Context.fromIsolate(isolate) orelse return;
const local = js.Local{
.ctx = ctx,

View File

@@ -146,7 +146,7 @@ fn _tryCallWithThis(self: *const Function, comptime T: type, this: anytype, args
}
break :blk values;
},
else => @compileError("JS Function called with invalid paremter type"),
else => @compileError("JS Function called with invalid parameter type"),
};
const c_args = @as(?[*]const ?*v8.Value, @ptrCast(js_args.ptr));

View File

@@ -195,7 +195,7 @@ pub const RemoteObject = struct {
// Combines a v8::InspectorSession and a v8::InspectorChannelImpl. The
// InspectorSession is for zig -> v8 (sending messages to the inspector). The
// Channel is for v8 -> zig, getting events from the Inspector (that we'll pass
// back ot some opaque context, i.e the CDP BrowserContext).
// back to some opaque context, i.e the CDP BrowserContext).
// The channel callbacks are defined below, as:
// pub export fn v8_inspector__Channel__IMPL__XYZ
pub const Session = struct {

View File

@@ -39,7 +39,7 @@ const CallOpts = Caller.CallOpts;
// v8::Local<v8::Context>. In V8, you need a Local<v8::Context> to get anything
// done, but the local only exists for the lifetime of the HandleScope it was
// created on. When V8 calls into Zig, things are pretty straightforward, since
// that callback gives us the currenty-entered V8::Local<Context>. But when Zig
// that callback gives us the currently-entered V8::Local<Context>. But when Zig
// has to call into V8, it's a bit more messy.
// As a general rule, think of it this way:
// 1 - Caller.zig is for V8 -> Zig
@@ -503,7 +503,7 @@ pub fn jsValueToZig(self: *const Local, comptime T: type, js_val: js.Value) !T {
.optional => |o| {
// If the type is a ?js.Value or a ?js.Object, then we want to pass
// a js.Object, not null. Consider a function,
// _doSomething(arg: ?Env.JsObjet) void { ... }
// _doSomething(arg: ?Env.JsObject) void { ... }
//
// And then these two calls:
// doSomething();

View File

@@ -82,7 +82,7 @@ pub fn run(self: *Scheduler) !void {
pub fn hasReadyTasks(self: *Scheduler) bool {
const now = milliTimestamp(.monotonic);
return queueuHasReadyTask(&self.low_priority, now) or queueuHasReadyTask(&self.high_priority, now);
return queueHasReadyTask(&self.low_priority, now) or queueHasReadyTask(&self.high_priority, now);
}
pub fn msToNextHigh(self: *Scheduler) ?u64 {
@@ -125,7 +125,7 @@ fn runQueue(self: *Scheduler, queue: *Queue, now: u64) !void {
return;
}
fn queueuHasReadyTask(queue: *Queue, now: u64) bool {
fn queueHasReadyTask(queue: *Queue, now: u64) bool {
const task = queue.peek() orelse return false;
return task.run_at <= now;
}

View File

@@ -32,7 +32,7 @@ const embedded_snapshot_blob = if (@import("build_config").snapshot_path) |path|
// When creating our Snapshot, we use local function templates for every Zig type.
// You cannot, from what I can tell, create persisted FunctionTemplates at
// snapshot creation time. But you can embedd those templates (or any other v8
// snapshot creation time. But you can embed those templates (or any other v8
// Data) so that it's available to contexts created from the snapshot. This is
// the starting index of those function templates, which we can extract. At
// creation time, in debug, we assert that this is actually a consecutive integer

View File

@@ -116,7 +116,9 @@ pub const Constructor = struct {
fn wrap(handle: ?*const v8.FunctionCallbackInfo) callconv(.c) void {
const v8_isolate = v8.v8__FunctionCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return;
}
defer caller.deinit();
caller.constructor(T, func, handle.?, .{
@@ -216,7 +218,9 @@ pub const Indexed = struct {
fn wrap(idx: u32, handle: ?*const v8.PropertyCallbackInfo) callconv(.c) u8 {
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
return caller.getIndex(T, getter, idx, handle.?, .{
@@ -232,7 +236,9 @@ pub const Indexed = struct {
fn wrap(handle: ?*const v8.PropertyCallbackInfo) callconv(.c) u8 {
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
return caller.getEnumerator(T, enumerator, handle.?, .{});
}
@@ -258,7 +264,9 @@ pub const NamedIndexed = struct {
fn wrap(c_name: ?*const v8.Name, handle: ?*const v8.PropertyCallbackInfo) callconv(.c) u8 {
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
return caller.getNamedIndex(T, getter, c_name.?, handle.?, .{
@@ -272,7 +280,9 @@ pub const NamedIndexed = struct {
fn wrap(c_name: ?*const v8.Name, c_value: ?*const v8.Value, handle: ?*const v8.PropertyCallbackInfo) callconv(.c) u8 {
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
return caller.setNamedIndex(T, setter, c_name.?, c_value.?, handle.?, .{
@@ -286,7 +296,9 @@ pub const NamedIndexed = struct {
fn wrap(c_name: ?*const v8.Name, handle: ?*const v8.PropertyCallbackInfo) callconv(.c) u8 {
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
return caller.deleteNamedIndex(T, deleter, c_name.?, handle.?, .{
@@ -387,7 +399,9 @@ pub const Property = struct {
pub fn unknownWindowPropertyCallback(c_name: ?*const v8.Name, handle: ?*const v8.PropertyCallbackInfo) callconv(.c) u8 {
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
const local = &caller.local;
@@ -465,7 +479,9 @@ pub fn unknownObjectPropertyCallback(comptime JsApi: type) *const fn (?*const v8
const v8_isolate = v8.v8__PropertyCallbackInfo__GetIsolate(handle).?;
var caller: Caller = undefined;
caller.init(v8_isolate);
if (!caller.init(v8_isolate)) {
return 0;
}
defer caller.deinit();
const local = &caller.local;

View File

@@ -35,7 +35,7 @@ pub const ParsedNode = struct {
// Data associated with this element to be passed back to html5ever as needed
// We only have this for Elements. For other types, like comments, it's null.
// html5ever should never ask us for this data on a non-element, and we'll
// assert that, with this opitonal, to make sure our assumption is correct.
// assert that, with this optional, to make sure our assumption is correct.
data: ?*anyopaque,
};

View File

@@ -108,7 +108,7 @@
}
</script>
<script id=createHTMLDocument_nulll_title>
<script id=createHTMLDocument_null_title>
{
const impl = document.implementation;
const doc = impl.createHTMLDocument(null);

View File

@@ -327,7 +327,7 @@
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
<description>After an inadvertent trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>

View File

@@ -120,7 +120,7 @@
</script>
<div id=parent4><div id=child4></div></div>
<script id=stpoPropagation>
<script id=stopPropagation>
child_calls = 0;
parent_calls = 0;
child = $('#child4');

View File

@@ -2,7 +2,7 @@
<script src="../testing.js"></script>
<body></body>
<script id=documentFragement>
<script id=documentFragment>
testing.expectEqual('DocumentFragment', new DocumentFragment().constructor.name);
const dc1 = new DocumentFragment();

View File

@@ -11,7 +11,7 @@
</body>
<script src="../testing.js"></script>
<script id=caseInsensitve>
<script id=caseInsensitive>
const Ptags = document.getElementsByTagName('P');
testing.expectEqual(2, Ptags.length);
testing.expectEqual('p', Ptags.item(0).localName);

View File

@@ -31,10 +31,10 @@
testing.expectEqual(true, evt.timeStamp >= Math.floor(startTime));
</script>
<script id=propogate>
<script id=propagate>
nb = 0;
let para = $('#para');
// the stop listener is capturing, so it propogates down
// the stop listener is capturing, so it propagates down
content.addEventListener('stop',function(e) {
e.stopPropagation();
nb += 1;
@@ -45,7 +45,7 @@
});
para.dispatchEvent(new Event('stop'));
// didn't propogate down (because of capturing) to para handler
// didn't propagate down (because of capturing) to para handler
testing.expectEqual(1, nb);
</script>

View File

@@ -35,7 +35,7 @@
<!-- -->
<!-- -->
Atreides</h3>
<script id=adjascent_test_nodes>
<script id=adjacent_test_nodes>
const token = $('#token');
testing.expectEqual('"puppeteer "', token.firstChild.nodeValue);

View File

@@ -146,7 +146,7 @@
if (IS_TEST_RUNNER === false) {
// The page is running in a different browser. Probably a developer making sure
// a test is correct. There are a few tweaks we need to do to make this a
// seemless, namely around adapting paths/urls.
// seamless, namely around adapting paths/urls.
console.warn(`The page is not being executed in the test runner, certain behavior has been adjusted`);
window.testing.HOST = location.hostname;
window.testing.ORIGIN = location.origin;

View File

@@ -36,7 +36,7 @@
ALT_BASE + 'window/support/frame2.html',
];
// No strong order guarantee for messaages, and we don't care about the order
// No strong order guarantee for messages, and we don't care about the order
// so long as it's the correct data.
testing.expectEqual(expected_urls.sort(), captures.map((c) => {return c.url}).sort());
captures.forEach((c) => {

View File

@@ -41,7 +41,7 @@
// the above resolve() would stop the test running after "scroll" fires but
// before "scrollend" fires. That timing is pretty sensitive/fragile. If
// the browser gets delayed and doesn't figure the scroll event exactly when
// schedule, it could easily execute in the same sheduler.run call as the
// schedule, it could easily execute in the same scheduler.run call as the
// scrollend.
testing.expectEqual(true, scrollendevt === 1 || scrollendevt === 2);
});

View File

@@ -171,7 +171,7 @@ pub const RenderOpts = struct {
trim_left: bool = true,
trim_right: bool = true,
};
// Replace successives whitespaces with one withespace.
// Replace successive whitespaces with one whitespace.
// Trims left and right according to the options.
// Returns true if the string ends with a trimmed whitespace.
pub fn render(self: *const CData, writer: *std.io.Writer, opts: RenderOpts) !bool {

View File

@@ -645,7 +645,7 @@ pub fn write(self: *Document, text: []const []const u8, page: *Page) !void {
if (self._script_created_parser) |*parser| {
parser.read(html) catch |err| {
log.warn(.dom, "document.write parser error", .{ .err = err });
// was alrady closed
// was already closed
self._script_created_parser = null;
};
}
@@ -657,7 +657,7 @@ pub fn write(self: *Document, text: []const []const u8, page: *Page) !void {
const script = self._current_script.?;
const parent = script.asNode().parentNode() orelse return;
// Our implemnetation is hacky. We'll write to a DocumentFragment, then
// Our implementation is hacky. We'll write to a DocumentFragment, then
// append its children.
const fragment = try Node.DocumentFragment.init(page);
const fragment_node = fragment.asNode();

View File

@@ -83,7 +83,7 @@ pub const Namespace = enum(u8) {
pub fn parse(namespace_: ?[]const u8) Namespace {
const namespace = namespace_ orelse return .null;
if (namespace.len == "http://www.w3.org/1999/xhtml".len) {
// Common case, avoid the string comparion. Recklessly
// Common case, avoid the string comparison. Recklessly
@branchHint(.likely);
return .html;
}
@@ -847,7 +847,7 @@ pub fn replaceWith(self: *Element, nodes: []const Node.NodeOrText, page: *Page)
const parent_is_connected = parent.isConnected();
// Detect if the ref_node must be removed (byt default) or kept.
// Detect if the ref_node must be removed (by default) or kept.
// We keep it when ref_node is present in the nodes list.
var rm_ref_node = true;

View File

@@ -255,7 +255,7 @@ pub const Iterator = struct {
// is being composed by various types, so it can't reference those types.
// Using *anyopaque here is "dangerous", in that it requires the composer
// to pass the right value, which normally would be itself (`*Self`), but
// only because (as of now) everyting that uses KeyValueList has no prototype
// only because (as of now) everything that uses KeyValueList has no prototype
list: *anyopaque,
pub const Entry = struct { []const u8, []const u8 };

View File

@@ -183,7 +183,7 @@ pub fn getOnFinish(self: *const Animation) ?js.Function.Temp {
return self._onFinish;
}
// callback function transitionning from a state to another
// callback function transitioning from a state to another
fn update(ctx: *anyopaque) !?u32 {
const self: *Animation = @ptrCast(@alignCast(ctx));

View File

@@ -31,7 +31,7 @@ pub const _prototype_root = true;
_width: u32,
_height: u32,
/// Since there's no base class rendering contextes inherit from,
/// Since there's no base class rendering contexts inherit from,
/// we're using tagged union.
const DrawingContext = union(enum) {
@"2d": *OffscreenCanvasRenderingContext2D,

View File

@@ -4,7 +4,7 @@ const Node = @import("Node.zig");
const LinkedList = std.DoublyLinkedList;
// Our node._chilren is of type ?*NodeList. The extra (extra) indirection is to
// Our node._children is of type ?*NodeList. The extra (extra) indirection is to
// keep memory size down.
// First, a lot of nodes have no children. For these nodes, `?*NodeList = null`
// will take 8 bytes and require no allocations (because an optional pointer in

View File

@@ -115,7 +115,7 @@ pub const JsApi = struct {
// imagine a page will have tens of thousands of attributes, and it's very likely
// that page will _never_ load a single Attribute. It might get a string value
// from a string key, but it won't load the full Attribute. And, even if it does,
// it will almost certainly load realtively few.
// it will almost certainly load relatively few.
// The main issue with Attribute is that it's a full Node -> EventTarget. It's
// _huge_ for something that's essentially just name=>value.
// That said, we need identity. el.getAttributeNode("id") should return the same
@@ -234,7 +234,7 @@ pub const List = struct {
}
// Optimized for cloning. We know `name` is already normalized. We know there aren't duplicates.
// We know the Element is detatched (and thus, don't need to check for `id`).
// We know the Element is detached (and thus, don't need to check for `id`).
pub fn putForCloned(self: *List, name: []const u8, value: []const u8, page: *Page) !void {
const entry = try page._factory.create(Entry{
._node = .{},
@@ -266,7 +266,7 @@ pub const List = struct {
// called form our parser, names already lower-cased
pub fn putNew(self: *List, name: []const u8, value: []const u8, page: *Page) !void {
if (try self.getEntry(.wrap(name), page) != null) {
// When parsing, if there are dupicate names, it isn't valid, and
// When parsing, if there are duplicate names, it isn't valid, and
// the first is kept
return;
}

View File

@@ -63,7 +63,7 @@ pub fn setHeight(self: *Canvas, value: u32, page: *Page) !void {
try self.asElement().setAttributeSafe(comptime .wrap("height"), .wrap(str), page);
}
/// Since there's no base class rendering contextes inherit from,
/// Since there's no base class rendering contexts inherit from,
/// we're using tagged union.
const DrawingContext = union(enum) {
@"2d": *CanvasRenderingContext2D,

View File

@@ -34,7 +34,7 @@ const MouseEvent = @This();
pub const MouseButton = enum(u8) {
main = 0,
auxillary = 1,
auxiliary = 1,
secondary = 2,
fourth = 3,
fifth = 4,

View File

@@ -58,7 +58,7 @@ pub fn deinit(self: *const Cookie) void {
// - any shenanigans with the domain attribute - it has to be the current
// domain or one of higher order, excluding TLD.
// Anything else, will turn into a cookie.
// Single value? That's a cookie with an emtpy name and a value
// Single value? That's a cookie with an empty name and a value
// Key or Values with characters the RFC says aren't allowed? Allowed! (
// (as long as the characters are 32...126)
// Invalid attributes? Ignored.
@@ -384,7 +384,7 @@ pub fn appliesTo(self: *const Cookie, url: *const PreparedUri, same_site: bool,
{
if (self.path[self.path.len - 1] == '/') {
// If our cookie has a trailing slash, we can only match is
// the target path is a perfix. I.e., if our path is
// the target path is a prefix. I.e., if our path is
// /doc/ we can only match /doc/*
if (std.mem.startsWith(u8, url.path, self.path) == false) {
return false;

View File

@@ -174,7 +174,7 @@ pub fn doError(self: *ReadableStreamDefaultController, err: []const u8) !void {
// Reject all pending reads
for (self._pending_reads.items) |resolver| {
self._page.js.toLocal(resolver).reject("stream errror", err);
self._page.js.toLocal(resolver).reject("stream error", err);
}
self._pending_reads.clearRetainingCapacity();
}

View File

@@ -28,7 +28,7 @@ const Node = @import("Node.zig");
const AXNode = @This();
// Need a custom writer, because we can't just serialize the node as-is.
// Sometimes we want to serializ the node without chidren, sometimes with just
// Sometimes we want to serialize the node without children, sometimes with just
// its direct children, and sometimes the entire tree.
// (For now, we only support direct children)
pub const Writer = struct {
@@ -1091,7 +1091,7 @@ pub fn getRole(self: AXNode) ![]const u8 {
return @tagName(role_implicit);
}
// Replace successives whitespaces with one withespace.
// Replace successive whitespace characters with a single whitespace.
// Trims left and right according to the options.
// Returns true if the string ends with a trimmed whitespace.
fn writeString(s: []const u8, w: anytype) !void {

View File

@@ -76,7 +76,7 @@ notification_arena: std.heap.ArenaAllocator,
page_arena: std.heap.ArenaAllocator,
// Valid for the entire lifetime of the BrowserContext. Should minimize
// (or altogether elimiate) our use of this.
// (or altogether eliminate) our use of this.
browser_context_arena: std.heap.ArenaAllocator,
pub fn init(client: *Client) !CDP {
@@ -444,7 +444,7 @@ pub const BrowserContext = struct {
env.inspector.?.resetContextGroup();
env.inspector.?.stopSession();
// abort all intercepted requests before closing the sesion/page
// abort all intercepted requests before closing the session/page
// since some of these might callback into the page/scriptmanager
for (self.intercept_state.pendingTransfers()) |transfer| {
transfer.abort(error.ClientDisconnect);
@@ -787,7 +787,7 @@ const ScriptOnNewDocument = struct {
};
/// in the isolated world by using its Context ID or the worldName.
/// grantUniveralAccess Indecated whether the isolated world can reference objects like the DOM or other JS Objects.
/// grantUniversalAccess Indicates whether the isolated world can reference objects like the DOM or other JS Objects.
/// An isolated world has it's own instance of globals like Window.
/// Generally the client needs to resolve a node into the isolated world to be able to work with it.
/// An object id is unique across all contexts, different object ids can refer to the same Node in different contexts.
@@ -819,7 +819,7 @@ const IsolatedWorld = struct {
}
// The isolate world must share at least some of the state with the related page, specifically the DocumentHTML
// (assuming grantUniveralAccess will be set to True!).
// (assuming grantUniversalAccess will be set to True!).
// We just created the world and the page. The page's state lives in the session, but is update on navigation.
// This also means this pointer becomes invalid after removePage until a new page is created.
// Currently we have only 1 page/frame and thus also only 1 state in the isolate world.
@@ -862,7 +862,7 @@ pub const Command = struct {
// In most cases, Sender is going to be cdp itself. We'll call
// sender.sendJSON() and CDP will send it to the client. But some
// comamnds are dispatched internally, in which cases the Sender will
// commands are dispatched internally, in which cases the Sender will
// be code to capture the data that we were "sending".
sender: Sender,
@@ -1021,14 +1021,14 @@ test "cdp: invalid sessionId" {
}
{
// we have a brower context but no session_id
// we have a browser context but no session_id
_ = try ctx.loadBrowserContext(.{});
try ctx.processMessage(.{ .method = "Hi", .sessionId = "BC-Has-No-SessionId" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
}
{
// we have a brower context with a different session_id
// we have a browser context with a different session_id
_ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
try ctx.processMessage(.{ .method = "Hi", .sessionId = "SESS-1" });
try ctx.expectSentError(-32001, "Unknown sessionId", .{});
@@ -1046,14 +1046,14 @@ test "cdp: STARTUP sessionId" {
}
{
// we have a brower context but no session_id
// we have a browser context but no session_id
_ = try ctx.loadBrowserContext(.{});
try ctx.processMessage(.{ .id = 3, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 3, .index = 1, .session_id = "STARTUP" });
}
{
// we have a brower context with a different session_id
// we have a browser context with a different session_id
_ = try ctx.loadBrowserContext(.{ .session_id = "SESS-2" });
try ctx.processMessage(.{ .id = 4, .method = "Hi", .sessionId = "STARTUP" });
try ctx.expectSentResult(null, .{ .id = 4, .index = 2, .session_id = "STARTUP" });

View File

@@ -197,7 +197,7 @@ pub const Search = struct {
};
// Need a custom writer, because we can't just serialize the node as-is.
// Sometimes we want to serializ the node without chidren, sometimes with just
// Sometimes we want to serialize the node without children, sometimes with just
// its direct children, and sometimes the entire tree.
// (For now, we only support direct children)

View File

@@ -824,7 +824,7 @@ test "cdp.page: getFrameTree" {
}
{
// STARTUP sesion is handled when a broweser context and a target id exists.
// STARTUP session is handled when a browser context and a target id exist.
try ctx.processMessage(.{ .id = 12, .method = "Page.getFrameTree", .session_id = "STARTUP" });
try ctx.expectSentResult(.{
.frameTree = .{

View File

@@ -438,7 +438,7 @@ fn setAutoAttach(cmd: *CDP.Command) !void {
// This is a hack. Puppeteer, and probably others, expect the Browser to
// automatically started creating targets. Things like an empty tab, or
// a blank page. And they block until this happens. So we send an event
// telling them that they've been attached to our Broswer. Hopefully, the
// telling them that they've been attached to our Browser. Hopefully, the
// first thing they'll do is create a real BrowserContext and progress from
// there.
// This hack requires the main cdp dispatch handler to special case

View File

@@ -354,7 +354,7 @@ pub const DateTime = struct {
return error.OutsideJulianPeriod;
}
// Per the spec, it can be argued thatt 't' and even ' ' should be allowed,
// Per the spec, it can be argued that 't' and even ' ' should be allowed,
// but certainly not encouraged.
if (parser.consumeIf('T') == false) {
return error.InvalidDateTime;
@@ -569,7 +569,7 @@ fn writeDate(into: []u8, date: Date) u8 {
// cast year to a u16 so it doesn't insert a sign
// we don't want the + sign, ever
// and we don't even want it to insert the - sign, because it screws up
// the padding (we need to do it ourselfs)
// the padding (we need to do it ourselves)
const year = date.year;
if (year < 0) {
_ = std.fmt.printInt(into[1..], @as(u16, @intCast(year * -1)), 10, .lower, .{ .width = 4, .fill = '0' });

View File

@@ -31,12 +31,13 @@ pub fn main() !void {
// allocator
// - in Debug mode we use the General Purpose Allocator to detect memory leaks
// - in Release mode we use the c allocator
var gpa_instance: std.heap.DebugAllocator(.{ .stack_trace_frames = 10 }) = .init;
const gpa = if (builtin.mode == .Debug) gpa_instance.allocator() else std.heap.c_allocator;
// var gpa_instance: std.heap.DebugAllocator(.{ .stack_trace_frames = 10 }) = .init;
// const gpa = if (builtin.mode == .Debug) gpa_instance.allocator() else std.heap.c_allocator;
const gpa = std.heap.c_allocator;
defer if (builtin.mode == .Debug) {
if (gpa_instance.detectLeaks()) std.posix.exit(1);
};
// defer if (builtin.mode == .Debug) {
// if (gpa_instance.detectLeaks()) std.posix.exit(1);
// };
// arena for main-specific allocations
var main_arena_instance = std.heap.ArenaAllocator.init(gpa);

624
src/network/IpFilter.zig Normal file
View File

@@ -0,0 +1,624 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const posix = std.posix;
const libcurl = @import("../sys/libcurl.zig");
const IpFilter = @This();
/// Raw address bytes, most-significant byte first, used for bitwise CIDR comparison.
pub const Ipv4Addr = [4]u8;
pub const Ipv6Addr = [16]u8;

/// An IPv4 CIDR range: the address read as a big-endian u32 plus the
/// bitmask selecting the `prefix_len` leading bits.
pub const CidrV4 = struct {
    network: u32,
    mask: u32,

    /// Build a range from an address and a prefix length (expected 0..=32).
    /// The address is stored as given; host bits are ignored at match time
    /// because comparisons are always AND-ed with `mask`.
    fn fromPrefix(addr: Ipv4Addr, prefix_len: u6) CidrV4 {
        const all_ones: u32 = std.math.maxInt(u32);
        return .{
            .network = std.mem.readInt(u32, &addr, .big),
            .mask = switch (prefix_len) {
                // Shifting a u32 by 32 is not representable, so the two
                // extremes are handled explicitly.
                0 => 0,
                32 => all_ones,
                else => ~(all_ones >> @intCast(prefix_len)),
            },
        };
    }
};
/// An IPv6 CIDR range, split into two big-endian 64-bit halves so that
/// matching only needs plain u64 arithmetic.
pub const CidrV6 = struct {
    network_hi: u64,
    network_lo: u64,
    mask_hi: u64,
    mask_lo: u64,

    /// Build a range from an address and a prefix length.
    /// Prefix lengths of 128 or more select every bit of the address.
    fn fromPrefix(addr: Ipv6Addr, prefix_len: u8) CidrV6 {
        const ones: u64 = std.math.maxInt(u64);

        // The prefix covers the high half first and spills into the low half
        // only once it exceeds 64 bits. Shifts by a full 64 bits are not
        // representable, hence the explicit boundary cases.
        const masks: struct { hi: u64, lo: u64 } = switch (prefix_len) {
            0 => .{ .hi = 0, .lo = 0 },
            1...63 => .{ .hi = ~(ones >> @intCast(prefix_len)), .lo = 0 },
            64 => .{ .hi = ones, .lo = 0 },
            65...127 => .{ .hi = ones, .lo = ~(ones >> @intCast(prefix_len - 64)) },
            else => .{ .hi = ones, .lo = ones },
        };

        return .{
            .network_hi = std.mem.readInt(u64, addr[0..8], .big),
            .network_lo = std.mem.readInt(u64, addr[8..16], .big),
            .mask_hi = masks.hi,
            .mask_lo = masks.lo,
        };
    }
};
// IpFilter fields
/// When true, addresses matching the built-in PRIVATE_V4 / PRIVATE_V6 tables
/// are reported as blocked.
block_private: bool,
/// Optional custom block and allow ranges. When present they are released by
/// `deinit`.
cidrs: ?Cidrs,
// ── Comptime helpers ─────────────────────────────────────────────────────────
/// Comptime helper: parse a dotted-decimal IPv4 literal to [4]u8.
///
/// The literal is validated while it is parsed: anything other than exactly
/// four non-empty decimal octets in the range 0..=255 is a compile error.
/// Without this, a typo in a range table (for example the letter `O` in
/// place of a zero) would silently produce a different address.
fn parseIpv4Comptime(comptime s: []const u8) Ipv4Addr {
    return comptime blk: {
        var result: Ipv4Addr = undefined;
        // Wider than u8 so an out-of-range octet can be detected instead of overflowing.
        var octet: u16 = 0;
        var digits: usize = 0;
        var octet_idx: usize = 0;

        for (s) |ch| {
            if (ch == '.') {
                if (digits == 0 or octet_idx >= 3) {
                    @compileError("malformed IPv4 literal: " ++ s);
                }
                result[octet_idx] = @intCast(octet);
                octet_idx += 1;
                octet = 0;
                digits = 0;
            } else if (ch >= '0' and ch <= '9') {
                octet = octet * 10 + (ch - '0');
                if (octet > 255) {
                    @compileError("IPv4 octet out of range in: " ++ s);
                }
                digits += 1;
            } else {
                @compileError("invalid character in IPv4 literal: " ++ s);
            }
        }

        // The final octet has no trailing dot, so it is stored here.
        if (octet_idx != 3 or digits == 0) {
            @compileError("malformed IPv4 literal: " ++ s);
        }
        result[octet_idx] = @intCast(octet);
        break :blk result;
    };
}
/// Comptime helper: turn a dotted-decimal literal and a prefix length into a CidrV4.
fn makeCidrV4(comptime addr: []const u8, comptime prefix: u6) CidrV4 {
    const octets = parseIpv4Comptime(addr);
    return CidrV4.fromPrefix(octets, prefix);
}
/// Comptime helper: turn 16 literal address bytes and a prefix length into a CidrV6.
fn makeCidrV6(comptime bytes: Ipv6Addr, comptime prefix: u8) CidrV6 {
    const range = CidrV6.fromPrefix(bytes, prefix);
    return range;
}
// ── Comptime CIDR range tables ───────────────────────────────────────────────
/// IPv4 ranges that are blocked when `block_private` is set.
/// Built at comptime from dotted-decimal literals.
// NOTE(review): 100.64.0.0/10 (shared address space), multicast and
// 255.255.255.255 are not listed here — confirm this is intentional.
const PRIVATE_V4 = [_]CidrV4{
makeCidrV4("127.0.0.0", 8), // localhost
makeCidrV4("0.0.0.0", 8), // current network
makeCidrV4("10.0.0.0", 8), // RFC1918
makeCidrV4("172.16.0.0", 12), // RFC1918
makeCidrV4("192.168.0.0", 16), // RFC1918
makeCidrV4("169.254.0.0", 16), // link-local
};
/// IPv6 ranges that are blocked when `block_private` is set.
/// Each entry is given as its 16 raw address bytes plus a prefix length.
/// IPv4-mapped addresses (::ffff:a.b.c.d) are not listed here; callers unwrap
/// them with `isIpv4Mapped` and check the embedded IPv4 address instead.
const PRIVATE_V6 = [_]CidrV6{
// ::/128 — IPv6 Unspecified
makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 128),
// ::1/128 — IPv6 localhost
makeCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, 128),
// fe80::/10 — link-local
makeCidrV6(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10),
// fc00::/7 — ULA
makeCidrV6(.{ 0xfc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 7),
};
// ── Runtime IP parsing ───────────────────────────────────────────────────────
/// Parse a dotted-decimal IPv4 string into a 4-byte array.
///
/// Returns null unless the input is exactly four dot-separated octets, each
/// made only of ASCII digits and each in the range 0..=255.
fn parseIpv4(str: []const u8) ?Ipv4Addr {
    var addr: Ipv4Addr = undefined;
    var it = std.mem.splitScalar(u8, str, '.');
    var i: usize = 0;
    while (it.next()) |part| : (i += 1) {
        if (i >= addr.len) return null;
        // std.fmt.parseInt tolerates a leading sign and '_' digit separators
        // ("+10", "1_0", "-0"). None of those belong in an IP address, so any
        // non-digit is rejected before the numeric conversion.
        for (part) |ch| {
            if (!std.ascii.isDigit(ch)) return null;
        }
        // Empty parts and values above 255 are rejected by parseInt itself.
        addr[i] = std.fmt.parseInt(u8, part, 10) catch return null;
    }
    if (i != addr.len) return null;
    return addr;
}
/// Parse an IPv6 string (compressed notation included) into its 16 raw bytes.
/// Anything from the first '%' onwards is treated as a zone ID and ignored,
/// so "fe80::1%eth0" parses as "fe80::1".
/// Returns null when the remaining text is not a valid IPv6 address.
fn parseIpv6(str: []const u8) ?Ipv6Addr {
    const zone_start = std.mem.indexOfScalar(u8, str, '%') orelse str.len;
    const address = std.net.Address.parseIp6(str[0..zone_start], 0) catch return null;
    return address.in6.sa.addr;
}
// ── CIDR matching ────────────────────────────────────────────────────────────
/// Recognise an IPv4-mapped IPv6 address (::ffff:a.b.c.d).
/// Returns the trailing four bytes as an IPv4 address when the first twelve
/// bytes are ten zeros followed by 0xff 0xff, and null otherwise.
fn isIpv4Mapped(addr: Ipv6Addr) ?Ipv4Addr {
    for (addr[0..10]) |byte| {
        if (byte != 0) return null;
    }
    if (addr[10] != 0xff or addr[11] != 0xff) return null;
    return addr[12..16].*;
}
/// Report whether `addr` lies inside `cidr`: true when every bit selected by
/// the range's mask is equal between the address and the range's network.
fn matchesCidrV4(addr: Ipv4Addr, cidr: CidrV4) bool {
    const value = std.mem.readInt(u32, &addr, .big);
    const differing_bits = value ^ cidr.network;
    return (differing_bits & cidr.mask) == 0;
}
/// Report whether `addr` lies inside `cidr`. The address is compared as two
/// big-endian 64-bit halves, each against its own network value and mask.
fn matchesCidrV6(addr: Ipv6Addr, cidr: CidrV6) bool {
    const upper = std.mem.readInt(u64, addr[0..8], .big);
    if ((upper ^ cidr.network_hi) & cidr.mask_hi != 0) return false;

    const lower = std.mem.readInt(u64, addr[8..16], .big);
    return (lower ^ cidr.network_lo) & cidr.mask_lo == 0;
}
// ── Public API ───────────────────────────────────────────────────────────────
/// Custom ranges produced by `parseCidrList`: two block lists and two allow
/// lists, one of each per address family. All four slices are heap-allocated.
pub const Cidrs = struct {
    v4: []CidrV4,
    v6: []CidrV6,
    allow_v4: []CidrV4,
    allow_v6: []CidrV6,

    /// Release all four slices. `allocator` must be the one they were
    /// allocated with.
    pub fn deinit(self: Cidrs, allocator: std.mem.Allocator) void {
        for ([_][]CidrV4{ self.v4, self.allow_v4 }) |ranges| {
            allocator.free(ranges);
        }
        for ([_][]CidrV6{ self.v6, self.allow_v6 }) |ranges| {
            allocator.free(ranges);
        }
    }
};
/// Parse a comma-separated list of CIDR strings (e.g. "10.0.0.0/8,2001:db8::/32")
/// into a Cidrs struct. Entries prefixed with '-' are added to the allow list
/// (e.g. "-10.0.0.42/32" exempts that IP from blocking). Spaces and tabs around
/// an entry are ignored, and empty entries are skipped.
///
/// Caller owns the returned Cidrs and must free them via Cidrs.deinit.
///
/// Returns error.InvalidCidr on any malformed entry: missing '/', an address
/// that is neither IPv4 nor IPv6, a prefix length that is not a plain decimal
/// number, or a prefix length larger than the address family allows (32 / 128).
/// Allocation failures are propagated.
pub fn parseCidrList(
    allocator: std.mem.Allocator,
    cidr_str: []const u8,
) !Cidrs {
    var v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer v4_list.deinit(allocator);
    var v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer v6_list.deinit(allocator);
    var allow_v4_list: std.ArrayList(CidrV4) = .empty;
    errdefer allow_v4_list.deinit(allocator);
    var allow_v6_list: std.ArrayList(CidrV6) = .empty;
    errdefer allow_v6_list.deinit(allocator);

    var it = std.mem.splitScalar(u8, cidr_str, ',');
    while (it.next()) |entry| {
        const trimmed = std.mem.trim(u8, entry, " \t");
        if (trimmed.len == 0) continue;

        const is_allow = trimmed[0] == '-';
        const cidr_part = if (is_allow) trimmed[1..] else trimmed;
        const slash = std.mem.indexOfScalar(u8, cidr_part, '/') orelse return error.InvalidCidr;
        const addr_str = cidr_part[0..slash];
        const prefix_str = cidr_part[slash + 1 ..];

        // std.fmt.parseInt accepts a leading sign and '_' separators, so
        // "/+8" or "/1_6" would otherwise be taken as valid prefix lengths.
        // Only plain decimal digits are allowed here.
        for (prefix_str) |ch| {
            if (!std.ascii.isDigit(ch)) return error.InvalidCidr;
        }
        // An empty prefix ("10.0.0.0/") is rejected by parseInt.
        const prefix = std.fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidCidr;

        if (parseIpv4(addr_str)) |v4| {
            if (prefix > 32) return error.InvalidCidr;
            const cidr = CidrV4.fromPrefix(v4, @intCast(prefix));
            const target = if (is_allow) &allow_v4_list else &v4_list;
            try target.append(allocator, cidr);
        } else if (parseIpv6(addr_str)) |v6| {
            if (prefix > 128) return error.InvalidCidr;
            const cidr = CidrV6.fromPrefix(v6, prefix);
            const target = if (is_allow) &allow_v6_list else &v6_list;
            try target.append(allocator, cidr);
        } else {
            return error.InvalidCidr;
        }
    }

    // Convert one list at a time. Each slice already handed out is freed again
    // if a later conversion fails; the emptied lists make the earlier
    // errdefers no-ops.
    const v4 = try v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(v4);
    const v6 = try v6_list.toOwnedSlice(allocator);
    errdefer allocator.free(v6);
    const allow_v4 = try allow_v4_list.toOwnedSlice(allocator);
    errdefer allocator.free(allow_v4);
    const allow_v6 = try allow_v6_list.toOwnedSlice(allocator);

    return .{ .v4 = v4, .v6 = v6, .allow_v4 = allow_v4, .allow_v6 = allow_v6 };
}
/// Create an IpFilter.
/// `block_private` enables the built-in private ranges (RFC1918, localhost,
/// link-local and ULA). `cidrs` supplies optional custom block/allow ranges;
/// the filter takes ownership of them and frees them in `deinit`.
pub fn init(
    block_private: bool,
    cidrs: ?Cidrs,
) IpFilter {
    return IpFilter{
        .cidrs = cidrs,
        .block_private = block_private,
    };
}
/// Free the custom CIDR ranges owned by the filter, if any.
/// `allocator` must be the allocator the ranges were parsed with.
pub fn deinit(self: IpFilter, allocator: std.mem.Allocator) void {
    const owned = self.cidrs orelse return;
    owned.deinit(allocator);
}
/// Decide whether an IPv4 address is blocked.
/// Order of evaluation: the custom allow list wins over everything, then the
/// custom block list, then (if enabled) the built-in private ranges.
fn isBlockedV4(self: *const IpFilter, addr: Ipv4Addr) bool {
    if (self.cidrs) |custom| {
        for (custom.allow_v4) |allowed| {
            if (matchesCidrV4(addr, allowed)) return false;
        }
        for (custom.v4) |blocked| {
            if (matchesCidrV4(addr, blocked)) return true;
        }
    }

    if (!self.block_private) return false;

    for (PRIVATE_V4) |range| {
        if (matchesCidrV4(addr, range)) return true;
    }
    return false;
}
/// Decide whether an IPv6 address is blocked.
/// Order of evaluation: the custom allow list wins over everything, then the
/// custom block list, then (if enabled) the built-in private ranges.
fn isBlockedV6(self: *const IpFilter, addr: Ipv6Addr) bool {
    if (self.cidrs) |custom| {
        for (custom.allow_v6) |allowed| {
            if (matchesCidrV6(addr, allowed)) return false;
        }
        for (custom.v6) |blocked| {
            if (matchesCidrV6(addr, blocked)) return true;
        }
    }

    if (!self.block_private) return false;

    for (PRIVATE_V6) |range| {
        if (matchesCidrV6(addr, range)) return true;
    }
    return false;
}
/// Check if an address from curl's opensocket callback should be blocked.
/// Extracts the IP directly from the sockaddr structure; no string parsing needed.
/// IPv6 addresses in the IPv4-mapped form (::ffff:a.b.c.d) are checked against
/// the IPv4 rules using the embedded address.
/// Fail-closed: unknown address family -> true (blocked).
pub fn isBlockedSockaddr(self: *const IpFilter, sa: *const libcurl.CurlSockAddr) bool {
switch (sa.family) {
posix.AF.INET => {
// NOTE(review): assumes sa.addr is large and aligned enough to be viewed
// as a sockaddr.in — confirm against the CurlSockAddr definition.
const sin: *const posix.sockaddr.in = @ptrCast(&sa.addr);
// sin.addr is stored in network byte order (big-endian). @bitCast keeps the
// in-memory byte order, so `bytes` holds the octets most-significant first,
// which is the layout isBlockedV4 reads with a big-endian readInt.
const bytes: [4]u8 = @bitCast(sin.addr);
return self.isBlockedV4(bytes);
},
posix.AF.INET6 => {
// NOTE(review): same layout assumption as above, for sockaddr.in6.
const sin6: *const posix.sockaddr.in6 = @ptrCast(&sa.addr);
const addr: Ipv6Addr = sin6.addr;
// An IPv4-mapped address is judged by the IPv4 rules only.
if (isIpv4Mapped(addr)) |v4| return self.isBlockedV4(v4);
return self.isBlockedV6(addr);
},
else => return true, // unknown family -> fail-closed
}
}
const testing = @import("../testing.zig");
test "IpFilter: IPv4 CIDR matching: private group boundaries" {
    const filter = IpFilter.init(true, null);
    defer filter.deinit(testing.allocator);

    // Addresses that fall inside one of the built-in private ranges.
    const must_block = [_][]const u8{
        "0.0.0.0", // current network
        "127.0.0.1", // loopback, first host
        "127.255.255.255", // loopback, last address
        "10.0.0.1", // RFC1918 10.0.0.0/8
        "10.255.255.255",
        "172.16.0.0", // RFC1918 172.16.0.0/12, lower edge
        "172.31.255.255", // RFC1918 172.16.0.0/12, upper edge
        "192.168.0.1", // RFC1918 192.168.0.0/16
        "169.254.1.1", // link-local
    };

    // Addresses just outside those ranges, plus ordinary public hosts.
    const must_pass = [_][]const u8{
        "128.0.0.1", // right after loopback
        "11.0.0.0", // right after 10.0.0.0/8
        "172.15.255.255", // right before 172.16.0.0/12
        "172.32.0.0", // right after 172.16.0.0/12
        "192.169.0.0", // right after 192.168.0.0/16
        "169.255.0.0", // right after link-local
        "8.8.8.8",
        "1.1.1.1",
        "93.184.216.34", // example.com
    };

    for (must_block) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
    for (must_pass) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }
}
test "IpFilter: IPv6 CIDR matching: private group" {
    const filter = IpFilter.init(true, null);
    defer filter.deinit(testing.allocator);

    const must_block = [_][]const u8{
        "::", // unspecified
        "::1", // localhost
        "fe80::1", // link-local
        "fc00::1", // ULA
        "fd00::1", // ULA (fd.. is still inside fc00::/7)
    };
    const must_pass = [_][]const u8{
        "2001:db8::1", // documentation range, not in the private group
        "2606:4700::1111", // Cloudflare
    };

    for (must_block) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
    for (must_pass) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }
}
test "IpFilter: IPv4-mapped IPv6 bypass prevention" {
    const filter = IpFilter.init(true, null);
    defer filter.deinit(testing.allocator);

    // A mapped address must get the verdict of the IPv4 address it embeds.
    const cases = [_]struct { ip: []const u8, blocked: bool }{
        .{ .ip = "::ffff:127.0.0.1", .blocked = true }, // loopback
        .{ .ip = "::ffff:10.0.0.1", .blocked = true }, // RFC1918
        .{ .ip = "::ffff:8.8.8.8", .blocked = false }, // public
    };

    for (cases) |case| {
        try testing.expectEqual(case.blocked, filter.testBlocked(case.ip));
    }
}
test "IpFilter: fail-closed: unknown address family blocked by isBlockedSockaddr" {
    // Even a filter with nothing enabled must refuse a family it cannot inspect.
    const filter = IpFilter.init(false, null);
    defer filter.deinit(testing.allocator);

    // 255 is neither AF_INET nor AF_INET6; the address bytes are never read.
    const unknown_family: libcurl.CurlSockAddr = .{
        .family = 255,
        .socktype = posix.SOCK.STREAM,
        .protocol = 0,
        .addrlen = 0,
        .addr = undefined,
    };

    const blocked = filter.isBlockedSockaddr(&unknown_family);
    try testing.expect(blocked);
}
test "IpFilter: custom CIDR ranges" {
    // Only the custom range is active; the private group is disabled.
    const filter = IpFilter.init(false, try parseCidrList(testing.allocator, "203.0.113.0/24"));
    defer filter.deinit(testing.allocator);

    const cases = [_]struct { ip: []const u8, blocked: bool }{
        .{ .ip = "203.0.113.1", .blocked = true }, // in custom range
        .{ .ip = "203.0.113.255", .blocked = true }, // in custom range
        .{ .ip = "203.0.114.0", .blocked = false }, // outside custom range
        .{ .ip = "8.8.8.8", .blocked = false }, // not in range
    };

    for (cases) |case| {
        try testing.expectEqual(case.blocked, filter.testBlocked(case.ip));
    }
}
test "IpFilter: private group blocks cloud metadata IP via link-local" {
    // 169.254.169.254 sits inside link-local (169.254.0.0/16), which belongs to
    // the private group. Targeted metadata-only blocking is possible through
    // --block-cidrs instead.
    const metadata_ip = "169.254.169.254";

    const with_private = IpFilter.init(true, null);
    defer with_private.deinit(testing.allocator);
    try testing.expect(with_private.testBlocked(metadata_ip)); // blocked via link-local

    const without_private = IpFilter.init(false, null);
    defer without_private.deinit(testing.allocator);
    try testing.expect(!without_private.testBlocked(metadata_ip)); // not blocked when disabled
}
test "IpFilter: parseCidrList: mixed IPv4 and IPv6" {
    const cidrs = try parseCidrList(testing.allocator, "203.0.113.0/24, 2001:db8::/32, 192.168.1.0/24");

    // Hand the ranges to a filter, and register its cleanup, before the first
    // assertion: otherwise a failing length check would return early and leak
    // the parsed slices on top of the real failure.
    const f = IpFilter.init(false, cidrs);
    defer f.deinit(testing.allocator);

    try testing.expectEqual(2, cidrs.v4.len);
    try testing.expectEqual(1, cidrs.v6.len);

    // spot-check: 203.0.113.0/24 and 192.168.1.0/24
    try testing.expect(f.testBlocked("203.0.113.1"));
    try testing.expect(!f.testBlocked("203.0.114.0"));
    try testing.expect(f.testBlocked("192.168.1.1"));
    // spot-check: 2001:db8::/32
    try testing.expect(f.testBlocked("2001:db8::1"));
    try testing.expect(!f.testBlocked("2001:db9::1"));
}
test "IpFilter: allow list exempts from private blocking" {
    const filter = IpFilter.init(true, try parseCidrList(testing.allocator, "-10.0.0.42/32,-fc00::1/128"));
    defer filter.deinit(testing.allocator);

    // Allow-listed hosts get through although they sit in private ranges.
    const exempt = [_][]const u8{ "10.0.0.42", "fc00::1" };
    for (exempt) |ip| {
        try testing.expect(!filter.testBlocked(ip));
    }

    // Their neighbours, and other private hosts, stay blocked.
    const still_blocked = [_][]const u8{ "10.0.0.43", "10.0.0.41", "192.168.1.1", "fc00::2" };
    for (still_blocked) |ip| {
        try testing.expect(filter.testBlocked(ip));
    }
}
test "IpFilter: allow list exempts from custom CIDR blocking" {
    const filter = IpFilter.init(false, try parseCidrList(testing.allocator, "203.0.113.0/24,-203.0.113.100/32"));
    defer filter.deinit(testing.allocator);

    // One host is carved out of an otherwise blocked /24.
    const cases = [_]struct { ip: []const u8, blocked: bool }{
        .{ .ip = "203.0.113.100", .blocked = false }, // allowed
        .{ .ip = "203.0.113.99", .blocked = true }, // blocked
        .{ .ip = "203.0.113.101", .blocked = true }, // blocked
    };

    for (cases) |case| {
        try testing.expectEqual(case.blocked, filter.testBlocked(case.ip));
    }
}
test "IpFilter: parseCidrList: allow entries with '-' prefix" {
    const cidrs = try parseCidrList(testing.allocator, "10.0.0.0/8,-10.0.0.42/32,-fc00::1/128");

    // Hand the ranges to a filter, and register its cleanup, before the first
    // assertion: otherwise a failing length check would return early and leak
    // the parsed slices on top of the real failure.
    const f = IpFilter.init(false, cidrs);
    defer f.deinit(testing.allocator);

    // One block entry; the two '-' entries land in the allow lists.
    try testing.expectEqual(1, cidrs.v4.len);
    try testing.expectEqual(0, cidrs.v6.len);
    try testing.expectEqual(1, cidrs.allow_v4.len);
    try testing.expectEqual(1, cidrs.allow_v6.len);

    try testing.expect(!f.testBlocked("10.0.0.42")); // allowed
    try testing.expect(f.testBlocked("10.0.0.43")); // blocked
    try testing.expect(!f.testBlocked("fc00::1")); // allowed (not blocked by custom, but allow-listed)
}
test "IpFilter: parseCidrList: invalid input returns error" {
    const malformed = [_][]const u8{
        "not-a-cidr",
        "10.0.0.0/33", // prefix too large
        "10.0.0.0", // missing prefix
        "10.0.0.0/abc", // non-numeric prefix
    };

    for (malformed) |input| {
        try testing.expectError(error.InvalidCidr, parseCidrList(testing.allocator, input));
    }
}
test "IpFilter: matchesCidrV4: exact match /32" {
    const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 100 }, 32);

    // A /32 matches exactly one host; both neighbours must miss.
    const cases = [_]struct { addr: Ipv4Addr, matches: bool }{
        .{ .addr = .{ 192, 168, 1, 100 }, .matches = true },
        .{ .addr = .{ 192, 168, 1, 101 }, .matches = false },
        .{ .addr = .{ 192, 168, 1, 99 }, .matches = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV4(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /0 matches everything" {
    const cidr = CidrV4.fromPrefix(.{ 0, 0, 0, 0 }, 0);

    // With an empty mask no bit is compared, so any address is inside.
    const samples = [_]Ipv4Addr{
        .{ 0, 0, 0, 0 },
        .{ 255, 255, 255, 255 },
        .{ 192, 168, 1, 1 },
    };

    for (samples) |addr| {
        try testing.expect(matchesCidrV4(addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /8 boundary" {
    const cidr = CidrV4.fromPrefix(.{ 10, 0, 0, 0 }, 8);

    const cases = [_]struct { addr: Ipv4Addr, matches: bool }{
        .{ .addr = .{ 10, 0, 0, 0 }, .matches = true }, // first address
        .{ .addr = .{ 10, 255, 255, 255 }, .matches = true }, // last address
        .{ .addr = .{ 11, 0, 0, 0 }, .matches = false }, // just above
        .{ .addr = .{ 9, 255, 255, 255 }, .matches = false }, // just below
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV4(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /12 boundary (172.16.0.0/12)" {
    const cidr = CidrV4.fromPrefix(.{ 172, 16, 0, 0 }, 12);

    const cases = [_]struct { addr: Ipv4Addr, matches: bool }{
        // In range
        .{ .addr = .{ 172, 16, 0, 0 }, .matches = true },
        .{ .addr = .{ 172, 31, 255, 255 }, .matches = true },
        .{ .addr = .{ 172, 20, 100, 50 }, .matches = true },
        // Out of range
        .{ .addr = .{ 172, 15, 255, 255 }, .matches = false },
        .{ .addr = .{ 172, 32, 0, 0 }, .matches = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV4(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: /24 network" {
    const cidr = CidrV4.fromPrefix(.{ 203, 0, 113, 0 }, 24);

    const cases = [_]struct { addr: Ipv4Addr, matches: bool }{
        .{ .addr = .{ 203, 0, 113, 0 }, .matches = true }, // first address
        .{ .addr = .{ 203, 0, 113, 255 }, .matches = true }, // last address
        .{ .addr = .{ 203, 0, 112, 255 }, .matches = false }, // just below
        .{ .addr = .{ 203, 0, 114, 0 }, .matches = false }, // just above
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV4(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV4: non-byte-aligned /25" {
    const cidr = CidrV4.fromPrefix(.{ 192, 168, 1, 0 }, 25);

    const cases = [_]struct { addr: Ipv4Addr, matches: bool }{
        // 192.168.1.0 - 192.168.1.127 belong to the range
        .{ .addr = .{ 192, 168, 1, 0 }, .matches = true },
        .{ .addr = .{ 192, 168, 1, 127 }, .matches = true },
        // 192.168.1.128 and above do not
        .{ .addr = .{ 192, 168, 1, 128 }, .matches = false },
        .{ .addr = .{ 192, 168, 1, 255 }, .matches = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV4(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: /128 exact match" {
    // 2001:db8::1 and its immediate neighbour 2001:db8::2.
    const host: Ipv6Addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
    const neighbour: Ipv6Addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 };

    const cidr = CidrV6.fromPrefix(host, 128);

    try testing.expect(matchesCidrV6(host, cidr));
    try testing.expect(!matchesCidrV6(neighbour, cidr));
}
test "IpFilter: matchesCidrV6: /0 matches everything" {
    const cidr = CidrV6.fromPrefix(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0);

    // With an empty mask no bit is compared, so any address is inside.
    const samples = [_]Ipv6Addr{
        .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
        .{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    };

    for (samples) |addr| {
        try testing.expect(matchesCidrV6(addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: /64 boundary" {
    // 2001:db8::/64
    const cidr = CidrV6.fromPrefix(.{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 64);

    const cases = [_]struct { addr: Ipv6Addr, matches: bool }{
        // In range: the lower 64 bits are free
        .{ .addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, .matches = true },
        .{ .addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, .matches = true },
        // Out of range: different prefix
        .{ .addr = .{ 0x20, 0x01, 0x0d, 0xb9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, .matches = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV6(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: /48 network" {
    // 2001:db8:abcd::/48
    const cidr = CidrV6.fromPrefix(.{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 48);

    const cases = [_]struct { addr: Ipv6Addr, matches: bool }{
        // first and last address of the /48
        .{ .addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, .matches = true },
        .{ .addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xcd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, .matches = true },
        // next /48 up
        .{ .addr = .{ 0x20, 0x01, 0x0d, 0xb8, 0xab, 0xce, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, .matches = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV6(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: /10 link-local (fe80::/10)" {
    const cidr = CidrV6.fromPrefix(.{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 10);

    const cases = [_]struct { addr: Ipv6Addr, matches: bool }{
        // fe80:: through febf:: share the first 10 bits (1111111010)
        .{ .addr = .{ 0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, .matches = true },
        .{ .addr = .{ 0xfe, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, .matches = true },
        // fec0:: differs inside the 10-bit prefix
        .{ .addr = .{ 0xfe, 0xc0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, .matches = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.matches, matchesCidrV6(case.addr, cidr));
    }
}
test "IpFilter: matchesCidrV6: prefix > 64 bits (/96)" {
    // ::ffff:0:0/96 is the IPv4-mapped prefix; only the last four bytes are host bits.
    const mapped_v4 = CidrV6.fromPrefix(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 0, 0, 0 }, 96);

    // ::ffff:192.168.1.1
    const mapped_192 = matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 192, 168, 1, 1 }, mapped_v4);
    try testing.expect(mapped_192);

    // ::ffff:10.0.0.1
    const mapped_10 = matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 10, 0, 0, 1 }, mapped_v4);
    try testing.expect(mapped_10);

    // 0xfffe instead of 0xffff in bytes 10-11 breaks the prefix.
    const wrong_marker = matchesCidrV6(.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xfe, 192, 168, 1, 1 }, mapped_v4);
    try testing.expect(!wrong_marker);
}
/// Test helper: parses `ip` as text and reports whether the filter blocks it.
/// IPv4 literals go to `isBlockedV4`. IPv6 literals that `isIpv4Mapped`
/// recognises are checked via `isBlockedV4` on the embedded address; all other
/// IPv6 literals go to `isBlockedV6`.
/// Callers must pass a valid IP literal: input that parses as neither family
/// hits `unreachable`.
fn testBlocked(self: *const IpFilter, ip: []const u8) bool {
    if (parseIpv4(ip)) |addr4| {
        return self.isBlockedV4(addr4);
    }

    // Not IPv4, so it has to be IPv6 (test inputs are assumed valid).
    const addr6 = parseIpv6(ip) orelse unreachable;

    return if (isIpv4Mapped(addr6)) |embedded4|
        self.isBlockedV4(embedded4)
    else
        self.isBlockedV6(addr6);
}

View File

@@ -28,6 +28,7 @@ const Config = @import("../Config.zig");
const libcurl = @import("../sys/libcurl.zig");
const http = @import("http.zig");
const IpFilter = @import("IpFilter.zig");
const RobotStore = @import("Robots.zig").RobotStore;
const WebBotAuth = @import("WebBotAuth.zig");
@@ -85,6 +86,9 @@ callbacks: [MAX_TICK_CALLBACKS]TickCallback = undefined,
callbacks_len: usize = 0,
callbacks_mutex: std.Thread.Mutex = .{},
/// Optional IP filter for blocking requests to private/internal networks (--block-private-networks).
ip_filter: ?*IpFilter = null,
const TickCallback = struct {
ctx: *anyopaque,
fun: *const fn (*anyopaque) void,
@@ -230,13 +234,31 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network {
ca_blob = try loadCerts(allocator);
}
// IP filter for blocking requests to private/internal networks.
const block_private = config.blockPrivateNetworks();
const cidrs: ?IpFilter.Cidrs = blk: {
const s = config.blockCidrs() orelse break :blk null;
break :blk try IpFilter.parseCidrList(allocator, s);
};
const has_cidrs = if (cidrs) |c| c.v4.len > 0 or c.v6.len > 0 or c.allow_v4.len > 0 or c.allow_v6.len > 0 else false;
const ip_filter: ?*IpFilter = blk: {
if (!block_private and !has_cidrs) break :blk null;
const f = try allocator.create(IpFilter);
f.* = IpFilter.init(block_private, cidrs);
break :blk f;
};
errdefer if (ip_filter) |f| {
f.deinit(allocator);
allocator.destroy(f);
};
const count: usize = config.httpMaxConcurrent();
const connections = try allocator.alloc(http.Connection, count);
errdefer allocator.free(connections);
var available: std.DoublyLinkedList = .{};
for (0..count) |i| {
connections[i] = try http.Connection.init(ca_blob, config);
connections[i] = try http.Connection.init(ca_blob, config, ip_filter);
available.append(&connections[i].node);
}
@@ -280,6 +302,8 @@ pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network {
.ws_pool = .init(allocator),
.ws_max = config.wsMaxConcurrent(),
.ip_filter = ip_filter,
};
}
@@ -316,6 +340,11 @@ pub fn deinit(self: *Network) void {
if (self.cache) |*cache| cache.deinit();
if (self.ip_filter) |f| {
f.deinit(self.allocator);
self.allocator.destroy(f);
}
globalDeinit();
}
@@ -612,7 +641,7 @@ pub fn releaseConnection(self: *Network, conn: *http.Connection) void {
self.ws_count -= 1;
},
else => {
conn.reset(self.config, self.ca_blob) catch |err| {
conn.reset(self.config, self.ca_blob, self.ip_filter) catch |err| {
lp.assert(false, "couldn't reset curl easy", .{ .err = err });
};
self.conn_mutex.lock();
@@ -637,7 +666,7 @@ pub fn newConnection(self: *Network) ?*http.Connection {
};
// don't do this under lock
conn.* = http.Connection.init(self.ca_blob, self.config) catch {
conn.* = http.Connection.init(self.ca_blob, self.config, self.ip_filter) catch {
self.ws_mutex.lock();
defer self.ws_mutex.unlock();
self.ws_pool.destroy(conn);

View File

@@ -17,9 +17,11 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.
const std = @import("std");
const posix = std.posix;
const Config = @import("../Config.zig");
const libcurl = @import("../sys/libcurl.zig");
const IpFilter = @import("IpFilter.zig");
const log = @import("lightpanda").log;
const assert = @import("lightpanda").assert;
@@ -104,7 +106,7 @@ pub const Headers = struct {
// In normal cases, the header iterator comes from the curl linked list.
// But it's also possible to inject a response, via `transfer.fulfill`. In that
// case, the resposne headers are a list, []const Http.Header.
// case, the response headers are a list, []const Http.Header.
// This union, is an iterator that exposes the same API for either case.
pub const HeaderIterator = union(enum) {
curl: CurlHeaderIterator,
@@ -229,6 +231,35 @@ pub const ResponseHead = struct {
}
};
/// curl open-socket hook (CURLOPT_OPENSOCKETFUNCTION) that enforces the IP filter.
///
/// curl invokes this with the resolved peer address before any socket exists,
/// so a rejected address never gets as far as a TCP SYN, whatever triggered
/// the request (JS, HTML sub-resources, redirects, ...).
///
/// - `purpose`: why curl wants the socket; not consulted.
/// - `address`: family / socktype / protocol and the sockaddr curl will connect to.
/// - `clientp`: the `*const IpFilter` registered through CURLOPT_OPENSOCKETDATA.
///
/// Returns a freshly created socket when the address is allowed, and
/// `CURL_SOCKET_BAD` when the address is blocked, when `clientp` is null
/// (fail-closed), or when creating the socket fails.
fn opensocketCallback(
    purpose: libcurl.CurlSockType,
    address: *libcurl.CurlSockAddr,
    clientp: ?*anyopaque,
) libcurl.CurlSocket {
    // The same kind of socket is opened for every purpose.
    _ = purpose;

    // No filter attached: refuse rather than connect unchecked.
    const opaque_filter = clientp orelse return libcurl.CURL_SOCKET_BAD;
    const ip_filter: *const IpFilter = @ptrCast(@alignCast(opaque_filter));

    if (!ip_filter.isBlockedSockaddr(address)) {
        // Allowed: create the socket exactly as curl described it.
        const sock = posix.socket(
            @intCast(address.family),
            @intCast(address.socktype),
            @intCast(address.protocol),
        ) catch return libcurl.CURL_SOCKET_BAD;
        return sock;
    }

    // Blocked: log the printable IP when the family is an IP one, otherwise
    // only the family number.
    const is_ip_family = address.family == posix.AF.INET or address.family == posix.AF.INET6;
    if (is_ip_family) {
        const peer = std.net.Address.initPosix(@ptrCast(&address.addr));
        log.warn(.http, "blocked by IP filter", .{ .ip = peer });
    } else {
        log.warn(.http, "blocked by IP filter", .{ .family = address.family });
    }
    return libcurl.CURL_SOCKET_BAD;
}
pub const Connection = struct {
_easy: *libcurl.Curl,
transport: Transport,
@@ -240,13 +271,17 @@ pub const Connection = struct {
websocket: *@import("../browser/webapi/net/WebSocket.zig"),
};
pub fn init(ca_blob: ?libcurl.CurlBlob, config: *const Config) !Connection {
pub fn init(
ca_blob: ?libcurl.CurlBlob,
config: *const Config,
ip_filter: ?*const IpFilter,
) !Connection {
const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy;
var self = Connection{ ._easy = easy, .transport = .none };
errdefer self.deinit();
try self.reset(config, ca_blob);
try self.reset(config, ca_blob, ip_filter);
return self;
}
@@ -371,6 +406,7 @@ pub const Connection = struct {
self: *Connection,
config: *const Config,
ca_blob: ?libcurl.CurlBlob,
ip_filter: ?*const IpFilter,
) !void {
libcurl.curl_easy_reset(self._easy);
self.transport = .none;
@@ -421,6 +457,12 @@ pub const Connection = struct {
// try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback);
}
// IP filter: block private/internal network addresses
if (ip_filter) |filter| {
try libcurl.curl_easy_setopt(self._easy, .opensocket_function, opensocketCallback);
try libcurl.curl_easy_setopt(self._easy, .opensocket_data, @constCast(filter));
}
}
fn discardBody(_: [*]const u8, count: usize, len: usize, _: ?*anyopaque) usize {
@@ -603,3 +645,53 @@ fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8,
}
return 0;
}
// ── Unit tests for opensocketCallback ────────────────────────────────────────
/// Test helper: builds a `CurlSockAddr` for a TCP (SOCK.STREAM) IPv4 endpoint
/// with the given address bytes and port 0, mimicking what curl passes to the
/// opensocket callback.
fn makeSockAddrV4(ip: [4]u8) libcurl.CurlSockAddr {
    const inet: posix.sockaddr.in = .{
        .port = 0,
        .addr = @bitCast(ip),
    };
    const inet_bytes = std.mem.asBytes(&inet);

    var result: libcurl.CurlSockAddr = .{
        .family = posix.AF.INET,
        .socktype = posix.SOCK.STREAM,
        .protocol = 0,
        .addrlen = @sizeOf(posix.sockaddr.in),
        .addr = undefined,
    };
    // Overlay the sockaddr_in bytes onto the generic sockaddr storage.
    @memcpy(std.mem.asBytes(&result.addr)[0..inet_bytes.len], inet_bytes);
    return result;
}
test "opensocketCallback: private IPv4 returns CURL_SOCKET_BAD" {
    // With private-network blocking on, 127.0.0.1 is expected to be refused.
    const private_blocker = IpFilter.init(true, null);
    const clientp: ?*anyopaque = @ptrCast(@constCast(&private_blocker));

    var loopback = makeSockAddrV4(.{ 127, 0, 0, 1 });
    const sock = opensocketCallback(.ipcxn, &loopback, clientp);

    try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, sock);
}
test "opensocketCallback: public IPv4 opens a real socket" {
    // 8.8.8.8 is outside every blocked range, so the callback must hand back
    // a usable descriptor instead of CURL_SOCKET_BAD.
    const private_blocker = IpFilter.init(true, null);
    const clientp: ?*anyopaque = @ptrCast(@constCast(&private_blocker));

    var public_addr = makeSockAddrV4(.{ 8, 8, 8, 8 });
    const sock = opensocketCallback(.ipcxn, &public_addr, clientp);

    // Valid descriptors are never negative.
    try std.testing.expect(sock >= 0);
    posix.close(sock);
}
test "opensocketCallback: null clientp returns CURL_SOCKET_BAD (fail-closed)" {
    // Even a public address must be refused when no filter pointer was supplied.
    var public_addr = makeSockAddrV4(.{ 8, 8, 8, 8 });
    const sock = opensocketCallback(.ipcxn, &public_addr, null);

    try std.testing.expectEqual(libcurl.CURL_SOCKET_BAD, sock);
}
test "opensocketCallback: block_private=false allows private IP" {
    // A filter created with block_private=false and no CIDR list rejects nothing,
    // so even loopback should yield a real socket.
    const permissive = IpFilter.init(false, null);
    const clientp: ?*anyopaque = @ptrCast(@constCast(&permissive));

    var loopback = makeSockAddrV4(.{ 127, 0, 0, 1 });
    const sock = opensocketCallback(.ipcxn, &loopback, clientp);

    try std.testing.expect(sock >= 0);
    posix.close(sock);
}

View File

@@ -201,7 +201,7 @@ pub fn Reader(comptime EXPECT_MASK: bool) type {
const can_be_fragmented = message_type == .text or message_type == .binary;
if (self.fragments != null and can_be_fragmented) {
// if this isn't a continuation, then we can't have fragments
return error.NestedFragementation;
return error.NestedFragmentation;
}
if (fin == false) {
@@ -446,7 +446,7 @@ pub const WsConnection = struct {
error.InvalidMessageType => self.send(&CLOSE_PROTOCOL_ERROR) catch {},
error.ControlTooLarge => self.send(&CLOSE_PROTOCOL_ERROR) catch {},
error.InvalidContinuation => self.send(&CLOSE_PROTOCOL_ERROR) catch {},
error.NestedFragementation => self.send(&CLOSE_PROTOCOL_ERROR) catch {},
error.NestedFragmentation => self.send(&CLOSE_PROTOCOL_ERROR) catch {},
error.OutOfMemory => {}, // don't borther trying to send an error in this case
}
return err;
@@ -537,7 +537,7 @@ pub const WsConnection = struct {
const alloc = self.send_arena.allocator();
const response = blk: {
// Response to an ugprade request is always this, with
// Response to an upgrade request is always this, with
// the Sec-Websocket-Accept value a special sha1 hash of the
// request "sec-websocket-key" and a magic value.

View File

@@ -43,6 +43,23 @@ pub const curl_writefunc_error: usize = c.CURL_WRITEFUNC_ERROR;
pub const curl_readfunc_pause: usize = c.CURL_READFUNC_PAUSE;
pub const CurlReadFunction = fn ([*]u8, usize, usize, *anyopaque) usize;
/// Zig-side view of curl's `curlsocktype`: the `purpose` argument curl passes
/// to an opensocket callback. The tag values are taken directly from the C
/// constants, so a raw `c.curlsocktype` can be converted with `@enumFromInt`.
/// The enum is exhaustive: converting a raw value other than the two listed
/// here is illegal behavior (caught only in safety-checked builds).
pub const CurlSockType = enum(c.curlsocktype) {
/// Socket for an outgoing IP connection (CURLSOCKTYPE_IPCXN).
ipcxn = c.CURLSOCKTYPE_IPCXN,
/// Socket obtained from accept() (CURLSOCKTYPE_ACCEPT), per curl's documentation.
accept = c.CURLSOCKTYPE_ACCEPT,
};
/// Mirror of curl's `struct curl_sockaddr`, as handed to the opensocket callback.
/// The addr field is a struct sockaddr stored inline (not a pointer), so addrlen
/// tells you how many bytes of addr are valid.
/// Declared `extern` so the field order and layout follow the C ABI; do not
/// reorder or retype fields without keeping them in sync with the C struct.
pub const CurlSockAddr = extern struct {
/// Address family of the peer (e.g. AF_INET / AF_INET6).
family: c_int,
/// Socket type to request when creating the socket (e.g. SOCK_STREAM).
socktype: c_int,
/// Protocol number to pass to socket().
protocol: c_int,
/// Number of valid bytes starting at `addr`.
addrlen: c_uint,
/// Start of the socket address.
/// NOTE(review): addrlen can presumably exceed @sizeOf(std.posix.sockaddr)
/// (e.g. for IPv6); confirm curl's backing storage is large enough before
/// reading past this field.
addr: std.posix.sockaddr,
};
pub const CURL_SOCKET_BAD: c.curl_socket_t = c.CURL_SOCKET_BAD;
pub const FreeCallback = fn (ptr: ?*anyopaque) void;
pub const StrdupCallback = fn (str: [*:0]const u8) ?[*:0]u8;
pub const MallocCallback = fn (size: usize) ?*anyopaque;
@@ -137,8 +154,17 @@ comptime {
return 0;
}
}.cb;
const opensocket_cb_check: c.curl_opensocket_callback = struct {
fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t {
_ = clientp;
_ = purpose;
_ = address;
return CURL_SOCKET_BAD;
}
}.cb;
_ = debug_cb_check;
_ = write_cb_check;
_ = opensocket_cb_check;
if (@sizeOf(CurlWaitFd) != @sizeOf(c.curl_waitfd)) {
@compileError("CurlWaitFd size mismatch");
@@ -152,6 +178,17 @@ comptime {
if (c.CURL_WAIT_POLLIN != 1 or c.CURL_WAIT_POLLPRI != 2 or c.CURL_WAIT_POLLOUT != 4) {
@compileError("CURL_WAIT_* flag values don't match CurlWaitEvents packed struct bit layout");
}
if (@sizeOf(CurlSockAddr) != @sizeOf(c.curl_sockaddr)) {
@compileError("CurlSockAddr size mismatch with curl_sockaddr");
}
if (@offsetOf(CurlSockAddr, "family") != @offsetOf(c.curl_sockaddr, "family") or
@offsetOf(CurlSockAddr, "socktype") != @offsetOf(c.curl_sockaddr, "socktype") or
@offsetOf(CurlSockAddr, "protocol") != @offsetOf(c.curl_sockaddr, "protocol") or
@offsetOf(CurlSockAddr, "addrlen") != @offsetOf(c.curl_sockaddr, "addrlen") or
@offsetOf(CurlSockAddr, "addr") != @offsetOf(c.curl_sockaddr, "addr"))
{
@compileError("CurlSockAddr layout mismatch with curl_sockaddr");
}
}
pub const CurlOption = enum(c.CURLoption) {
@@ -190,6 +227,8 @@ pub const CurlOption = enum(c.CURLoption) {
read_function = c.CURLOPT_READFUNCTION,
connect_only = c.CURLOPT_CONNECT_ONLY,
upload = c.CURLOPT_UPLOAD,
opensocket_function = c.CURLOPT_OPENSOCKETFUNCTION,
opensocket_data = c.CURLOPT_OPENSOCKETDATA,
};
pub const CurlMOption = enum(c.CURLMoption) {
@@ -620,6 +659,7 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype
.header_data,
.read_data,
.write_data,
.opensocket_data,
=> blk: {
const ptr: ?*anyopaque = switch (@typeInfo(@TypeOf(value))) {
.null => null,
@@ -643,6 +683,20 @@ pub fn curl_easy_setopt(easy: *Curl, comptime option: CurlOption, value: anytype
break :blk c.curl_easy_setopt(easy, opt, cb);
},
.opensocket_function => blk: {
const cb: c.curl_opensocket_callback = switch (@typeInfo(@TypeOf(value))) {
.null => null,
.@"fn" => struct {
fn cb(clientp: ?*anyopaque, purpose: c.curlsocktype, address: [*c]c.curl_sockaddr) callconv(.c) c.curl_socket_t {
const addr: *CurlSockAddr = @ptrCast(address orelse return CURL_SOCKET_BAD);
return value(@enumFromInt(purpose), addr, clientp);
}
}.cb,
else => @compileError("expected Zig function or null for " ++ @tagName(option) ++ ", got " ++ @typeName(@TypeOf(value))),
};
break :blk c.curl_easy_setopt(easy, opt, cb);
},
.header_function => blk: {
const cb: c.curl_write_callback = switch (@typeInfo(@TypeOf(value))) {
.null => null,

View File

@@ -28,7 +28,7 @@ run_mode: Config.RunMode = .serve,
head: std.atomic.Value(usize) = .init(0),
tail: std.atomic.Value(usize) = .init(0),
dropped: std.atomic.Value(usize) = .init(0),
dropped: std.atomic.Value(u32) = .init(0),
buffer: [BUFFER_SIZE]telemetry.Event = undefined,
pub fn init(self: *LightPanda, app: *App, iid: ?[36]u8, run_mode: Config.RunMode) !void {

View File

@@ -103,16 +103,15 @@ pub const Event = union(enum) {
run: void,
navigate: Navigate,
buffer_overflow: BufferOverflow,
flag: []const u8, // used for testing
const Navigate = struct {
tls: bool,
proxy: bool,
driver: []const u8 = "cdp",
driver: enum { cdp } = .cdp,
};
const BufferOverflow = struct {
dropped: usize,
dropped: u32,
};
};
@@ -166,13 +165,13 @@ test "telemetry: sends event to provider" {
telemetry.disabled = false;
const mock = telemetry.provider;
telemetry.record(.{ .flag = "1" });
telemetry.record(.{ .flag = "2" });
telemetry.record(.{ .flag = "3" });
telemetry.record(.{ .buffer_overflow = .{ .dropped = 1 } });
telemetry.record(.{ .buffer_overflow = .{ .dropped = 2 } });
telemetry.record(.{ .buffer_overflow = .{ .dropped = 3 } });
try testing.expectEqual(3, mock.events.items.len);
for (mock.events.items, 0..) |event, i| {
try testing.expectEqual(i + 1, std.fmt.parseInt(usize, event.flag, 10));
try testing.expectEqual(i + 1, event.buffer_overflow.dropped);
}
}