fix use-after-free on robotslayer shutdown

This commit is contained in:
Karl Seguin
2026-05-12 18:47:10 +08:00
parent 7869cbb68c
commit 5e0976bbd6
3 changed files with 134 additions and 85 deletions

View File

@@ -378,10 +378,20 @@ pub fn _request(_: *anyopaque, transfer: *Transfer) !void {
return transfer.client.process(transfer);
}
// Ownership contract: from the moment this function is entered, the
// HttpClient owns `req` — specifically `req.params.headers` (a curl_slist).
// On success, transfer.deinit eventually frees it. On any failure path
// inside this function, we free it before returning the error. Callers
// must NOT pair `request()` with their own `errdefer headers.deinit()`
// — that's a double-free.
pub fn request(self: *Client, req: Request, owner: ?*Owner) !void {
const arena = try self.arena_pool.acquire(.small, "Request.arena");
const arena = self.arena_pool.acquire(.small, "Request.arena") catch |err| {
req.params.headers.deinit();
return err;
};
const transfer = arena.create(Transfer) catch |err| {
req.params.headers.deinit();
self.arena_pool.release(arena);
return err;
};
@@ -390,36 +400,41 @@ pub fn request(self: *Client, req: Request, owner: ?*Owner) !void {
.req = req,
.url = req.params.url,
.client = self,
.owner = owner,
// owner is set AFTER we've actually appended to the owner list,
// so transfer.deinit's `if (self.owner)` branch only fires when
// we're truly linked. Otherwise we'd try to remove a node from
// a list it was never in.
.owner = null,
.arena = arena,
.id = self.incrReqId(),
.start_time = timestamp(.monotonic),
.owner_node = .{},
};
// From here, transfer owns req+arena. Any subsequent failure flows
// through transfer.deinit (or transfer.abort), which handles headers
// via req.deinit. Do NOT free headers directly past this point.
// Register for id-based lookup. putNoClobber would fail if request_id
// collides (i.e. we've wrapped through 2^32 requests and the old
// transfer is still alive — practically never).
self.transfers.putNoClobber(self.allocator, transfer.id, transfer) catch |err| {
self.arena_pool.release(arena);
transfer.deinit();
return err;
};
if (owner) |o| {
o.addTransfer(transfer);
transfer.owner = o;
}
// From this point forward, the transfer owns `req` and `arena`. If the
// layer chain fails before any layer commits the transfer to an external
// owner (queue / multi handle / pending interception), we clean up here.
// `loop_owned` is set by the commit points; if it stays false on failure,
// no one else will deinit the transfer, so we do it. requestFailed goes
// through transfer.req — so any layer wrappers see the failure too — and
// latches _notified_fail to keep the notification single-fire.
// owner (queue / multi handle / pending interception), we clean up here
// via transfer.abort which fires error_callback and deinits.
self.entry_layer.request(transfer) catch |err| {
if (!transfer.loop_owned) {
transfer.requestFailed(err, true);
transfer.deinit();
transfer.abort(err);
}
return err;
};
@@ -567,10 +582,10 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer
if (transfer.req.start_callback) |cb| {
cb(Response.fromTransfer(transfer)) catch |err| {
// We're now committed to the multi. Tear down explicitly: deinit
// (which removeConn's from the multi) and notify the caller.
transfer.requestFailed(err, true);
transfer.deinit();
// We're now committed to the multi. transfer.abort fires the
// error_callback and tears down (removeConn handles the
// already-in-multi case via the dirty queue).
transfer.abort(err);
return err;
};
}
@@ -1115,73 +1130,97 @@ pub const Transfer = struct {
arena_pool.release(arena);
}
// Cancel this transfer with `err`. Fires error_callback once (latched
// via _notified_fail), then either deinits synchronously or, if we're
// mid-perform with a libcurl handle still in the multi, detaches and
// lets the natural processOneMessage flow deinit later.
//
// This is the ONE entry point external callers should use to cancel
// a transfer. Don't reach for kill() or requestFailed() directly —
// they're internal helpers.
pub fn abort(self: *Transfer, err: anyerror) void {
self.requestFailed(err, true);
if (self._performing or self.client.performing) {
// We're currently in a curl_multi_perform. We cannot call
// curl_multi_remove_handle from a curl callback. Instead, we flag
// this transfer and our callbacks will check for this flag.
self.aborted = true;
return;
}
self.deinit();
self.detachOrDeinit();
}
pub fn terminate(self: *Transfer) void {
self.requestFailed(error.Shutdown, false);
self.deinit();
}
// internal, when the frame is shutting down. Doesn't have the same ceremony
// as abort (doesn't send a notification, doesn't invoke an error callback)
// Owner-driven teardown: fires shutdown_callback (not error_callback)
// and otherwise behaves like abort. Called by Client.abortOwner /
// abortRequests when a Frame / WGS is being torn down.
fn kill(self: *Transfer) void {
if (self.req.shutdown_callback) |cb| {
cb(self.req.ctx);
}
if (self._performing or self.client.performing) {
// We're currently inside of a callback. This client, and libcurl
// generally don't expect a transfer to become deinitialized during
// a callback. We can flag the transfer as aborted (which is what
// we do when transfer.abort() is called in this condition) AND,
// since this "kill()"should prevent any future callbacks, the best
// we can do is null/noop them.
self.aborted = true;
self.req.start_callback = null;
self.req.shutdown_callback = null;
self.req.header_callback = Noop.headerCallback;
self.req.data_callback = Noop.dataCallback;
self.req.done_callback = Noop.doneCallback;
self.req.error_callback = Noop.errorCallback;
// Detach from the owner list NOW. The transfer will finish its
// curl lifecycle and be deinit'd later from processOneMessage,
// but by then the owner (Frame / WGS) may have been freed —
// attempting o.transfers.remove(&owner_node) against a dangling
// list would UAF. Leaving the transfer linked also breaks the
// invariant that an owner can be torn down once its list walks
// clean. Clear `owner` so transfer.deinit doesn't try again.
if (self.owner) |o| {
o.removeTransfer(self);
self.owner = null;
}
return;
}
self.deinit();
self.detachOrDeinit();
}
// The single failure-notification point. Latches via _notified_fail so
// multiple paths racing to report the same failure (e.g. an error inside
// a callback that's already being handled by the loop) only fire one
// Decide whether to tear down now or defer until processOneMessage
// eventually drains the in-flight curl handle.
//
// Two cases force deferral:
// * `_performing` — processOneMessage is currently processing THIS
// transfer (set/cleared around the callback chain). It will call
// `transfer.deinit` itself after the chain returns; deiniting
// here would double-free. Note that `_conn` is cleared partway
// through this window (the "release conn ASAP" step before
// done_callback fires), so we cannot rely on `_conn != null`.
// * `client.performing` + we have a libcurl handle — libcurl could
// still fire callbacks for us. Releasing the arena now would UAF
// from inside curl.
//
// Otherwise (parked / queued / never-trackConn'd / fully drained),
// there is nothing left referencing this transfer and we can safely
// deinit inline even from inside a perform callback.
fn detachOrDeinit(self: *Transfer) void {
const must_defer = self._performing or
(self.client.performing and self._conn != null);
if (must_defer) {
self.detachInPerform();
} else {
self.deinit();
}
}
// Deferred-cleanup path when we can't synchronously deinit.
//
// We:
// - flag `aborted` so processOneMessage's normal-completion paths
// short-circuit when they next see this transfer,
// - noop every user callback so libcurl naturally draining the
// in-flight response can't re-enter user code,
// - unlink from owner.transfers and clear `owner` so the owning
// Frame/WGS can be freed while this transfer is still draining.
// transfer.deinit (called later by processOneMessage) sees
// `owner == null` and skips the list-remove that would otherwise
// UAF against a freed list.
fn detachInPerform(self: *Transfer) void {
self.aborted = true;
self.req.start_callback = null;
self.req.shutdown_callback = null;
self.req.header_callback = Noop.headerCallback;
self.req.data_callback = Noop.dataCallback;
self.req.done_callback = Noop.doneCallback;
self.req.error_callback = Noop.errorCallback;
if (self.owner) |o| {
o.removeTransfer(self);
self.owner = null;
}
}
// Internal failure-notification helper. Latches via _notified_fail so
// multiple paths racing to report the same failure only fire one
// notification. Goes through transfer.req — so layer wrappers
// (InterceptContext, CacheContext) see the failure and can propagate it
// up the chain.
// (InterceptContext, CacheContext) see the failure and can propagate
// it up the chain.
//
// Not part of the external API: callers cancelling a transfer should
// use transfer.abort(err) instead, which goes through this and also
// handles the deinit / detach side. The internal HttpClient flow uses
// this directly (from processOneMessage) because it's already paired
// with the natural processMessages → transfer.deinit handoff.
//
// execute_callback=true → fires error_callback. false → fires
// shutdown_callback (used by Frame shutdown / WGS teardown).
pub fn requestFailed(self: *Transfer, err: anyerror, comptime execute_callback: bool) void {
fn requestFailed(self: *Transfer, err: anyerror, comptime execute_callback: bool) void {
if (self._notified_fail) return;
self._notified_fail = true;

View File

@@ -205,8 +205,7 @@ pub fn continueRequest(self: *InterceptionLayer, transfer: *Transfer) anyerror!v
transfer.loop_owned = false;
self.next.request(transfer) catch |err| {
if (!transfer.loop_owned) {
transfer.requestFailed(err, true);
transfer.deinit();
transfer.abort(err);
}
return err;
};
@@ -218,9 +217,7 @@ pub fn abortRequest(self: *InterceptionLayer, transfer: *Transfer) void {
log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted });
}
self.intercepted -= 1;
transfer.requestFailed(error.Abort, true);
transfer.deinit();
transfer.abort(error.Abort);
}
pub fn fulfillRequest(
@@ -235,7 +232,6 @@ pub fn fulfillRequest(
log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted });
}
self.intercepted -= 1;
defer transfer.deinit();
// `done` flips true once we've called the user's done_callback. If
// done_callback itself throws, the user already saw their end-of-flow
@@ -243,10 +239,13 @@ pub fn fulfillRequest(
var done: bool = false;
fulfillInner(&transfer.req, status, headers, body, &done) catch |err| {
if (!done) {
transfer.requestFailed(err, true);
transfer.abort(err);
} else {
transfer.deinit();
}
return err;
};
transfer.deinit();
}
fn fulfillInner(

View File

@@ -110,10 +110,20 @@ fn fetchRobotsThenRequest(
@compileError("expected request.params to be a struct");
}
// CRITICAL: build a fresh Headers for the inner robots fetch.
// params is value-copied from the parent's req.params, but
// Headers is a struct wrapping a *curl_slist — value copy shares
// the pointer. Letting Client.request take ownership of a shared
// headers list means both transfers will free it at deinit time
// -> double-free. The robots.txt fetch is a system-level GET
// anyway, no need to inherit the parent's user headers.
params.headers = try transfer.client.newHeaders();
errdefer params.headers.deinit();
params.method = .GET;
params.url = robots_url;
params.skip_robots = true;
params.resource_type = .fetch;
params.body = null;
log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });
try transfer.client.request(.{
@@ -142,8 +152,7 @@ fn flushPending(self: *RobotsLayer, robots_url: [:0]const u8, allowed: bool) voi
for (queued.value.items) |transfer| {
if (!allowed) {
log.warn(.http, "blocked by robots", .{ .url = transfer.url });
transfer.requestFailed(error.RobotsBlocked, true);
transfer.deinit();
transfer.abort(error.RobotsBlocked);
} else {
// Reset ownership: handing back to the layer chain. If a downstream
// layer commits (multi / queue / pause), it'll flip loop_owned back
@@ -151,24 +160,26 @@ fn flushPending(self: *RobotsLayer, robots_url: [:0]const u8, allowed: bool) voi
transfer.loop_owned = false;
self.next.request(transfer) catch |e| {
if (!transfer.loop_owned) {
transfer.requestFailed(e, true);
transfer.deinit();
transfer.abort(e);
}
};
}
}
}
// Invariant: shutdown_callback fires on a Transfer only via Transfer.kill,
// and the only callers of kill are Client.abortOwner / .abortRequests
// (owner-driven teardown). So if THIS robots fetch's shutdown_callback
// fired, the owner is being torn down — every parked transfer in this
// pending queue is on the same owner list and is already being killed by
// the same walk. We just need to drop the pending entry; the owner walk
// handles the rest. (If a future code path adds per-transfer kill
// without owner teardown, this assumption breaks — see comment above
// detachOrDeinit in HttpClient.zig.)
fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void {
var pending = self.pending.fetchRemove(robots_url) orelse
@panic("RobotsLayer.flushPendingShutdown: missing queue");
defer pending.value.deinit(self.allocator);
for (pending.value.items) |transfer| {
// execute_callback=false → fires shutdown_callback (not error_callback).
transfer.requestFailed(error.Shutdown, false);
transfer.deinit();
}
pending.value.deinit(self.allocator);
}
const RobotsContext = struct {