From 5e0976bbd610ef6b212a2fb5011c46900fee4bcb Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Tue, 12 May 2026 18:47:10 +0800 Subject: [PATCH] fix use-after-free on robotslayer shutdown --- src/browser/HttpClient.zig | 173 +++++++++++++++--------- src/network/layer/InterceptionLayer.zig | 13 +- src/network/layer/RobotsLayer.zig | 33 +++-- 3 files changed, 134 insertions(+), 85 deletions(-) diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index ad67c8f7..bd0f3bd6 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -378,10 +378,20 @@ pub fn _request(_: *anyopaque, transfer: *Transfer) !void { return transfer.client.process(transfer); } +// Ownership contract: from the moment this function is entered, the +// HttpClient owns `req` — specifically `req.params.headers` (a curl_slist). +// On success, transfer.deinit eventually frees it. On any failure path +// inside this function, we free it before returning the error. Callers +// must NOT pair `request()` with their own `errdefer headers.deinit()` +// — that's a double-free. pub fn request(self: *Client, req: Request, owner: ?*Owner) !void { - const arena = try self.arena_pool.acquire(.small, "Request.arena"); + const arena = self.arena_pool.acquire(.small, "Request.arena") catch |err| { + req.params.headers.deinit(); + return err; + }; const transfer = arena.create(Transfer) catch |err| { + req.params.headers.deinit(); self.arena_pool.release(arena); return err; }; @@ -390,36 +400,41 @@ pub fn request(self: *Client, req: Request, owner: ?*Owner) !void { .req = req, .url = req.params.url, .client = self, - .owner = owner, + // owner is set AFTER we've actually appended to the owner list, + // so transfer.deinit's `if (self.owner)` branch only fires when + // we're truly linked. Otherwise we'd try to remove a node from + // a list it was never in. + .owner = null, .arena = arena, .id = self.incrReqId(), .start_time = timestamp(.monotonic), .owner_node = .{}, }; + // From here, transfer owns req+arena. Any subsequent failure flows + // through transfer.deinit (or transfer.abort), which handles headers + // via req.deinit. Do NOT free headers directly past this point. + // Register for id-based lookup. putNoClobber would fail if request_id // collides (i.e. we've wrapped through 2^32 requests and the old // transfer is still alive — practically never). self.transfers.putNoClobber(self.allocator, transfer.id, transfer) catch |err| { - self.arena_pool.release(arena); + transfer.deinit(); return err; }; if (owner) |o| { o.addTransfer(transfer); + transfer.owner = o; } // From this point forward, the transfer owns `req` and `arena`. If the // layer chain fails before any layer commits the transfer to an external - // owner (queue / multi handle / pending interception), we clean up here. - // `loop_owned` is set by the commit points; if it stays false on failure, - // no one else will deinit the transfer, so we do it. requestFailed goes - // through transfer.req — so any layer wrappers see the failure too — and - // latches _notified_fail to keep the notification single-fire. + // owner (queue / multi handle / pending interception), we clean up here + // via transfer.abort which fires error_callback and deinits. self.entry_layer.request(transfer) catch |err| { if (!transfer.loop_owned) { - transfer.requestFailed(err, true); - transfer.deinit(); + transfer.abort(err); } return err; }; @@ -567,10 +582,10 @@ fn makeRequest(self: *Client, conn: *http.Connection, transfer: *Transfer) anyer if (transfer.req.start_callback) |cb| { cb(Response.fromTransfer(transfer)) catch |err| { - // We're now committed to the multi. Tear down explicitly: deinit - // (which removeConn's from the multi) and notify the caller. - transfer.requestFailed(err, true); - transfer.deinit(); + // We're now committed to the multi. transfer.abort fires the + // error_callback and tears down (removeConn handles the + // already-in-multi case via the dirty queue). + transfer.abort(err); return err; }; } @@ -1115,73 +1130,97 @@ pub const Transfer = struct { arena_pool.release(arena); } + // Cancel this transfer with `err`. Fires error_callback once (latched + // via _notified_fail), then either deinits synchronously or, if we're + // mid-perform with a libcurl handle still in the multi, detaches and + // lets the natural processOneMessage flow deinit later. + // + // This is the ONE entry point external callers should use to cancel + // a transfer. Don't reach for kill() or requestFailed() directly — + // they're internal helpers. pub fn abort(self: *Transfer, err: anyerror) void { self.requestFailed(err, true); - - if (self._performing or self.client.performing) { - // We're currently in a curl_multi_perform. We cannot call - // curl_multi_remove_handle from a curl callback. Instead, we flag - // this transfer and our callbacks will check for this flag. - self.aborted = true; - return; - } - - self.deinit(); + self.detachOrDeinit(); } - pub fn terminate(self: *Transfer) void { - self.requestFailed(error.Shutdown, false); - self.deinit(); - } - - // internal, when the frame is shutting down. Doesn't have the same ceremony - // as abort (doesn't send a notification, doesn't invoke an error callback) + // Owner-driven teardown: fires shutdown_callback (not error_callback) + // and otherwise behaves like abort. Called by Client.abortOwner / + // abortRequests when a Frame / WGS is being torn down. fn kill(self: *Transfer) void { if (self.req.shutdown_callback) |cb| { cb(self.req.ctx); } - - if (self._performing or self.client.performing) { - // We're currently inside of a callback. This client, and libcurl - // generally don't expect a transfer to become deinitialized during - // a callback. We can flag the transfer as aborted (which is what - // we do when transfer.abort() is called in this condition) AND, - // since this "kill()"should prevent any future callbacks, the best - // we can do is null/noop them. - self.aborted = true; - self.req.start_callback = null; - self.req.shutdown_callback = null; - self.req.header_callback = Noop.headerCallback; - self.req.data_callback = Noop.dataCallback; - self.req.done_callback = Noop.doneCallback; - self.req.error_callback = Noop.errorCallback; - // Detach from the owner list NOW. The transfer will finish its - // curl lifecycle and be deinit'd later from processOneMessage, - // but by then the owner (Frame / WGS) may have been freed — - // attempting o.transfers.remove(&owner_node) against a dangling - // list would UAF. Leaving the transfer linked also breaks the - // invariant that an owner can be torn down once its list walks - // clean. Clear `owner` so transfer.deinit doesn't try again. - if (self.owner) |o| { - o.removeTransfer(self); - self.owner = null; - } - return; - } - - self.deinit(); + self.detachOrDeinit(); } - // The single failure-notification point. Latches via _notified_fail so - // multiple paths racing to report the same failure (e.g. an error inside - // a callback that's already being handled by the loop) only fire one + // Decide whether to tear down now or defer until processOneMessage + // eventually drains the in-flight curl handle. + // + // Two cases force deferral: + // * `_performing` — processOneMessage is currently processing THIS + // transfer (set/cleared around the callback chain). It will call + // `transfer.deinit` itself after the chain returns; deiniting + // here would double-free. Note that `_conn` is cleared partway + // through this window (the "release conn ASAP" step before + // done_callback fires), so we cannot rely on `_conn != null`. + // * `client.performing` + we have a libcurl handle — libcurl could + // still fire callbacks for us. Releasing the arena now would UAF + // from inside curl. + // + // Otherwise (parked / queued / never-trackConn'd / fully drained), + // there is nothing left referencing this transfer and we can safely + // deinit inline even from inside a perform callback. + fn detachOrDeinit(self: *Transfer) void { + const must_defer = self._performing or + (self.client.performing and self._conn != null); + if (must_defer) { + self.detachInPerform(); + } else { + self.deinit(); + } + } + + // Deferred-cleanup path when we can't synchronously deinit. + // + // We: + // - flag `aborted` so processOneMessage's normal-completion paths + // short-circuit when they next see this transfer, + // - noop every user callback so libcurl naturally draining the + // in-flight response can't re-enter user code, + // - unlink from owner.transfers and clear `owner` so the owning + // Frame/WGS can be freed while this transfer is still draining. + // transfer.deinit (called later by processOneMessage) sees + // `owner == null` and skips the list-remove that would otherwise + // UAF against a freed list. + fn detachInPerform(self: *Transfer) void { + self.aborted = true; + self.req.start_callback = null; + self.req.shutdown_callback = null; + self.req.header_callback = Noop.headerCallback; + self.req.data_callback = Noop.dataCallback; + self.req.done_callback = Noop.doneCallback; + self.req.error_callback = Noop.errorCallback; + if (self.owner) |o| { + o.removeTransfer(self); + self.owner = null; + } + } + + // Internal failure-notification helper. Latches via _notified_fail so + // multiple paths racing to report the same failure only fire one // notification. Goes through transfer.req — so layer wrappers - // (InterceptContext, CacheContext) see the failure and can propagate it - // up the chain. + // (InterceptContext, CacheContext) see the failure and can propagate + // it up the chain. + // + // Not part of the external API: callers cancelling a transfer should + // use transfer.abort(err) instead, which goes through this and also + // handles the deinit / detach side. The internal HttpClient flow uses + // this directly (from processOneMessage) because it's already paired + // with the natural processMessages → transfer.deinit handoff. // // execute_callback=true → fires error_callback. false → fires // shutdown_callback (used by Frame shutdown / WGS teardown). - pub fn requestFailed(self: *Transfer, err: anyerror, comptime execute_callback: bool) void { + fn requestFailed(self: *Transfer, err: anyerror, comptime execute_callback: bool) void { if (self._notified_fail) return; self._notified_fail = true; diff --git a/src/network/layer/InterceptionLayer.zig b/src/network/layer/InterceptionLayer.zig index 5ce9e3b5..6fc5c9bc 100644 --- a/src/network/layer/InterceptionLayer.zig +++ b/src/network/layer/InterceptionLayer.zig @@ -205,8 +205,7 @@ pub fn continueRequest(self: *InterceptionLayer, transfer: *Transfer) anyerror!v transfer.loop_owned = false; self.next.request(transfer) catch |err| { if (!transfer.loop_owned) { - transfer.requestFailed(err, true); - transfer.deinit(); + transfer.abort(err); } return err; }; @@ -218,9 +217,7 @@ pub fn abortRequest(self: *InterceptionLayer, transfer: *Transfer) void { log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; - - transfer.requestFailed(error.Abort, true); - transfer.deinit(); + transfer.abort(error.Abort); } pub fn fulfillRequest( @@ -235,7 +232,6 @@ pub fn fulfillRequest( log.debug(.http, "fulfill transfer", .{ .intercepted = self.intercepted }); } self.intercepted -= 1; - defer transfer.deinit(); // `done` flips true once we've called the user's done_callback. If // done_callback itself throws, the user already saw their end-of-flow @@ -243,10 +239,13 @@ pub fn fulfillRequest( var done: bool = false; fulfillInner(&transfer.req, status, headers, body, &done) catch |err| { if (!done) { - transfer.requestFailed(err, true); + transfer.abort(err); + } else { + transfer.deinit(); } return err; }; + transfer.deinit(); } fn fulfillInner( diff --git a/src/network/layer/RobotsLayer.zig b/src/network/layer/RobotsLayer.zig index 320f3423..710884e9 100644 --- a/src/network/layer/RobotsLayer.zig +++ b/src/network/layer/RobotsLayer.zig @@ -110,10 +110,20 @@ fn fetchRobotsThenRequest( @compileError("expected request.params to be a struct"); } + // CRITICAL: build a fresh Headers for the inner robots fetch. + // params is value-copied from the parent's req.params, but + // Headers is a struct wrapping a *curl_slist — value copy shares + // the pointer. Letting Client.request take ownership of a shared + // headers list means both transfers will free it at deinit time + // -> double-free. The robots.txt fetch is a system-level GET + // anyway, no need to inherit the parent's user headers. + params.headers = try transfer.client.newHeaders(); + errdefer params.headers.deinit(); params.method = .GET; params.url = robots_url; params.skip_robots = true; params.resource_type = .fetch; + params.body = null; log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url }); try transfer.client.request(.{ @@ -142,8 +152,7 @@ fn flushPending(self: *RobotsLayer, robots_url: [:0]const u8, allowed: bool) voi for (queued.value.items) |transfer| { if (!allowed) { log.warn(.http, "blocked by robots", .{ .url = transfer.url }); - transfer.requestFailed(error.RobotsBlocked, true); - transfer.deinit(); + transfer.abort(error.RobotsBlocked); } else { // Reset ownership: handing back to the layer chain. If a downstream // layer commits (multi / queue / pause), it'll flip loop_owned back @@ -151,24 +160,26 @@ fn flushPending(self: *RobotsLayer, robots_url: [:0]const u8, allowed: bool) voi transfer.loop_owned = false; self.next.request(transfer) catch |e| { if (!transfer.loop_owned) { - transfer.requestFailed(e, true); - transfer.deinit(); + transfer.abort(e); } }; } } } +// Invariant: shutdown_callback fires on a Transfer only via Transfer.kill, +// and the only callers of kill are Client.abortOwner / .abortRequests +// (owner-driven teardown). So if THIS robots fetch's shutdown_callback +// fired, the owner is being torn down — every parked transfer in this +// pending queue is on the same owner list and is already being killed by +// the same walk. We just need to drop the pending entry; the owner walk +// handles the rest. (If a future code path adds per-transfer kill +// without owner teardown, this assumption breaks — see comment above +// detachOrDeinit in HttpClient.zig.) fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void { var pending = self.pending.fetchRemove(robots_url) orelse @panic("RobotsLayer.flushPendingShutdown: missing queue"); - defer pending.value.deinit(self.allocator); - - for (pending.value.items) |transfer| { - // execute_callback=false → fires shutdown_callback (not error_callback). - transfer.requestFailed(error.Shutdown, false); - transfer.deinit(); - } + pending.value.deinit(self.allocator); } const RobotsContext = struct {