Fix Network dropping CDP sockets from its poll set once a multi exists

preparePollFds cleared and rebuilt the curl portion of `pollfds` every
loop iteration, but sliced `pollfds[PSEUDO_POLLFDS..]` — all the way to
the end of the array. That range also covers the CDP socket region
`[cdp_start..]`, which prepareCdpPollFds owns and only rebuilds when
`cdp_dirty` is set (a steady-state optimization). So the @memset wiped
every live CDP socket fd to -1 on each iteration.

This only bites once Network owns a curl multi handle, which is created
solely by telemetry — and telemetry is disabled in Debug builds, which
is why it reproduced only in ReleaseFast/ReleaseSafe (and the nightly).
Regular HTTP/navigation runs on the worker's own handles, not Network's
multi, so it never triggered the path in Debug.

Once the CDP sockets are dropped from the poll set, the Network thread
stops reading client messages (#2508, hard stall after the first
command) and never observes peer EOF or `conn.shutdown`, so the worker
is never told to exit and SIGTERM is ignored after a connection (#2507).

Fix: slice only the curl region `[PSEUDO_POLLFDS..cdp_start]`.

Also harden the poll timeout: `curl_multi_timeout` returns -1 when curl
is idle, and `@min(250, -1)` is -1 (block forever), which starved
onTick (telemetry's periodic flush) and turned any missed wakeup into a
permanent hang rather than a <=250ms blip. Treat curl_timeout <= 0 as
"no deadline" and fall back to the 250ms cap.

Fixes #2507
Fixes #2508
This commit is contained in:
Navid EMAD
2026-05-20 19:13:09 +02:00
parent f1b0adf923
commit e1e49c8a2e

View File

@@ -722,8 +722,15 @@ pub fn run(self: *Network) void {
break :blk min_timeout;
}
// curl_multi_timeout reports -1 when curl has no timeout
// preference (idle) and 0 when it wants to be serviced
// immediately. Treat both as "no curl-imposed deadline" and
// fall back to min_timeout — otherwise @min(min_timeout, -1)
// would be -1, i.e. poll() blocks forever, starving onTick
// (telemetry's periodic flush) and removing the safety net
// that bounds any missed wakeup to min_timeout.
const curl_timeout = self.getCurlTimeout();
if (curl_timeout == 0) {
if (curl_timeout <= 0) {
break :blk min_timeout;
}
@@ -860,7 +867,16 @@ fn acceptConnections(self: *Network) void {
}
fn preparePollFds(self: *Network, multi: *libcurl.CurlM) void {
const curl_fds = self.pollfds[PSEUDO_POLLFDS..];
// Only the curl slice — NOT through to the end of pollfds. The CDP
// socket fds live in [cdp_start..] and are owned by
// prepareCdpPollFds, which only rebuilds them when cdp_dirty is set
// (a steady-state optimization). Slicing to the end here would
// @memset those fds to -1 every iteration once a multi exists (which
// happens as soon as telemetry sends its first request), silently
// dropping every live CDP socket from the poll set — Network then
// never reads another CDP message (#2508) nor observes peer
// EOF/shutdown (#2507).
const curl_fds = self.pollfds[PSEUDO_POLLFDS..self.cdp_start];
@memset(curl_fds, .{ .fd = -1, .events = 0, .revents = 0 });
var fd_count: c_uint = 0;
@@ -1053,3 +1069,40 @@ fn loadCerts(allocator: Allocator) !libcurl.CurlBlob {
.flags = 0,
};
}
const testing = @import("../testing.zig");
test "Network: preparePollFds leaves the CDP fd region untouched" {
// Regression for #2507 / #2508. Once a multi exists (telemetry creates
// one in optimized builds), preparePollFds runs every loop iteration.
// It rebuilds only the curl slice [PSEUDO_POLLFDS..cdp_start]; the CDP
// region [cdp_start..] is owned by prepareCdpPollFds, which keeps its
// entries across iterations and only rebuilds when cdp_dirty is set.
// A slice that ran to the end of pollfds @memset those CDP sockets to
// -1, silently dropping every live CDP connection from the poll set —
// so Network stopped reading CDP messages (#2508) and never observed
// peer EOF/shutdown (#2507). curl global is initialized by the test
// harness (App.init -> Network.init).
const multi = libcurl.curl_multi_init() orelse return error.FailedToInitMulti;
defer libcurl.curl_multi_cleanup(multi) catch {};
const curl_slots = 4;
const cdp_slots = 3;
var pollfds: [PSEUDO_POLLFDS + curl_slots + cdp_slots]posix.pollfd = undefined;
@memset(&pollfds, .{ .fd = -1, .events = 0, .revents = 0 });
// preparePollFds only reads self.pollfds and self.cdp_start.
var nw: Network = undefined;
nw.pollfds = &pollfds;
nw.cdp_start = PSEUDO_POLLFDS + curl_slots;
// Two live CDP sockets parked in the CDP region, mimicking the steady
// state between cdp_dirty rebuilds.
pollfds[nw.cdp_start] = .{ .fd = 4242, .events = posix.POLL.IN, .revents = 0 };
pollfds[nw.cdp_start + 1] = .{ .fd = 4243, .events = posix.POLL.IN, .revents = 0 };
nw.preparePollFds(multi);
try testing.expectEqual(@as(posix.fd_t, 4242), pollfds[nw.cdp_start].fd);
try testing.expectEqual(@as(posix.fd_t, 4243), pollfds[nw.cdp_start + 1].fd);
}