From c7d004fefb17484051d7f9d1e009907e7cbd2e1d Mon Sep 17 00:00:00 2001 From: Nikolay Govorov Date: Thu, 23 Apr 2026 15:37:02 +0100 Subject: [PATCH] Setup timeout via tcp keepalive --- src/Config.zig | 7 +++- src/Server.zig | 73 +++++++++++++++++++++++++++++---------- src/cdp/testing.zig | 2 +- src/network/websocket.zig | 4 +-- 4 files changed, 62 insertions(+), 24 deletions(-) diff --git a/src/Config.zig b/src/Config.zig index 4c858975..a78fd14a 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -637,7 +637,12 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ Useful, for example, when --host is 0.0.0.0. \\ Defaults to --host value \\ - \\--timeout Inactivity timeout in seconds before disconnecting clients + \\--timeout Approximate time in seconds after which a dead client + \\ is dropped. Implemented via TCP keepalive: the kernel + \\ probes silent peers and closes the socket if they do + \\ not respond. Does not affect clients that are actively + \\ sending or receiving data, or waiting on a slow page + \\ load. 0 disables keepalive (OS defaults apply). \\ Defaults to 10 (seconds). Limited to 604800 (1 week). \\ \\--cdp-max-connections diff --git a/src/Server.zig b/src/Server.zig index b4c2b119..e9b4355b 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -90,9 +90,49 @@ fn onAccept(ctx: *anyopaque, socket: posix.socket_t) void { }; } +// Liveness is enforced at the TCP layer via keepalive probes sent by the +// kernel. This is transparent to CDP clients (unlike a WebSocket ping, which +// some clients — go-rod panics, chromedp logs spurious "malformed" — handle +// incorrectly). Detection window is roughly `timeout_s`: +// keepidle = timeout_s - keepcnt * keepintvl (clamped >= 1s) +// keepintvl = 2s, keepcnt = 3 → 6s of probe escalation. +// timeout_ms == 0 leaves keepalive off (OS defaults apply — typically 2h). +fn setTcpKeepalive(socket: posix.socket_t, timeout_ms: u32) void { + if (timeout_ms == 0) return; + + posix.setsockopt(socket, posix.SOL.SOCKET, posix.SO.KEEPALIVE, &std.mem.toBytes(@as(c_int, 1))) catch |err| { + log.warn(.app, "SO_KEEPALIVE", .{ .err = err }); + return; + }; + + const keepcnt: u32 = 3; + const keepintvl: u32 = 2; + const timeout_s: u32 = @max(timeout_ms / 1000, 1); + const probe_window: u32 = keepcnt * keepintvl; + const keepidle: u32 = if (timeout_s > probe_window) timeout_s - probe_window else 1; + + if (@hasDecl(posix.TCP, "KEEPIDLE")) { + posix.setsockopt(socket, posix.IPPROTO.TCP, posix.TCP.KEEPIDLE, &std.mem.toBytes(@as(c_int, @intCast(keepidle)))) catch |err| { + log.warn(.app, "TCP_KEEPIDLE", .{ .err = err }); + }; + } + if (@hasDecl(posix.TCP, "KEEPINTVL")) { + posix.setsockopt(socket, posix.IPPROTO.TCP, posix.TCP.KEEPINTVL, &std.mem.toBytes(@as(c_int, @intCast(keepintvl)))) catch |err| { + log.warn(.app, "TCP_KEEPINTVL", .{ .err = err }); + }; + } + if (@hasDecl(posix.TCP, "KEEPCNT")) { + posix.setsockopt(socket, posix.IPPROTO.TCP, posix.TCP.KEEPCNT, &std.mem.toBytes(@as(c_int, @intCast(keepcnt)))) catch |err| { + log.warn(.app, "TCP_KEEPCNT", .{ .err = err }); + }; + } +} + fn handleConnection(self: *Server, socket: posix.socket_t, timeout_ms: u32) void { defer posix.close(socket); + setTcpKeepalive(socket, timeout_ms); + // Client is HUGE (> 512KB) because it has a large read buffer. // V8 crashes if this is on the stack (likely related to its size). const client = self.getClient() catch |err| { @@ -106,7 +146,6 @@ fn handleConnection(self: *Server, socket: posix.socket_t, timeout_ms: u32) void self.allocator, self.app, self.json_version_response, - timeout_ms, ) catch |err| { log.err(.app, "CDP client init", .{ .err = err }); return; @@ -216,9 +255,8 @@ pub const Client = struct { allocator: Allocator, app: *App, json_version_response: []const u8, - timeout_ms: u32, ) !Client { - var ws = try Net.WsConnection.init(socket, allocator, json_version_response, timeout_ms); + var ws = try Net.WsConnection.init(socket, allocator, json_version_response); errdefer ws.deinit(); if (log.enabled(.app, .info)) { @@ -277,15 +315,19 @@ pub const Client = struct { fn httpLoop(self: *Client, http: *HttpClient) !void { lp.assert(self.mode == .http, "Client.httpLoop invalid mode", .{}); + // Liveness is enforced by TCP keepalive configured in + // Server.setTcpKeepalive; the kernel closes dead sockets, which + // surfaces as EOF/error from readSocket. The loop blocks for ~24 days + // on each poll rather than tracking app-level timeouts. Capped at + // i32-max because HttpClient.tick narrows to c_int. + const wait_ms: u32 = std.math.maxInt(i32); + while (true) { - const status = http.tick(self.ws.timeout_ms) catch |err| { + const status = http.tick(wait_ms) catch |err| { log.err(.app, "http tick", .{ .err = err }); return; }; - if (status != .cdp_socket) { - log.info(.app, "CDP timeout", .{}); - return; - } + if (status != .cdp_socket) continue; if (self.readSocket() == false) { return; @@ -297,19 +339,15 @@ pub const Client = struct { } var cdp = &self.mode.cdp; - const timeout_ms = self.ws.timeout_ms; while (true) { - const result = cdp.pageWait(timeout_ms) catch |wait_err| switch (wait_err) { + const result = cdp.pageWait(wait_ms) catch |wait_err| switch (wait_err) { error.NoPage => { - const status = http.tick(timeout_ms) catch |err| { + const status = http.tick(wait_ms) catch |err| { log.err(.app, "http tick", .{ .err = err }); return; }; - if (status != .cdp_socket) { - log.info(.app, "CDP timeout", .{}); - return; - } + if (status != .cdp_socket) continue; if (self.readSocket() == false) { return; } @@ -324,10 +362,7 @@ pub const Client = struct { return; } }, - .done => { - log.info(.app, "CDP timeout", .{}); - return; - }, + .done => {}, } } } diff --git a/src/cdp/testing.zig b/src/cdp/testing.zig index 8be66b43..1838ef39 100644 --- a/src/cdp/testing.zig +++ b/src/cdp/testing.zig @@ -315,7 +315,7 @@ pub fn context() !TestContext { try posix.setsockopt(pair[1], posix.SOL.SOCKET, posix.SO.RCVBUF, &std.mem.toBytes(@as(c_int, 32_768))); try posix.setsockopt(pair[1], posix.SOL.SOCKET, posix.SO.SNDBUF, &std.mem.toBytes(@as(c_int, 32_768))); - const client = try Server.Client.init(pair[1], base.arena_allocator, base.test_app, "json-version", 2000); + const client = try Server.Client.init(pair[1], base.arena_allocator, base.test_app, "json-version"); return .{ .client = client, diff --git a/src/network/websocket.zig b/src/network/websocket.zig index c0d66862..4ecb5daa 100644 --- a/src/network/websocket.zig +++ b/src/network/websocket.zig @@ -319,9 +319,8 @@ pub const WsConnection = struct { reader: Reader(true), send_arena: ArenaAllocator, json_version_response: []const u8, - timeout_ms: u32, - pub fn init(socket: posix.socket_t, allocator: Allocator, json_version_response: []const u8, timeout_ms: u32) !WsConnection { + pub fn init(socket: posix.socket_t, allocator: Allocator, json_version_response: []const u8) !WsConnection { const socket_flags = try posix.fcntl(socket, posix.F.GETFL, 0); const nonblocking = @as(u32, @bitCast(posix.O{ .NONBLOCK = true })); if (builtin.is_test == false) { @@ -337,7 +336,6 @@ pub const WsConnection = struct { .reader = reader, .send_arena = ArenaAllocator.init(allocator), .json_version_response = json_version_response, - .timeout_ms = timeout_ms, }; }