feat: add --json to fetch command

The `fetch` command is a practical way to render pages without needing a
long-running browser instance.
However, it hides all details of the fetch, most importantly the HTTP status code.
This is a significant limitation when using `lightpanda fetch` in a pipeline.

This introduces a `--json` option to provide a structured output that
contains:
* url
* HTTP status code
* response headers
* rendered content as controlled by the `--dump` option

The proposal is to always output the same JSON format, even when `--dump`
is not used (the `dump` and `body` fields are then empty strings).
This commit is contained in:
Marc Helbling
2026-05-19 12:08:18 +02:00
parent bdd456f76c
commit a89a28a4a2
4 changed files with 184 additions and 26 deletions

View File

@@ -192,6 +192,7 @@ const Commands = cli.Builder(.{
},
},
.{ .name = "terminate_ms", .type = ?u32 },
.{ .name = "json", .type = bool },
},
.shared_options = CommonOptions,
},

View File

@@ -256,6 +256,8 @@ _parent_notified: bool = false,
_type: enum { root, frame }, // only used for logs right now
_req_id: u32 = 0,
_navigated_options: ?NavigatedOpts = null,
_http_status: ?u16 = null,
_http_headers: std.StringArrayHashMapUnmanaged([]const u8) = .empty,
pub fn init(self: *Frame, frame_id: u32, page: *Page, parent: ?*Frame) !void {
if (comptime IS_DEBUG) {
@@ -447,6 +449,20 @@ pub fn getTitle(self: *Frame) !?[]const u8 {
return null;
}
/// Snapshot of the HTTP-level details recorded on a frame.
pub const HttpMetadata = struct {
/// The frame's URL at the time the snapshot was taken.
url: [:0]const u8,
/// HTTP status code of the response; null when none has been recorded.
status: ?u16,
/// Response headers keyed by header name. The map is copied by value from
/// the frame, so its entries still point at the frame's storage.
headers: std.StringArrayHashMapUnmanaged([]const u8),
};
/// Returns the URL, HTTP status and response headers currently stored on
/// this frame. The headers map is copied by value (not cloned), so its
/// entries alias the frame's own storage.
pub fn httpMetadata(self: *const Frame) HttpMetadata {
    const snapshot: HttpMetadata = .{
        .headers = self._http_headers,
        .status = self._http_status,
        .url = self.url,
    };
    return snapshot;
}
// Add common headers for a request:
// * referer
pub fn headersForRequest(self: *Frame, headers: *HttpClient.Headers) !void {
@@ -608,6 +624,9 @@ pub fn navigate(self: *Frame, request_url: [:0]const u8, opts: NavigateOpts) !vo
const http_client = &session.browser.http_client;
self._http_status = null;
self._http_headers = .empty;
self.url = try self.arena.dupeZ(u8, request_url);
self.origin = try URL.getOrigin(self.arena, self.url);
@@ -1021,6 +1040,14 @@ fn frameHeaderDoneCallback(response: HttpClient.Response) !bool {
});
}
self._http_status = response.status();
var it = response.headerIterator();
while (it.next()) |hdr| {
const name = try self.arena.dupe(u8, hdr.name);
const value = try self.arena.dupe(u8, hdr.value);
try self._http_headers.put(self.arena, name, value);
}
if (self._navigated_options) |no| {
// _navigated_options will be null in special short-circuit cases, like
// "navigating" to about:blank, in which case this notification has
@@ -4149,3 +4176,21 @@ test "Page: isSameOrigin" {
try testing.expectEqual(false, frame.isSameOrigin("not-a-url"));
try testing.expectEqual(false, frame.isSameOrigin("//origin.com/foo"));
}
test "Frame: httpMetadata after navigation" {
    const frame = try testing.pageTest("page/meta.html", .{});
    defer testing.test_session.removePage();

    const metadata = frame.httpMetadata();

    // A status must have been recorded before its value can be checked.
    const status = metadata.status orelse return error.TestUnexpectedResult;
    try std.testing.expectEqual(@as(u16, 200), status);

    try testing.expect(metadata.headers.count() != 0);
    try testing.expect(metadata.url.len != 0);
}
test "Frame: httpMetadata 404" {
    const frame = try testing.pageTest("nonexistent_page_xyz.html", .{});
    defer testing.test_session.removePage();

    // The navigation is expected to record a status, and that status is 404.
    const status = frame.httpMetadata().status orelse return error.TestUnexpectedResult;
    try std.testing.expectEqual(@as(u16, 404), status);
}

View File

@@ -60,6 +60,7 @@ pub const FetchOpts = struct {
dump: dump.Opts,
dump_mode: ?Config.DumpFormat = null,
writer: ?*std.Io.Writer = null,
json: bool = false,
};
pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !void {
const notification = try Notification.init(app.allocator);
@@ -157,38 +158,101 @@ pub fn fetch(app: *App, browser: *Browser, url: [:0]const u8, opts: FetchOpts) !
}
const writer = opts.writer orelse return;
if (opts.dump_mode) |mode| blk: {
const frame = session.currentFrame() orelse {
try writer.writeAll("Frame closed. Please open a bug report including the URL\n");
break :blk;
};
switch (mode) {
.html => try dump.root(frame.window._document, opts.dump, writer, frame),
.markdown => try markdown.dump(frame.window._document.asNode(), .{}, writer, frame),
.semantic_tree, .semantic_tree_text => {
var registry = CDPNode.Registry.init(app.allocator);
defer registry.deinit();
const st: SemanticTree = .{
.dom_node = frame.window._document.asNode(),
.registry = &registry,
.frame = frame,
.arena = frame.call_arena,
.prune = (mode == .semantic_tree_text),
};
if (opts.json) {
var aw: std.Io.Writer.Allocating = .init(app.allocator);
defer aw.deinit();
if (mode == .semantic_tree) {
try std.json.Stringify.value(st, .{}, writer);
} else {
try st.textStringify(writer);
}
},
.wpt => try dumpWPT(frame, writer),
if (opts.dump_mode) |mode| blk: {
const frame = session.currentFrame() orelse break :blk;
try dumpContent(app, mode, opts.dump, frame, &aw.writer);
}
const frame = session.currentFrame();
try writeJsonEnvelope(writer, frame, opts.dump_mode, aw.written());
} else {
if (opts.dump_mode) |mode| blk: {
const frame = session.currentFrame() orelse {
try writer.writeAll("Frame closed. Please open a bug report including the URL\n");
break :blk;
};
try dumpContent(app, mode, opts.dump, frame, writer);
}
}
try writer.flush();
}
/// Renders the frame's document to `writer` in the requested dump format.
///
/// `dump_opts` is only consulted for the `.html` format. The two semantic
/// tree formats share one tree description and differ in pruning and in the
/// serializer used (JSON for `.semantic_tree`, text for `.semantic_tree_text`).
fn dumpContent(app: *App, mode: Config.DumpFormat, dump_opts: dump.Opts, frame: *Frame, writer: *std.Io.Writer) !void {
    switch (mode) {
        .html => try dump.root(frame.window._document, dump_opts, writer, frame),
        .markdown => try markdown.dump(frame.window._document.asNode(), .{}, writer, frame),
        .wpt => try dumpWPT(frame, writer),
        .semantic_tree, .semantic_tree_text => {
            // The registry only needs to live for the duration of this dump.
            var registry = CDPNode.Registry.init(app.allocator);
            defer registry.deinit();

            const as_text = mode == .semantic_tree_text;
            const tree: SemanticTree = .{
                .dom_node = frame.window._document.asNode(),
                .registry = &registry,
                .frame = frame,
                .arena = frame.call_arena,
                .prune = as_text,
            };

            if (as_text) {
                try tree.textStringify(writer);
            } else {
                try std.json.Stringify.value(tree, .{}, writer);
            }
        },
    }
}
/// Writes a single-line JSON object describing a fetch to `writer`.
///
/// The object always carries the keys `url`, `http_status`, `headers`,
/// `dump` and `body`, in that order, and is terminated by a newline. When
/// `frame` is null the URL is empty, the status is 0 and `headers` is `{}`;
/// a frame without a recorded status also reports 0. `dump` is the tag name
/// of `dump_mode` (empty when null) and `body` is emitted as a JSON string.
fn writeJsonEnvelope(writer: *std.Io.Writer, frame: ?*Frame, dump_mode: ?Config.DumpFormat, body: []const u8) !void {
    const meta: ?Frame.HttpMetadata = if (frame) |f| f.httpMetadata() else null;

    // Resolve the scalar fields up front so the emission below is linear.
    const url: []const u8 = if (meta) |m| m.url else "";
    const status: u16 = if (meta) |m| (m.status orelse 0) else 0;
    const dump_name: []const u8 = if (dump_mode) |mode| @tagName(mode) else "";

    try writer.writeAll("{\"url\":");
    try writeJsonString(writer, url);

    try writer.print(",\"http_status\":{d}", .{status});

    try writer.writeAll(",\"headers\":{");
    if (meta) |m| {
        for (m.headers.keys(), m.headers.values(), 0..) |name, value, index| {
            if (index > 0) try writer.writeByte(',');
            try writeJsonString(writer, name);
            try writer.writeByte(':');
            try writeJsonString(writer, value);
        }
    }
    try writer.writeAll("},\"dump\":");
    try writeJsonString(writer, dump_name);

    try writer.writeAll(",\"body\":");
    try writeJsonString(writer, body);
    try writer.writeAll("}\n");
}
/// Writes `s` to `writer` as a double-quoted JSON string, escaping quotes,
/// backslashes and control characters.
fn writeJsonString(writer: *std.Io.Writer, s: []const u8) !void {
    // encodeJsonString emits the surrounding quotes as well as the escapes.
    return std.json.Stringify.encodeJsonString(s, .{}, writer);
}
fn dumpWPT(frame: *Frame, writer: *std.Io.Writer) !void {
var ls: js.Local.Scope = undefined;
frame.js.localScope(&ls);
@@ -283,6 +347,53 @@ pub fn RC(comptime T: type) type {
};
}
test "writeJsonString: simple string" {
    var out: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer out.deinit();

    try writeJsonString(&out.writer, "hello");

    // Plain ASCII is passed through unchanged, wrapped in double quotes.
    const expected = "\"hello\"";
    try std.testing.expectEqualStrings(expected, out.written());
}
test "writeJsonString: escapes special chars" {
    var aw: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer aw.deinit();

    try writeJsonString(&aw.writer, "line1\nline2\ttab\"quote");

    // Compare the whole output: substring checks alone would still pass if
    // characters were dropped, duplicated or reordered around the escapes.
    try std.testing.expectEqualStrings("\"line1\\nline2\\ttab\\\"quote\"", aw.written());
}
test "writeJsonString: empty string" {
    var out: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer out.deinit();

    try writeJsonString(&out.writer, "");

    // An empty input still yields the two surrounding quotes.
    const expected = "\"\"";
    try std.testing.expectEqualStrings(expected, out.written());
}
test "writeJsonEnvelope: null frame" {
    var aw: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer aw.deinit();

    try writeJsonEnvelope(&aw.writer, null, null, "");

    // Pin the complete envelope (key order, separators and trailing newline)
    // rather than isolated fragments, so any change to the format is caught.
    try std.testing.expectEqualStrings(
        "{\"url\":\"\",\"http_status\":0,\"headers\":{},\"dump\":\"\",\"body\":\"\"}\n",
        aw.written(),
    );
}
test "writeJsonEnvelope: null frame with dump mode and body" {
    var out: std.Io.Writer.Allocating = .init(std.testing.allocator);
    defer out.deinit();

    const html = "<html><body>hello</body></html>";
    try writeJsonEnvelope(&out.writer, null, .html, html);

    // Both the dump mode name and the body must appear in the envelope.
    const envelope = out.written();
    try std.testing.expect(std.mem.containsAtLeast(u8, envelope, 1, "\"dump\":\"html\""));
    try std.testing.expect(std.mem.containsAtLeast(u8, envelope, 1, "\"body\":\"" ++ html ++ "\""));
}
// Reference every declaration in this file so that nested test blocks are
// picked up by the test run.
test {
std.testing.refAllDecls(@This());
}

View File

@@ -129,11 +129,12 @@ fn run(allocator: Allocator, main_arena: Allocator) !void {
.with_base = opts.with_base,
.with_frames = opts.with_frames,
},
.json = opts.json,
};
var stdout = std.fs.File.stdout();
var writer = stdout.writer(&.{});
if (opts.dump != null) {
if (opts.dump != null or opts.json) {
fetch_opts.writer = &writer.interface;
}