From 6b7cd72e697f39f375ef4177aae6d4d235f0707a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= <adria.arrufat@gmail.com>
Date: Mon, 13 Apr 2026 19:06:57 +0200
Subject: [PATCH] agent: prune message history to limit memory growth

Drops older turns while preserving the system prompt and recent messages
to prevent the message arena from growing indefinitely.
---
 src/agent/Agent.zig | 94 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 89 insertions(+), 5 deletions(-)

diff --git a/src/agent/Agent.zig b/src/agent/Agent.zig
index b5235142..a9f1d837 100644
--- a/src/agent/Agent.zig
+++ b/src/agent/Agent.zig
@@ -78,10 +78,6 @@ cmd_executor: CommandExecutor,
 verifier: Verifier,
 recorder: Recorder,
 messages: std.ArrayList(zenai.provider.Message),
-// TODO: message_arena grows without bound during long sessions. Every LLM
-// turn accumulates messages (user + assistant + tool results) that are only
-// freed at deinit. Consider a sliding window that preserves the system
-// prompt but drops older turns.
 message_arena: std.heap.ArenaAllocator,
 tools: []const zenai.provider.Tool,
 model: []const u8,
@@ -466,6 +462,89 @@ fn ensureSystemPrompt(self: *Self) !void {
     }
 }
 
+/// Message count above which `pruneMessages` trims the history.
+const prune_high = 30;
+/// Number of most recent messages `pruneMessages` keeps after index 0.
+const prune_keep = 20;
+
+/// Once the list exceeds `prune_high`, drops older turns so that only
+/// index 0 (the system prompt) and the last `prune_keep` messages remain.
+/// The kept tail is deep-copied into a fresh arena and the old one freed.
+/// If any copy fails, the list and the old arena are left untouched.
+fn pruneMessages(self: *Self) void {
+    const msgs = self.messages.items;
+    if (msgs.len <= prune_high) return;
+    const tail = msgs[msgs.len - prune_keep ..];
+    var new_arena = std.heap.ArenaAllocator.init(self.allocator);
+
+    // Stage the copies outside the list: writing them straight into `msgs`
+    // would leave slots pointing into `new_arena` after a failed prune.
+    var staged: [prune_keep]zenai.provider.Message = undefined;
+    for (tail, &staged) |msg, *slot| {
+        slot.* = dupeMessage(new_arena.allocator(), msg) orelse {
+            new_arena.deinit();
+            return;
+        };
+    }
+
+    // Index 0 is not copied: per the original note its content points into
+    // `system_prompt` rather than the arena — NOTE(review): confirm.
+    @memcpy(msgs[1..][0..prune_keep], &staged);
+    self.messages.shrinkRetainingCapacity(1 + prune_keep);
+    self.message_arena.deinit();
+    self.message_arena = new_arena;
+}
+
+/// Deep-copies `msg` into `alloc`; null on allocation failure (partial copies are not freed).
+fn dupeMessage(alloc: std.mem.Allocator, msg: zenai.provider.Message) ?zenai.provider.Message {
+    return .{
+        .role = msg.role,
+        .content = if (msg.content) |src_text| alloc.dupe(u8, src_text) catch return null else null,
+        .tool_calls = if (msg.tool_calls) |src_calls| dupeToolCalls(alloc, src_calls) catch return null else null,
+        .tool_results = if (msg.tool_results) |src_results| dupeToolResults(alloc, src_results) catch return null else null,
+        .parts = if (msg.parts) |src_parts| dupeParts(alloc, src_parts) catch return null else null,
+    };
+}
+
+/// Deep-copies `calls` into `alloc`, duplicating every string field.
+fn dupeToolCalls(alloc: std.mem.Allocator, calls: []const zenai.provider.ToolCall) ![]const zenai.provider.ToolCall {
+    const copied = try alloc.alloc(zenai.provider.ToolCall, calls.len);
+    for (calls, copied) |src, *dst| dst.* = .{
+        .id = try alloc.dupe(u8, src.id),
+        .name = try alloc.dupe(u8, src.name),
+        .arguments = try alloc.dupe(u8, src.arguments),
+        .thought_signature = if (src.thought_signature) |sig| try alloc.dupe(u8, sig) else null,
+    };
+    return copied;
+}
+
+/// Deep-copies `results` into `alloc`, duplicating every string field.
+/// On error, allocations already made are not freed here.
+fn dupeToolResults(alloc: std.mem.Allocator, results: []const zenai.provider.ToolResult) ![]const zenai.provider.ToolResult {
+    const copied = try alloc.alloc(zenai.provider.ToolResult, results.len);
+    for (results, copied) |src, *dst| dst.* = .{
+        .id = try alloc.dupe(u8, src.id),
+        .name = try alloc.dupe(u8, src.name),
+        .content = try alloc.dupe(u8, src.content),
+        .thought_signature = if (src.thought_signature) |sig| try alloc.dupe(u8, sig) else null,
+    };
+    return copied;
+}
+
+/// Deep-copies `parts` into `alloc`: text payloads and image data/mime
+/// strings are duplicated. On error, earlier allocations are not freed here.
+fn dupeParts(alloc: std.mem.Allocator, parts: []const zenai.provider.ContentPart) ![]const zenai.provider.ContentPart {
+    const copied = try alloc.alloc(zenai.provider.ContentPart, parts.len);
+    for (parts, copied) |src, *dst| dst.* = switch (src) {
+        .text => |t| .{ .text = try alloc.dupe(u8, t) },
+        .image => |img| .{ .image = .{
+            .data = try alloc.dupe(u8, img.data),
+            .mime_type = try alloc.dupe(u8, img.mime_type),
+        } },
+    };
+    return copied;
+}
+
 /// Runs a single LLM turn and returns the commands it executed, without
 /// recording them to the Recorder.  Used by attemptSelfHeal so that the
 /// caller can capture healed commands for script rewriting.
@@ -555,7 +634,10 @@ fn attemptSelfHeal(self: *Self, failed_command: []const u8, verify_context: ?[]c
             self.messages.shrinkRetainingCapacity(msg_baseline);
             continue;
         };
-        if (cmds.len > 0) return cmds;
+        if (cmds.len > 0) {
+            self.pruneMessages();
+            return cmds;
+        }
         self.messages.shrinkRetainingCapacity(msg_baseline);
     }
     return null;
@@ -610,6 +692,8 @@ fn processUserMessage(self: *Self, user_input: []const u8, record_comment: []con
     } else {
         self.terminal.printInfo("(no response from model)");
     }
+
+    self.pruneMessages();
 }
 
 fn handleToolCall(ctx: *anyopaque, allocator: std.mem.Allocator, tool_name: []const u8, arguments: []const u8) []const u8 {