From b4d55eb863508df122f7e34adf6b15c079f3525e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Arrufat?= <adria.arrufat@gmail.com>
Date: Tue, 26 May 2026 16:52:09 +0200
Subject: [PATCH] string: extract truncateUtf8 helper

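Moves the UTF-8 truncation logic from Spinner's `utf8FloorTo` into a
shared `truncateUtf8` function in `string.zig` that returns the prefix
slice instead of its length. Spinner now calls the shared helper, and
markdown's `LimitedWriter` uses it so a byte cap no longer cuts output
in the middle of a codepoint. Adds tests covering 1- to 4-byte
codepoints and invalid leader bytes.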
---
 src/agent/Spinner.zig    | 25 +++++++------------------
 src/browser/markdown.zig |  3 ++-
 src/string.zig           | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/src/agent/Spinner.zig b/src/agent/Spinner.zig
index 36612514..a0993f88 100644
--- a/src/agent/Spinner.zig
+++ b/src/agent/Spinner.zig
@@ -21,6 +21,7 @@ const lp = @import("lightpanda");
 const log = lp.log;
 const Terminal = @import("Terminal.zig");
 const ansi = Terminal.ansi;
+const truncateUtf8 = @import("../string.zig").truncateUtf8;
 
 const Spinner = @This();
 
@@ -168,13 +169,15 @@ pub fn setTool(self: *Spinner, name: []const u8, args: []const u8) void {
     const manual = self.state == .idle;
     self.tool_calls += 1;
     var tool: ToolState = .{ .set_ns = std.time.nanoTimestamp(), .manual = manual };
-    tool.name_len = utf8FloorTo(name, tool.name_buf.len);
-    @memcpy(tool.name_buf[0..tool.name_len], name[0..tool.name_len]);
+    const name_prefix = truncateUtf8(name, tool.name_buf.len);
+    tool.name_len = name_prefix.len;
+    @memcpy(tool.name_buf[0..name_prefix.len], name_prefix);
     // Strip control chars: a literal `\n` in args (e.g. /eval """…""" bodies)
     // breaks the spinner's `\r`-based redraw — the cursor only rewinds to the
     // start of the last line, leaving prior frames stuck on screen.
-    tool.args_len = utf8FloorTo(args, tool.args_buf.len);
-    for (args[0..tool.args_len], 0..) |ch, i| {
+    const args_prefix = truncateUtf8(args, tool.args_buf.len);
+    tool.args_len = args_prefix.len;
+    for (args_prefix, 0..) |ch, i| {
         tool.args_buf[i] = if (ch < 0x20 or ch == 0x7f) ' ' else ch;
     }
     self.state = .{ .tool = tool };
@@ -287,20 +290,6 @@ fn renderLocked(self: *Spinner) void {
     _ = std.posix.write(std.posix.STDERR_FILENO, written) catch {};
 }
 
-/// Largest prefix length of `bytes` that fits in `max_bytes` and ends on
-/// a UTF-8 codepoint boundary. Invalid sequences are treated as one byte
-/// each so the function never loops.
-fn utf8FloorTo(bytes: []const u8, max_bytes: usize) usize {
-    if (bytes.len <= max_bytes) return bytes.len;
-    var i: usize = 0;
-    while (i < max_bytes) {
-        const seq_len = std.unicode.utf8ByteSequenceLength(bytes[i]) catch 1;
-        if (i + seq_len > max_bytes) break;
-        i += seq_len;
-    }
-    return i;
-}
-
 /// Returns the byte length of `bytes` that fits in `max_cells` cells,
 /// rounded down to a whole UTF-8 codepoint. Multi-cell glyphs (CJK,
 /// wide emoji) are counted as 1 — args are typically ASCII so the
diff --git a/src/browser/markdown.zig b/src/browser/markdown.zig
index d547e131..c542c7e4 100644
--- a/src/browser/markdown.zig
+++ b/src/browser/markdown.zig
@@ -24,6 +24,7 @@ const TreeWalker = @import("webapi/TreeWalker.zig");
 const Element = @import("webapi/Element.zig");
 const Node = @import("webapi/Node.zig");
 const isAllWhitespace = @import("../string.zig").isAllWhitespace;
+const truncateUtf8 = @import("../string.zig").truncateUtf8;
 
 pub const Opts = struct {
     max_bytes: ?u32 = null,
@@ -72,7 +73,7 @@ const LimitedWriter = struct {
             return;
         }
         if (self.remaining > 0) {
-            try self.inner.writeAll(bytes[0..self.remaining]);
+            try self.inner.writeAll(truncateUtf8(bytes, self.remaining));
             self.remaining = 0;
         }
         self.truncated = true;
diff --git a/src/string.zig b/src/string.zig
index c91b75d9..24bfe482 100644
--- a/src/string.zig
+++ b/src/string.zig
@@ -311,6 +311,20 @@ pub fn isAllWhitespace(text: []const u8) bool {
     } else true;
 }
 
+/// Longest prefix of `bytes` that is at most `max_bytes` long and ends
+/// on a UTF-8 codepoint boundary. The result is a slice of `bytes`, not a
+/// copy. An invalid lead byte counts as one byte, so the scan always advances.
+pub fn truncateUtf8(bytes: []const u8, max_bytes: usize) []const u8 {
+    if (bytes.len <= max_bytes) return bytes;
+    var i: usize = 0;
+    while (i < max_bytes) {
+        const seq_len = std.unicode.utf8ByteSequenceLength(bytes[i]) catch 1;
+        if (i + seq_len > max_bytes) break;
+        i += seq_len;
+    }
+    return bytes[0..i];
+}
+
 // Discriminatory type that signals the bridge to use arena instead of call_arena
 // Use this for strings that need to persist beyond the current call
 // The caller can unwrap and store just the underlying .str field
@@ -333,6 +347,30 @@ fn asUint(comptime string: anytype) std.meta.Int(
 
 const testing = @import("testing.zig");
 
+test "truncateUtf8" {
+    try testing.expectEqual("", truncateUtf8("", 10));
+    try testing.expectEqual("abc", truncateUtf8("abc", 10));
+    try testing.expectEqual("abc", truncateUtf8("abcdef", 3));
+
+    // 'é' = 0xC3 0xA9 — a cap that falls inside the codepoint drops it entirely.
+    try testing.expectEqual("", truncateUtf8("é", 1));
+    try testing.expectEqual("é", truncateUtf8("é", 2));
+    try testing.expectEqual("é", truncateUtf8("éé", 3));
+
+    // 3-byte codepoint '世' = 0xE4 0xB8 0x96.
+    try testing.expectEqual("", truncateUtf8("世", 2));
+    try testing.expectEqual("世", truncateUtf8("世界", 3));
+    try testing.expectEqual("世", truncateUtf8("世界", 5));
+
+    // 4-byte codepoint '𝄞' (musical G clef) = 0xF0 0x9D 0x84 0x9E.
+    try testing.expectEqual("", truncateUtf8("𝄞", 3));
+    try testing.expectEqual("𝄞", truncateUtf8("𝄞x", 4));
+
+    // Invalid leader byte counts as one byte so the loop terminates.
+    try testing.expectEqual("\xFF", truncateUtf8("\xFFx", 1));
+    try testing.expectEqual("\xFFx", truncateUtf8("\xFFx", 2));
+}
+
 test "String" {
     const other_short = try String.init(undefined, "other_short", .{});
     const other_long = try String.init(testing.allocator, "other_long" ** 100, .{});