mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 01:25:53 -04:00
repl: add syntax highlighting and completions
This commit is contained in:
@@ -312,6 +312,18 @@ fn runRepl(self: *Self) void {
|
||||
|
||||
const cmd = Command.parse(line);
|
||||
|
||||
// Distinguish "you typed `TYPE` but forgot the args" from "this is
|
||||
// natural language for the LLM". Both fall through to
|
||||
// `.natural_language` in Command.parse, but the first should never
|
||||
// hit the LLM-needed error path — it's a syntax mistake on a
|
||||
// PandaScript command.
|
||||
if (std.meta.activeTag(cmd) == .natural_language) {
|
||||
if (Command.keywordSyntax(line)) |kc| {
|
||||
self.terminal.printErrorFmt("Usage: {s} {s}", .{ kc.name, kc.args });
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (cmd.needsLlm() and self.ai_client == null) {
|
||||
self.terminal.printError("This command needs an LLM. Set an API key or pass --provider (and drop --no-llm if you set it). PandaScript commands (GOTO, CLICK, EXTRACT, ...) work without one.");
|
||||
continue;
|
||||
|
||||
@@ -217,6 +217,40 @@ pub fn parse(line: []const u8) Command {
|
||||
return .{ .natural_language = trimmed };
|
||||
}
|
||||
|
||||
/// Usage shape of a PandaScript keyword that takes arguments: the keyword
/// itself plus a human-readable template of what must follow it.
pub const KeywordSyntax = struct {
    /// Keyword exactly as it has to be typed (lookup is case-sensitive).
    name: []const u8,
    /// Argument template printed after the keyword in usage messages,
    /// e.g. `'<selector>' '<value>'`.
    args: []const u8,
};

/// If the first whitespace-delimited word of `line` is a recognized
/// PandaScript keyword that takes arguments, returns that keyword's expected
/// shape; otherwise returns null (including for empty or all-whitespace
/// input).
///
/// Lets the REPL distinguish "you mistyped args for a known command" from
/// "this is natural language" — the latter goes to the LLM, the former gets
/// a syntax error. Argless commands (TREE, MARKDOWN, LOGIN, ACCEPT_COOKIES)
/// are intentionally absent because they always parse successfully when
/// typed alone, so they never fall through to natural_language.
pub fn keywordSyntax(line: []const u8) ?KeywordSyntax {
    const table = [_]KeywordSyntax{
        .{ .name = "GOTO", .args = "<url>" },
        .{ .name = "CLICK", .args = "'<selector>'" },
        .{ .name = "TYPE", .args = "'<selector>' '<value>'" },
        .{ .name = "WAIT", .args = "'<selector>'" },
        .{ .name = "SCROLL", .args = "[x] [y]" },
        .{ .name = "HOVER", .args = "'<selector>'" },
        .{ .name = "SELECT", .args = "'<selector>' '<value>'" },
        .{ .name = "CHECK", .args = "'<selector>' [true|false]" },
        .{ .name = "EXTRACT", .args = "'<selector>'" },
        .{ .name = "EVAL", .args = "'<script>'" },
    };

    // The tokenizer skips leading whitespace itself, so no separate trim is
    // needed; a line with no word at all yields null right away.
    var words = std.mem.tokenizeAny(u8, line, &std.ascii.whitespace);
    const first_word = words.next() orelse return null;

    for (table) |entry| {
        if (std.mem.eql(u8, first_word, entry.name)) return entry;
    }
    return null;
}
|
||||
|
||||
/// Iterator for parsing a script file, handling multi-line EVAL """ ... """ blocks.
|
||||
pub const ScriptIterator = struct {
|
||||
allocator: std.mem.Allocator,
|
||||
|
||||
@@ -95,6 +95,20 @@ pub fn init(allocator: std.mem.Allocator, history_path: ?[:0]const u8, verbosity
|
||||
// to ic_readline renders verbatim; the agent already supplies its own
|
||||
// `> ` prefix.
|
||||
c.ic_set_prompt_marker("", "");
|
||||
// PandaScript syntax highlighting. Names are namespaced `ps-*` so users
|
||||
// (or a future theme system) can override via `ic_style_def` without
|
||||
// colliding with isocline's built-in `ic-*` styles. Bold/underline are
|
||||
// intentionally restrained — the prompt is meant to read, not glow.
|
||||
c.ic_style_def("ps-cmd", "ansi-cyan bold");
|
||||
c.ic_style_def("ps-slash", "ansi-magenta bold");
|
||||
c.ic_style_def("ps-string", "ansi-green");
|
||||
c.ic_style_def("ps-var", "ansi-yellow bold");
|
||||
c.ic_style_def("ps-url", "ansi-blue underline");
|
||||
c.ic_style_def("ps-key", "ansi-cyan");
|
||||
c.ic_style_def("ps-num", "ansi-yellow");
|
||||
c.ic_style_def("ps-err", "ansi-red");
|
||||
_ = c.ic_enable_highlight(true);
|
||||
c.ic_set_default_highlighter(&highlighterCallback, null);
|
||||
if (history_path) |path| {
|
||||
// -1 → default cap (200 entries). Passing a filename makes isocline
|
||||
// load existing entries and auto-persist additions.
|
||||
@@ -276,6 +290,37 @@ fn addPartialKeyCompletions(
|
||||
}
|
||||
}
|
||||
|
||||
// Offers `$LP_*` completions while the user is mid-typing a `$VAR` token.
//
// Looks at the LAST `$` in `input`; completions are offered only when every
// byte after it is a name character (ASCII alphanumeric or `_`), so this
// works in PandaScript args (`TYPE '#u' $LP_`), slash values
// (`/click value=$L`), and bare prefixes (`$L`). Candidate names come from
// `browser_tools.lpEnvNames` and are matched case-insensitively against the
// partial name.
//
// `buf` is scratch space for building each NUL-terminated candidate; a
// candidate that does not fit is skipped. Each candidate replaces the whole
// input (delete-before = `input.len`) with `input` up to and including the
// `$`, followed by the full variable name.
fn addEnvVarCompletions(
    cenv: ?*c.ic_completion_env_t,
    buf: *[completion_buf_len:0]u8,
    input: []const u8,
) void {
    const dollar = std.mem.lastIndexOfScalar(u8, input, '$') orelse return;
    const head = input[0 .. dollar + 1];
    const partial = input[head.len..];

    // Any non-name byte after the `$` means the cursor has already left the
    // variable token, so there is nothing to complete.
    for (partial) |ch| {
        const is_name_char = std.ascii.isAlphanumeric(ch) or ch == '_';
        if (!is_name_char) return;
    }

    // Stack-only scratch for the env-name list. 16 KiB holds ~1000 names'
    // worth of slice metadata (the names themselves are not copied) — far
    // more than any realistic environment. Completion is best-effort: if the
    // list does not fit, offer nothing rather than fail.
    var stack: [16 * 1024]u8 = undefined;
    var fba: std.heap.FixedBufferAllocator = .init(&stack);
    const names = browser_tools.lpEnvNames(fba.allocator()) catch return;

    for (names) |name| {
        if (!std.ascii.startsWithIgnoreCase(name, partial)) continue;
        const candidate = std.fmt.bufPrintZ(buf, "{s}{s}", .{ head, name }) catch continue;
        _ = c.ic_add_completion_prim(cenv, candidate.ptr, null, null, @intCast(input.len), 0);
    }
}
|
||||
|
||||
fn completionCallback(cenv: ?*c.ic_completion_env_t, prefix: [*c]const u8) callconv(.c) void {
|
||||
const input = std.mem.sliceTo(@as([*:0]const u8, @ptrCast(prefix)), 0);
|
||||
const self_ptr = c.ic_completion_arg(cenv) orelse return;
|
||||
@@ -285,6 +330,8 @@ fn completionCallback(cenv: ?*c.ic_completion_env_t, prefix: [*c]const u8) callc
|
||||
// copies the string internally so reuse across candidates is fine.
|
||||
var buf: [completion_buf_len:0]u8 = undefined;
|
||||
|
||||
// `/help <name>` — the arg is itself a tool name, not a value, so env-var
|
||||
// completion would be confusing here. Short-circuit.
|
||||
if (parseHelpArgPrefix(input)) |partial| {
|
||||
for (all_slash_names) |name| addPrefixedCompletion(cenv, &buf, input, help_arg_prefix, name, "", partial);
|
||||
return;
|
||||
@@ -300,19 +347,165 @@ fn completionCallback(cenv: ?*c.ic_completion_env_t, prefix: [*c]const u8) callc
|
||||
addPartialKeyCompletions(cenv, input, parts.rest, schema, &buf);
|
||||
}
|
||||
}
|
||||
// Fall through so `value=$LP_` etc. picks up env completions.
|
||||
} else {
|
||||
const partial = input[1..];
|
||||
for (all_slash_names) |name| addPrefixedCompletion(cenv, &buf, input, "/", name, "", partial);
|
||||
return;
|
||||
}
|
||||
const partial = input[1..];
|
||||
for (all_slash_names) |name| addPrefixedCompletion(cenv, &buf, input, "/", name, "", partial);
|
||||
return;
|
||||
} else if (!has_space) {
|
||||
// Case-insensitive on the completion side so Tab also rewrites
|
||||
// mistyped lowercase (`goto` → `GOTO`). The highlighter stays
|
||||
// case-sensitive, so a lowercase-typed line reads as natural
|
||||
// language until the user accepts the completion.
|
||||
for (commands) |cmd| {
|
||||
if (std.ascii.startsWithIgnoreCase(cmd.name, input)) {
|
||||
const text = std.fmt.bufPrintZ(&buf, "{s}", .{cmd.name}) catch continue;
|
||||
_ = c.ic_add_completion_prim(cenv, text.ptr, null, null, @intCast(input.len), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has_space) return;
|
||||
addEnvVarCompletions(cenv, &buf, input);
|
||||
}
|
||||
|
||||
// PandaScript syntax highlighter. Invoked by isocline on every input change;
// keep it cheap. The `pos` and `count` passed to `ic_highlight` are byte
// offsets/lengths into `input`, not UTF-8 code points — fine here because we
// only tokenize on ASCII boundaries (whitespace, quotes, `=`, `$`).
//
// The first word decides the mode:
//   * `/name ...`  → slash form; the name is styled `ps-slash` when known,
//                    `ps-err` otherwise, and the rest is `key=value` args.
//   * `WORD ...`   → PandaScript; a known command is styled `ps-cmd`, an
//                    ALL-CAPS unknown word `ps-err` (likely typo), anything
//                    else is left unstyled as natural language for the LLM.
fn highlighterCallback(henv: ?*c.ic_highlight_env_t, input: [*c]const u8, _: ?*anyopaque) callconv(.c) void {
    // A C pointer may be null; casting null to a non-optional pointer is
    // illegal, so bail out before the cast rather than trap inside a C
    // callback.
    if (input == null) return;
    const text = std.mem.sliceTo(@as([*:0]const u8, @ptrCast(input)), 0);

    // Skip leading whitespace; empty or blank input has nothing to color.
    var i: usize = 0;
    while (i < text.len and std.ascii.isWhitespace(text[i])) i += 1;
    if (i >= text.len) return;

    // First word: runs up to the next whitespace byte. It is never empty
    // because `text[i]` is a non-whitespace byte at this point.
    const cmd_start = i;
    while (i < text.len and !std.ascii.isWhitespace(text[i])) i += 1;
    const cmd = text[cmd_start..i];

    if (cmd[0] == '/') {
        const style: [*:0]const u8 = if (isKnownSlashName(cmd[1..])) "ps-slash" else "ps-err";
        c.ic_highlight(henv, @intCast(cmd_start), @intCast(cmd.len), style);
        highlightSlashArgs(henv, text, i);
        return;
    }

    // PandaScript commands are ALL CAPS. Known → keyword color. ALL CAPS
    // but unknown → red (likely typo). Anything else → no highlight,
    // it's a natural-language query for the LLM.
    const style: ?[*:0]const u8 = if (isKnownCommand(cmd))
        "ps-cmd"
    else if (isAllUpper(cmd))
        "ps-err"
    else
        null;
    if (style) |s| c.ic_highlight(henv, @intCast(cmd_start), @intCast(cmd.len), s);
    highlightPandaArgs(henv, text, i);
}
|
||||
|
||||
// Reports whether `s` is shaped like a PandaScript keyword: made up only of
// `A`-`Z`, digits and `_`, and containing at least one uppercase letter.
//
// The letter requirement matters to the highlighter: without it a token such
// as `123` or `_` would count as "all caps" and a natural-language line that
// merely starts with a number would be painted as a mistyped command.
// Returns false for the empty string.
fn isAllUpper(s: []const u8) bool {
    var saw_letter = false;
    for (s) |ch| switch (ch) {
        'A'...'Z' => saw_letter = true,
        '_', '0'...'9' => {},
        else => return false,
    };
    return saw_letter;
}
|
||||
|
||||
// Reports whether `name` is exactly (case-sensitively) the name of one of
// the entries in `commands`.
//
// This is a pure lookup: it must not touch completion state. Lines from the
// completion loop (`input`, `buf`, `cenv`) do not belong in this function —
// none of those names are in scope here.
fn isKnownCommand(name: []const u8) bool {
    for (commands) |cmd| {
        if (std.mem.eql(u8, cmd.name, name)) return true;
    }
    return false;
}
|
||||
|
||||
// Reports whether `name` (the text after the leading `/`) matches one of
// `all_slash_names`, comparing ASCII case-insensitively.
fn isKnownSlashName(name: []const u8) bool {
    for (all_slash_names) |known| {
        if (std.ascii.eqlIgnoreCase(known, name)) return true;
    }
    return false;
}
|
||||
|
||||
// Color the non-quoted token `text[start..end]` based on how it begins:
//   * leading `$`                      → `ps-var`
//   * `http://` or `https://` prefix   → `ps-url`
//   * leading digit, and every byte is a digit or `.` → `ps-num`
// Anything else gets no highlight (lets the terminal's default foreground
// show through). An empty range (`start >= end`) is a no-op.
fn highlightBareToken(henv: ?*c.ic_highlight_env_t, text: []const u8, start: usize, end: usize) void {
    if (start >= end) return;
    const tok = text[start..end];

    // Classify first, then emit a single highlight call for the whole token.
    const style: [*:0]const u8 = classify: {
        if (tok[0] == '$') break :classify "ps-var";

        const is_url = std.mem.startsWith(u8, tok, "http://") or
            std.mem.startsWith(u8, tok, "https://");
        if (is_url) break :classify "ps-url";

        // Must start with a digit (so a lone `.` is not a number) and contain
        // nothing but digits and dots.
        const is_num = std.ascii.isDigit(tok[0]) and
            std.mem.indexOfNone(u8, tok, "0123456789.") == null;
        if (is_num) break :classify "ps-num";

        return;
    };
    c.ic_highlight(henv, @intCast(start), @intCast(end - start), style);
}
|
||||
|
||||
// Consume a quoted token whose opening quote sits at `text[start]` and
// return the index just past the matching closing quote. The byte at `start`
// is taken as the quote character, so the same routine serves single and
// double quotes.
//
// Backslash escapes are handled minimally — a backslash hides the byte that
// follows it, which is enough not to be confused by `\'` inside a
// single-quoted string. An unterminated token runs to `text.len`. When
// `start` is already at or past the end, `start` is returned unchanged.
fn scanQuoted(text: []const u8, start: usize) usize {
    if (start >= text.len) return start;

    const quote = text[start];
    var i = start + 1;
    while (i < text.len) {
        const ch = text[i];
        if (ch == quote) return i + 1;
        // Step over the escaped byte too, unless the backslash is the very
        // last byte of the input.
        const step: usize = if (ch == '\\' and i + 1 < text.len) 2 else 1;
        i += step;
    }
    return i;
}
|
||||
|
||||
// Highlight the arguments of a PandaScript command, scanning `text` from
// byte offset `start` to the end. Tokens are separated by ASCII whitespace:
//   * a token opening with `'` or `"` is consumed via `scanQuoted` and
//     styled `ps-string` (quotes included; an unterminated string is styled
//     through the end of the input);
//   * any other token is handed to `highlightBareToken`.
fn highlightPandaArgs(henv: ?*c.ic_highlight_env_t, text: []const u8, start: usize) void {
    var i = start;
    while (true) {
        while (i < text.len and std.ascii.isWhitespace(text[i])) i += 1;
        if (i >= text.len) return;

        const tok_start = i;
        switch (text[i]) {
            '\'', '"' => {
                i = scanQuoted(text, i);
                c.ic_highlight(henv, @intCast(tok_start), @intCast(i - tok_start), "ps-string");
            },
            else => {
                while (i < text.len and !std.ascii.isWhitespace(text[i])) i += 1;
                highlightBareToken(henv, text, tok_start, i);
            },
        }
    }
}
|
||||
|
||||
// Highlight the arguments of a `/slash` command, scanning `text` from byte
// offset `start` to the end. Arguments are whitespace-separated:
//   * `key=value` — the key is styled `ps-key`; a value opening with `'` or
//     `"` is consumed via `scanQuoted` and styled `ps-string`, any other
//     value goes through `highlightBareToken`;
//   * a bare positional (no `=`) is left unstyled.
// The `=` itself is never styled.
fn highlightSlashArgs(henv: ?*c.ic_highlight_env_t, text: []const u8, start: usize) void {
    var i = start;
    while (true) {
        while (i < text.len and std.ascii.isWhitespace(text[i])) i += 1;
        if (i >= text.len) return;

        // The key runs up to the first `=` or whitespace, whichever is first.
        const key_start = i;
        while (i < text.len and text[i] != '=' and !std.ascii.isWhitespace(text[i])) i += 1;

        // bare positional (no `=`) — leave unstyled.
        if (i >= text.len or text[i] != '=') continue;

        // A token that starts with `=` has an empty key; there is nothing to
        // color, so skip the zero-width highlight call.
        if (i > key_start) {
            c.ic_highlight(henv, @intCast(key_start), @intCast(i - key_start), "ps-key");
        }
        i += 1; // consume '='

        const val_start = i;
        const quoted = i < text.len and (text[i] == '\'' or text[i] == '"');
        if (quoted) {
            i = scanQuoted(text, i);
            c.ic_highlight(henv, @intCast(val_start), @intCast(i - val_start), "ps-string");
        } else {
            while (i < text.len and !std.ascii.isWhitespace(text[i])) i += 1;
            highlightBareToken(henv, text, val_start, i);
        }
    }
}
|
||||
|
||||
|
||||
@@ -935,6 +935,24 @@ fn listLpEnvNames(arena: std.mem.Allocator) ToolError![]const u8 {
|
||||
return formatLpEnvNames(arena, lines.items);
|
||||
}
|
||||
|
||||
/// Sorted `LP_*`-prefixed environment-variable names from the current
/// process.
///
/// Each `NAME=value` entry of `std.os.environ` is cut at its first `=`;
/// entries without one are skipped. The `LP_` prefix test is ASCII
/// case-insensitive. Results are ordered with `lpNameLessThan`.
///
/// Returned name slices point into `std.os.environ` (they are not copied),
/// which is stable for the process lifetime; only the outer slice is
/// allocated from `arena`. Used by the agent REPL completer to offer `$LP_*`
/// Tab completions — same data source as the `getEnv` tool (no-name
/// variant), just unformatted.
///
/// Fails with `error.OutOfMemory` when `arena` cannot hold the list.
pub fn lpEnvNames(arena: std.mem.Allocator) error{OutOfMemory}![]const []const u8 {
    var names: std.ArrayList([]const u8) = .empty;
    for (std.os.environ) |entry| {
        const line = std.mem.span(entry);
        if (std.mem.indexOfScalar(u8, line, '=')) |eq_idx| {
            const name = line[0..eq_idx];
            if (std.ascii.startsWithIgnoreCase(name, "LP_")) {
                try names.append(arena, name);
            }
        }
    }
    std.mem.sort([]const u8, names.items, {}, lpNameLessThan);
    return names.items;
}
|
||||
|
||||
fn formatLpEnvNames(arena: std.mem.Allocator, env_lines: []const []const u8) ToolError![]const u8 {
|
||||
var names: std.ArrayList([]const u8) = .empty;
|
||||
for (env_lines) |line| {
|
||||
|
||||
Reference in New Issue
Block a user