docs(browser.tools): clarify extract tool schema and examples

2026-06-11 01:25:53 -04:00 · 2026-05-14 10:48:13 +02:00
parent 688924e7a1
commit faba86cf82
2 changed files with 15 additions and 19 deletions
--- a/src/browser/tools.zig
+++ b/src/browser/tools.zig
@@ -136,11 +136,18 @@ pub const tool_defs = [_]ToolDef{
        \\Extract structured data from the current page using a small JSON schema. Prefer this over `markdown` or `eval` whenever the user asked for a specific value or list (a score, price, count, profile field, headlines, …) — the result is returned as JSON AND the call is recorded as an `EXTRACT` PandaScript line, so a later replay (no LLM) prints the answer to stdout. Use `markdown` / `tree` / `interactiveElements` only to discover the right selector, then commit to one `extract` call.
        \\
        \\Schema is a JSON object literal (pass it as a string in `schema`). Each value picks what to lift out:
-        \\  "<sel>"                                    → first match's textContent.trim() (string|null)
-        \\  ["<sel>"]                                  → every match's text (string[])
-        \\  {"selector":"<sel>","attr":"<name>"}       → first match's attribute (string|null)
-        \\  [{"selector":"<sel>","fields":{…}}]        → array of objects, fields resolved relative to each match
-        \\Example: schema `{"karma": "#karma"}` → `{"karma":"42"}`.
+        \\  "<sel>"                                → first match's textContent.trim() (string|null)
+        \\  ""                                     → the matched element's own textContent.trim() (only meaningful inside `fields`)
+        \\  ["<sel>"]                              → every match's text (string[])
+        \\  {"selector":"<sel>","attr":"<name>"}   → first match's attribute (string|null)
+        \\  [{"selector":"<sel>","attr":"<name>"}] → every match's attribute (string[])
+        \\  [{"selector":"<sel>","fields":{…}}]    → array of objects, fields resolved relative to each match
+        \\
+        \\Examples (schema → result):
+        \\  {"karma": "#karma"} → {"karma":"42"}
+        \\  {"items": [".story .title"]} → {"items":["Title 1","Title 2"]}
+        \\  {"links": [{"selector":"a.title","attr":"href"}]} → {"links":["/a","/b"]}
+        \\  {"stories": [{"selector":".athing","fields":{"title":".titleline","rank":".rank"}}]} → {"stories":[{"title":"Foo","rank":"1"}]}
        ,
        .input_schema = minify(
            \\{
@@ -504,13 +511,7 @@ pub fn evalScript(

 /// Schema-driven extraction. The schema is parsed in Zig so a syntax error
 /// surfaces here instead of as a confusing V8 SyntaxError on the spliced
-/// walker. Each value in the schema object is one of:
-///   "sel"                → first match's textContent.trim() (string|null)
-///   ""                   → matched element's own textContent.trim()
-///   ["sel"]              → all matches' textContent (string[])
-///   {selector, attr}     → first match's attribute (string|null)
-///   [{selector, attr}]   → all matches' attributes (string[])
-///   [{selector, fields}] → all matches, with `fields` relative to each (object[])
+/// walker.
 pub fn extract(
    arena: std.mem.Allocator,
    session: *lp.Session,
--- a/src/script.zig
+++ b/src/script.zig
@@ -109,13 +109,8 @@ pub const mcp_driver_guidance =
    \\  prints the value to stdout. Reading the page via `markdown` and
    \\  answering only in chat does NOT survive replay.
    \\- Use `markdown` / `tree` / `interactiveElements` to *discover* the right
-    \\  selector, then commit to one `extract` call. Schema examples:
-    \\    {"karma": "#karma"}                            → single value
-    \\    {"items": [".story .title"]}                   → list of texts
-    \\    {"links": [{"selector":"a.title","attr":"href"}]}  → list of attrs
-    \\- Pass the schema as a JSON string in the `schema` argument. It must be
-    \\  a JSON object literal — see the `extract` tool description for
-    \\  the full schema grammar.
+    \\  selector, then commit to one `extract` call. See the `extract` tool
+    \\  description for the schema grammar and examples.
    \\
 ;