diff --git a/src/browser/tools.zig b/src/browser/tools.zig index 3dfdfd5e..b45b7ce5 100644 --- a/src/browser/tools.zig +++ b/src/browser/tools.zig @@ -136,11 +136,18 @@ pub const tool_defs = [_]ToolDef{ \\Extract structured data from the current page using a small JSON schema. Prefer this over `markdown` or `eval` whenever the user asked for a specific value or list (a score, price, count, profile field, headlines, …) — the result is returned as JSON AND the call is recorded as an `EXTRACT` PandaScript line, so a later replay (no LLM) prints the answer to stdout. Use `markdown` / `tree` / `interactiveElements` only to discover the right selector, then commit to one `extract` call. \\ \\Schema is a JSON object literal (pass it as a string in `schema`). Each value picks what to lift out: - \\ "<sel>" → first match's textContent.trim() (string|null) - \\ ["<sel>"] → every match's text (string[]) - \\ {"selector":"<sel>","attr":"<name>"} → first match's attribute (string|null) - \\ [{"selector":"<sel>","fields":{…}}] → array of objects, fields resolved relative to each match - \\Example: schema `{"karma": "#karma"}` → `{"karma":"42"}`. + \\ "<sel>" → first match's textContent.trim() (string|null) + \\ "" → element's own textContent.trim() (only meaningful inside `fields`) + \\ ["<sel>"] → every match's text (string[]) + \\ {"selector":"<sel>","attr":"<name>"} → first match's attribute (string|null) + \\ [{"selector":"<sel>","attr":"<name>"}] → every match's attribute (string[]) + \\ [{"selector":"<sel>","fields":{…}}] → array of objects, fields resolved relative to each match + \\ + \\Examples (schema → result): + \\ {"karma": "#karma"} → {"karma":"42"} + \\ {"items": [".story .title"]} → {"items":["Title 1","Title 2"]} + \\ {"links": [{"selector":"a.title","attr":"href"}]} → {"links":["/a","/b"]} + \\ {"stories": [{"selector":".athing","fields":{"title":".titleline","rank":".rank"}}]} → {"stories":[{"title":"Foo","rank":"1"}]} , .input_schema = minify( \\{ @@ -504,13 +511,7 @@ pub fn evalScript( /// Schema-driven extraction. 
The schema is parsed in Zig so a syntax error /// surfaces here instead of as a confusing V8 SyntaxError on the spliced -/// walker. Each value in the schema object is one of: -/// "sel" → first match's textContent.trim() (string|null) -/// "" → matched element's own textContent.trim() -/// ["sel"] → all matches' textContent (string[]) -/// {selector, attr} → first match's attribute (string|null) -/// [{selector, attr}] → all matches' attributes (string[]) -/// [{selector, fields}] → all matches, with `fields` relative to each (object[]) +/// walker. pub fn extract( arena: std.mem.Allocator, session: *lp.Session, diff --git a/src/script.zig b/src/script.zig index 740964fb..6110ba12 100644 --- a/src/script.zig +++ b/src/script.zig @@ -109,13 +109,8 @@ pub const mcp_driver_guidance = \\ prints the value to stdout. Reading the page via `markdown` and \\ answering only in chat does NOT survive replay. \\- Use `markdown` / `tree` / `interactiveElements` to *discover* the right - \\ selector, then commit to one `extract` call. Schema examples: - \\ {"karma": "#karma"} → single value - \\ {"items": [".story .title"]} → list of texts - \\ {"links": [{"selector":"a.title","attr":"href"}]} → list of attrs - \\- Pass the schema as a JSON string in the `schema` argument. It must be - \\ a JSON object literal — see the `extract` tool description for - \\ the full schema grammar. + \\ selector, then commit to one `extract` call. See the `extract` tool + \\ description for the schema grammar and examples. \\ ;