Files
seedit/scripts/generate-llms-files.mjs
2026-05-20 14:48:34 +07:00

242 lines
6.4 KiB
JavaScript

#!/usr/bin/env node
import { mkdir, readFile, writeFile } from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
// Repository root: the parent of the directory that contains this script.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const rootDir = path.resolve(scriptDir, "..");

// Generator settings are read from a JSON file addressed relative to rootDir.
const configText = await readText("scripts/llms-config.json");
const config = JSON.parse(configText);

// Fail fast on the first required key that is absent or falsy.
const requiredConfigKeys = ["title", "summary", "repoUrl", "llmsFullUrl"];
const missingConfigKey = requiredConfigKeys.find((key) => !config[key]);
if (missingConfigKey !== undefined) {
  throw new Error(`Missing llms config key: ${missingConfigKey}`);
}

// Core docs must yield at least one readable file; optional docs may be omitted.
const docs = await loadDocs(config.docs);
const optionalDocs = await loadDocs(config.optionalDocs || []);
if (docs.length === 0) {
  throw new Error("No source docs found for llms generation");
}

// Each rendering is built immediately before it is written, so a failure while
// building the full file still leaves the short outputs on disk.
const shortContent = buildShortFile(config, docs, optionalDocs);
await writeOutputs(config.shortOutputs, shortContent);

const fullContent = buildFullFile(config, docs, optionalDocs);
await writeOutputs(config.fullOutputs, fullContent);
/**
 * Renders the short llms index: title, summary, canonical links, notes,
 * source-of-truth guidance, and one link line per document.
 *
 * @param {object} config - Generator config. Reads `title`, `summary`,
 *   `links` (array of `[label, url]` pairs), `llmsFullUrl`, and `notes`.
 * @param {object[]} docs - Core documents, rendered with `formatDocLink`.
 * @param {object[]} optionalDocs - Extra documents; the "Optional" section is
 *   emitted only when this array is non-empty.
 * @returns {string} The file text, terminated by a newline.
 */
function buildShortFile(config, docs, optionalDocs) {
  // A section is its heading, one blank line, then its bullet lines.
  const section = (heading, bullets) => [heading, "", ...bullets];

  const blocks = [
    [`# ${config.title}`],
    [`> ${config.summary}`],
    [
      "This file is generated by `scripts/generate-llms-files.mjs`. Do not hand-edit it; update the source docs or generator config, then run `yarn llms:generate`.",
    ],
    section("## Canonical Links", [
      ...config.links.map(([label, url]) => `- [${label}](${url})`),
      `- [llms-full.txt](${config.llmsFullUrl}): Expanded inline corpus generated from the curated source docs.`,
    ]),
    section(
      "## Critical Context",
      config.notes.map((note) => `- ${note}`),
    ),
    section("## Source Of Truth", [
      "- Code, tests, package manifests, source docs, and live/runtime evidence when relevant are source of truth.",
      "- This generated file is compiled context for orientation. Verify behavioral claims against source files before editing or concluding.",
      "- Repo-managed AI instructions live in `AGENTS.md` and any directory-specific `AGENTS.md` files.",
    ]),
    section("## Core Documents", docs.map(formatDocLink)),
  ];

  if (optionalDocs.length > 0) {
    blocks.push(section("## Optional", optionalDocs.map(formatDocLink)));
  }

  // Blocks are separated by exactly one blank line.
  const text = blocks.map((block) => block.join("\n")).join("\n\n");
  return `${text}\n`;
}
/**
 * Renders the expanded llms file: a header, an index of every document, and
 * each document's trimmed content inlined inside a fenced `markdown` block.
 *
 * @param {object} config - Generator config. Reads `title` and `summary`.
 * @param {object[]} docs - Core documents; each provides `relativePath`,
 *   `url`, and `content`.
 * @param {object[]} optionalDocs - Extra documents appended after `docs`.
 * @returns {string} The file text, terminated by a newline.
 */
function buildFullFile(config, docs, optionalDocs) {
  const allDocs = [...docs, ...optionalDocs];
  const lines = [
    `# ${config.title} Full LLM Context`,
    "",
    `> ${config.summary}`,
    "",
    "This file is generated by `scripts/generate-llms-files.mjs`. Do not hand-edit it; update the source docs or generator config, then run `yarn llms:generate`.",
    "",
    "Use this as compiled context only. Source files, tests, manifests, and live/runtime evidence remain authoritative.",
    "",
    "## Index",
    "",
    ...allDocs.map((doc) => `- [${doc.relativePath}](#${anchorFor(doc.relativePath)})`),
    "",
    "---",
  ];

  for (const doc of allDocs) {
    const body = doc.content.trim();
    // The inlined document is Markdown and may contain its own code fences,
    // so the wrapper fence must be longer than any backtick run inside it.
    const fence = markdownFenceFor(body);
    lines.push(
      "",
      `## ${doc.relativePath}`,
      "",
      `Source: ${doc.url}`,
      "",
      `${fence}markdown`,
      body,
      fence,
      "",
      "---",
    );
  }

  return `${lines.join("\n")}\n`;
}

/**
 * Chooses a backtick fence that cannot be closed by anything inside `text`:
 * one backtick longer than the longest run of three or more backticks found,
 * and never shorter than three.
 *
 * @param {string} text - Content that will be placed inside the fence.
 * @returns {string} The fence marker (three or more backticks).
 */
function markdownFenceFor(text) {
  const backtickRuns = text.match(/`{3,}/g) ?? [];
  const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 2);
  return "`".repeat(longestRun + 1);
}
/**
 * Formats one document as a Markdown bullet: a link followed by its summary.
 *
 * @param {object} doc - Document record; reads `title`, `url`, and `summary`.
 * @returns {string} A line of the form `- [title](url): summary`.
 */
function formatDocLink(doc) {
  const { title, url, summary } = doc;
  const link = `[${title}](${url})`;
  return `- ${link}: ${summary}`;
}
/**
 * Loads the listed documents and derives the metadata used in the generated
 * files. Paths are read one at a time via `readOptional`; entries whose
 * content comes back empty are skipped.
 *
 * Reads `repoUrl` from the module-level `config` to build each document URL.
 *
 * @param {Iterable<string>} relativePaths - Document paths; each is handed to
 *   `readOptional` and split on `/` to build the URL.
 * @returns {Promise<object[]>} Records with `relativePath`, `content`,
 *   `title`, `summary`, and `url`, in input order.
 */
async function loadDocs(relativePaths) {
  const docs = [];
  for (const relativePath of relativePaths) {
    const content = await readOptional(relativePath);
    if (!content) continue;

    // Encode each segment separately so "/" stays a separator while characters
    // such as "#" and "?" (which encodeURI leaves untouched) cannot cut the
    // URL path short.
    const encodedPath = relativePath
      .split("/")
      .map((segment) => encodeURIComponent(segment))
      .join("/");

    docs.push({
      relativePath,
      content,
      title: titleFromMarkdown(content, relativePath),
      summary: summaryFromMarkdown(content),
      url: `${config.repoUrl}/blob/master/${encodedPath}`,
    });
  }
  return docs;
}
/**
 * Picks a display title for a Markdown document.
 *
 * Paths ending in `CHANGELOG.md` or `TODO.md` get fixed titles. Otherwise the
 * first level-1 heading wins, then the first level-2 heading, then `fallback`.
 * A leading `---` frontmatter block and lines inside ``` fences are ignored.
 *
 * @param {string} content - Markdown source text.
 * @param {string} fallback - Document path; used for the fixed-title check and
 *   returned when no heading is found.
 * @returns {string} The chosen title.
 */
function titleFromMarkdown(content, fallback) {
  const fixedTitle = [
    ["CHANGELOG.md", "Changelog"],
    ["TODO.md", "TODO"],
  ].find(([suffix]) => fallback.endsWith(suffix));
  if (fixedTitle) return fixedTitle[1];

  let insideFrontmatter = false;
  let insideFence = false;
  let firstSubheading = "";

  for (const [lineNumber, rawLine] of content.split(/\r?\n/).entries()) {
    const text = rawLine.trim();

    // Frontmatter only opens on the very first line and runs to the next "---".
    if (lineNumber === 0 && text === "---") {
      insideFrontmatter = true;
      continue;
    }
    if (insideFrontmatter) {
      insideFrontmatter = text !== "---";
      continue;
    }

    // Every line starting with ``` flips the fence state; fenced lines are skipped.
    if (text.startsWith("```")) {
      insideFence = !insideFence;
      continue;
    }
    if (insideFence) continue;

    // One or two "#" followed by whitespace and text: a level-1 or level-2 heading.
    const heading = /^(#{1,2})\s+(.+)$/.exec(text);
    if (!heading) continue;

    const [, hashes, headingText] = heading;
    if (hashes.length === 1) return headingText.trim();
    firstSubheading ||= headingText.trim();
  }

  return firstSubheading || fallback;
}
/**
 * Extracts a one-line summary from a Markdown document: its first prose
 * paragraph, with whitespace collapsed, capped at 220 UTF-16 code units.
 *
 * A leading `---` frontmatter block and ``` fenced blocks are ignored. Blank
 * lines, headings (`#`), and table rows (`|`) end the current paragraph. Lines
 * starting with a badge, `_Telegram`, or one of several HTML/comment prefixes
 * are dropped without ending the paragraph.
 *
 * @param {string} content - Markdown source text.
 * @returns {string} The summary, or "Repository documentation." when the
 *   document has no prose paragraph. Over-long summaries are cut to 217 code
 *   units (fewer if that would split a surrogate pair) and suffixed with "...".
 */
function summaryFromMarkdown(content) {
  // Line prefixes that are never treated as summary prose.
  const skippedPrefixes = ["[![", "_Telegram", "<img", "<p", "<br", "</", "<!--"];

  const lines = content.split(/\r?\n/);
  const paragraphs = [];
  let current = [];
  let inFence = false;
  let inFrontmatter = false;

  const flushParagraph = () => {
    if (current.length > 0) {
      paragraphs.push(current.join(" "));
      current = [];
    }
  };

  for (let index = 0; index < lines.length; index += 1) {
    const trimmed = lines[index].trim();

    // Frontmatter only opens on the very first line and runs to the next "---".
    if (index === 0 && trimmed === "---") {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === "---") inFrontmatter = false;
      continue;
    }

    if (trimmed.startsWith("```")) {
      inFence = !inFence;
      continue;
    }

    // Paragraph boundaries: fenced lines, blank lines, headings, table rows.
    if (inFence || !trimmed || trimmed.startsWith("#") || trimmed.startsWith("|")) {
      flushParagraph();
      continue;
    }

    if (skippedPrefixes.some((prefix) => trimmed.startsWith(prefix))) continue;

    current.push(trimmed.replace(/\s+/g, " "));
  }
  flushParagraph();

  const summary = paragraphs.find(Boolean) || "Repository documentation.";
  if (summary.length <= 220) return summary;

  let clipped = summary.slice(0, 217);
  // slice() counts UTF-16 code units, so the cut can land between the halves
  // of a surrogate pair; drop a dangling high surrogate so the output never
  // contains a lone surrogate.
  if (/[\uD800-\uDBFF]$/.test(clipped)) clipped = clipped.slice(0, -1);
  return `${clipped.trimEnd()}...`;
}
/**
 * Derives a link fragment from a heading string: lowercases it, removes every
 * character other than `a-z`, `0-9`, whitespace, and `-`, trims the result,
 * and replaces each whitespace run with a single `-`.
 *
 * @param {string} value - Heading text (here, a document's relative path).
 * @returns {string} The fragment, without a leading `#`.
 */
function anchorFor(value) {
  const lowered = value.toLowerCase();
  const stripped = lowered.replace(/[^a-z0-9\s-]/g, "");
  const words = stripped.trim().split(/\s+/);
  return words.join("-");
}
/**
 * Writes the same content to every listed output path, one after another,
 * creating parent directories as needed and logging each file written.
 *
 * @param {Iterable<string>} relativePaths - Output paths, joined onto `rootDir`.
 * @param {string} content - Text to write to each file.
 * @returns {Promise<void>}
 */
async function writeOutputs(relativePaths, content) {
  for (const outputPath of relativePaths) {
    const destination = path.join(rootDir, outputPath);
    const parentDir = path.dirname(destination);

    await mkdir(parentDir, { recursive: true });
    await writeFile(destination, content);

    console.log(`[llms] wrote ${outputPath}`);
  }
}
/**
 * Reads a file via `readText`, treating a missing file as empty.
 *
 * @param {string} relativePath - Path passed through to `readText`.
 * @returns {Promise<string>} The file text, or "" when the read fails with
 *   code `ENOENT`.
 * @throws Rethrows any failure whose `code` is not `ENOENT`.
 */
async function readOptional(relativePath) {
  let text;
  try {
    text = await readText(relativePath);
  } catch (failure) {
    // Only "file does not exist" is tolerated; everything else propagates.
    if (failure?.code !== "ENOENT") throw failure;
    text = "";
  }
  return text;
}
/**
 * Reads a file as UTF-8 text.
 *
 * @param {string} relativePath - Path joined onto `rootDir`.
 * @returns {Promise<string>} The file contents.
 */
async function readText(relativePath) {
  const absolutePath = path.join(rootDir, relativePath);
  return readFile(absolutePath, "utf8");
}