# rendercv/scripts/ats_proof/generate_report.py
# 2026-03-19 22:28:33 +03:00 | 118 lines | 3.6 KiB | Python

"""Generate the ATS compatibility report from test results.

Renders ats_compatibility.j2.md with Jinja2 and writes the output to
docs/ats_compatibility.md.
"""
from pathlib import Path
import jinja2
from common import (
ANALYSIS_DIR,
RENDERED_DIR,
RESULTS_DIR,
THEMES,
load_json,
)
# Directory containing this script; main() points the Jinja2 loader at it.
SCRIPT_DIR: Path = Path(__file__).parent
# Two directory levels above this script.
REPO_ROOT: Path = SCRIPT_DIR.parent.parent
# Destination file for the rendered Markdown report.
REPORT_OUTPUT: Path = REPO_ROOT / "docs" / "ats_compatibility.md"

# Fields to show in the commercial parser table, with display names.
# Each entry is (key looked up in a parser's per-field scores, row label).
REPORT_FIELDS: list[tuple[str, str]] = [
    ("contact_name", "Name"),
    ("contact_email", "Email"),
    ("contact_phone", "Phone"),
    ("contact_location", "Location"),
    ("work_company", "Company name"),
    ("work_position", "Job title"),
    ("work_start_date", "Start date"),
    ("work_end_date", "End date"),
    ("edu_institution", "Institution"),
]

# Maps the parser identifier found in the evaluation results ("parser" field)
# to the key under which that parser's verdict is stored in each table row.
PARSER_DISPLAY_NAMES: dict[str, str] = {
    "edenai_affinda": "affinda",
    "edenai_extracta": "extracta",
    "edenai_klippa": "klippa",
}
def f1_to_checkmark(f1: float) -> str:
    """Translate an F1 score into the verdict text shown in the report.

    Despite the name, the result is a plain-text label, not a symbol.

    Args:
        f1: F1 score to classify.

    Returns:
        ``"Correct"`` when ``f1 >= 0.90``, ``"Partial"`` when
        ``f1 >= 0.50``, and ``"Not extracted"`` otherwise.
    """
    # Ordered from the strictest cut-off down; the first one met wins.
    verdict_thresholds = (
        (0.90, "Correct"),
        (0.50, "Partial"),
    )
    for minimum, verdict in verdict_thresholds:
        if f1 >= minimum:
            return verdict
    return "Not extracted"
def build_context() -> dict:
    """Assemble the variables handed to the report template.

    Reads three JSON files through ``load_json``: the evaluation results
    under ``ANALYSIS_DIR`` and the structural and open-source extraction
    summaries under ``RESULTS_DIR``. Also counts the ``*.pdf`` files found
    recursively below ``RENDERED_DIR``.

    Returns:
        A dict with these keys:

        - ``total_pdfs``: number of PDF files under ``RENDERED_DIR``.
        - ``num_themes``: ``len(THEMES)``.
        - ``num_cases``: ``total_pdfs // num_themes``, or ``total_pdfs``
          when there are no themes.
        - ``struct_passed`` / ``struct_total`` / ``struct_rate``: the
          ``passed``, ``total`` and ``pass_rate`` entries of the structural
          summary, defaulting to ``0``, ``0`` and ``"N/A"``.
        - ``extractors``: one dict per entry of the extraction summary's
          ``extractors`` mapping, with the entry name added under ``name``.
        - ``has_commercial``: whether the evaluation results contain a
          non-empty ``evaluations`` entry.
        - ``conformance_fields``: one row per ``REPORT_FIELDS`` entry, each
          holding the row label under ``name`` plus one verdict per parser
          in ``PARSER_DISPLAY_NAMES``; empty when ``has_commercial`` is
          false.
    """
    evaluation_data = load_json(ANALYSIS_DIR / "evaluation_results.json")
    structural = load_json(RESULTS_DIR / "structural" / "structural_summary.json")
    opensource = load_json(RESULTS_DIR / "opensource" / "extraction_summary.json")

    evaluations = evaluation_data.get("evaluations", [])
    commercial_available = bool(evaluations)

    # Parser identifier -> {field key -> F1 score}.
    scores_by_parser: dict[str, dict[str, float]] = {
        entry["parser"]: entry["per_field"] for entry in evaluations
    }

    def verdicts_for(field_key: str) -> dict[str, str]:
        """Return the verdict of every known parser for one field."""
        # A parser or field absent from the results is scored as 0.
        return {
            column: f1_to_checkmark(
                scores_by_parser.get(parser_id, {}).get(field_key, 0)
            )
            for parser_id, column in PARSER_DISPLAY_NAMES.items()
        }

    table_rows: list[dict] = []
    if commercial_available:
        table_rows = [
            {"name": label, **verdicts_for(field_key)}
            for field_key, label in REPORT_FIELDS
        ]

    extractor_rows: list[dict] = [
        {"name": extractor_name, **extractor_stats}
        for extractor_name, extractor_stats in opensource.get(
            "extractors", {}
        ).items()
    ]

    pdf_count = sum(1 for _ in RENDERED_DIR.rglob("*.pdf"))
    theme_count = len(THEMES)
    if theme_count:
        case_count = pdf_count // theme_count
    else:
        # No themes configured: fall back to the raw PDF count.
        case_count = pdf_count

    return {
        "total_pdfs": pdf_count,
        "num_themes": theme_count,
        "num_cases": case_count,
        "struct_passed": structural.get("passed", 0),
        "struct_total": structural.get("total", 0),
        "struct_rate": structural.get("pass_rate", "N/A"),
        "extractors": extractor_rows,
        "has_commercial": commercial_available,
        "conformance_fields": table_rows,
    }
def main() -> None:
    """Render the ATS compatibility report and write it to disk.

    Loads ``ats_compatibility.j2.md`` from ``SCRIPT_DIR``, renders it with
    the context from ``build_context()``, writes the result to
    ``REPORT_OUTPUT`` as UTF-8 (creating parent directories if needed) and
    prints the output path.
    """
    environment = jinja2.Environment(
        keep_trailing_newline=True,
        lstrip_blocks=True,
        trim_blocks=True,
        loader=jinja2.FileSystemLoader(SCRIPT_DIR),
    )
    # The template is resolved before the context is built, so a missing
    # template fails before any result file is read.
    rendered = environment.get_template("ats_compatibility.j2.md").render(
        build_context()
    )

    output_dir = REPORT_OUTPUT.parent
    output_dir.mkdir(parents=True, exist_ok=True)
    REPORT_OUTPUT.write_text(rendered, encoding="utf-8")
    print(f"Report written to {REPORT_OUTPUT}")  # noqa: T201
# Generate the report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()