Files
rendercv/scripts/ats_proof/submit_commercial.py
2026-03-19 22:28:33 +03:00

97 lines
3.1 KiB
Python

"""Submit rendered PDFs to commercial resume parsers via Eden AI.
Eden AI forwards PDFs to Affinda, Extracta, and Klippa and returns their results.
Requires environment variable:
EDENAI_API_KEY — from https://app.edenai.run/user/register
"""
import os
import sys
import time
from pathlib import Path
import httpx
from common import RENDERED_DIR, RESULTS_DIR, find_rendered_pdfs, write_json
# Subdirectory (relative to RESULTS_DIR) where the Eden AI responses are
# stored as JSON; main() joins it onto RESULTS_DIR and creates it if needed.
RESULTS_SUBDIR: str = "commercial/edenai"
# Seconds main() sleeps after each successful submission — presumably to stay
# under Eden AI's request rate limit; confirm against the account's quota.
RATE_LIMIT_SECONDS: float = 2.0
def submit(
    client: httpx.Client,
    pdf_path: Path,
    providers: str = "affinda,extracta,klippa",
) -> dict:
    """Submit a PDF to Eden AI's resume parser and return the decoded reply.

    Args:
        client: HTTP client used for the request. It is expected to already
            carry the ``Authorization`` header (``main`` configures it so).
        pdf_path: Path of the PDF file to upload.
        providers: Comma-separated Eden AI provider names the file is sent
            to. Defaults to Affinda, Extracta, and Klippa.

    Returns:
        The response body decoded from JSON.

    Raises:
        httpx.HTTPStatusError: If the API responds with an error status.
    """
    with pdf_path.open("rb") as f:
        response = client.post(
            "https://api.edenai.run/v2/ocr/resume_parser",
            files={"file": (pdf_path.name, f, "application/pdf")},
            data={"providers": providers},
            # Per-request timeout in seconds; takes precedence over the
            # timeout configured on the client.
            timeout=90,
        )
    response.raise_for_status()
    return response.json()
def _cache_name(rel: Path) -> str:
    """Flatten a PDF path (relative to RENDERED_DIR) into a JSON file name."""
    # Strip only the trailing ".pdf" extension. A plain str.replace would also
    # remove ".pdf" occurring inside a directory name or file stem, which
    # could mangle the name or make two different PDFs share one cache file.
    stem = rel.with_suffix("") if rel.suffix == ".pdf" else rel
    return "_".join(stem.parts) + ".json"


def main() -> None:
    """Submit every rendered PDF to Eden AI and cache each response as JSON.

    The API key is read from the ``EDENAI_API_KEY`` environment variable. The
    process exits with status 1 when the key is missing or when no rendered
    PDFs are found. Responses are written to ``RESULTS_DIR / RESULTS_SUBDIR``;
    a PDF whose result file already exists is skipped and counted as a
    success, so re-running the script only submits what is still missing.
    """
    api_key = os.environ.get("EDENAI_API_KEY", "")
    if not api_key:
        print("EDENAI_API_KEY not set. Sign up at https://app.edenai.run/user/register")  # noqa: T201
        print("and create an API key at https://app.edenai.run/admin/account/settings.")  # noqa: T201
        print()  # noqa: T201
        print("Then run:")  # noqa: T201
        print(" EDENAI_API_KEY=your_key uv run python submit_commercial.py")  # noqa: T201
        sys.exit(1)

    pdfs = find_rendered_pdfs()
    if not pdfs:
        print("No PDFs found. Run render_pdfs.py first.")  # noqa: T201
        sys.exit(1)

    results_dir = RESULTS_DIR / RESULTS_SUBDIR
    results_dir.mkdir(parents=True, exist_ok=True)

    success = 0
    failed = 0
    total = len(pdfs)

    # The context manager closes the client on every exit path, including
    # an interrupt in the middle of the run.
    with httpx.Client(
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=120,
    ) as client:
        print(f"Submitting {total} PDFs to Eden AI (Affinda, Extracta, Klippa)...")  # noqa: T201
        for position, pdf_path in enumerate(pdfs, start=1):
            rel = pdf_path.relative_to(RENDERED_DIR)
            output_path = results_dir / _cache_name(rel)
            tag = f" [{position}/{total}]"

            # An existing result file acts as the cache: do not resubmit.
            if output_path.exists():
                print(f"{tag} SKIP (cached): {rel}")  # noqa: T201
                success += 1
                continue

            try:
                result = submit(client, pdf_path)
                write_json(output_path, result)
                success += 1
                print(f"{tag} OK: {rel}")  # noqa: T201
                time.sleep(RATE_LIMIT_SECONDS)
            except httpx.HTTPStatusError as e:
                failed += 1
                print(f"{tag} FAIL ({e.response.status_code}): {rel}")  # noqa: T201
                if e.response.status_code == 429:
                    # Back off before moving on to the next file. The failed
                    # PDF is not retried here; it has no cache file, so a
                    # later run will submit it again.
                    print(" Rate limited. Waiting 30s...")  # noqa: T201
                    time.sleep(30)
            except Exception as e:
                # Best-effort batch: one bad file must not abort the rest.
                failed += 1
                print(f"{tag} ERROR: {rel} - {e}")  # noqa: T201

    print(f"\nDone. {success} succeeded, {failed} failed.")  # noqa: T201
# Run the submission workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()