diff --git a/.gitignore b/.gitignore index ab3a5707..c4b31d00 100644 --- a/.gitignore +++ b/.gitignore @@ -46,4 +46,5 @@ docs/_templates/ docs/Makefile src/ocrmypdf/_version.py -.idea/ \ No newline at end of file +.idea/ +.aider* diff --git a/src/ocrmypdf/api.py b/src/ocrmypdf/api.py index 0abb46cd..0d737aef 100644 --- a/src/ocrmypdf/api.py +++ b/src/ocrmypdf/api.py @@ -9,14 +9,14 @@ OCR operations programmatically without using the command line interface. Main Functions: ocr(): The primary function for OCR processing. Takes an input PDF or image file and produces an OCR'd PDF with searchable text. - + configure_logging(): Set up logging to match the command line interface behavior, with support for progress bars and colored output. Experimental Functions: _pdf_to_hocr(): Extract text from PDF pages and save as hOCR files for manual editing before final PDF generation. - + _hocr_to_ocr_pdf(): Convert hOCR files back to a searchable PDF after manual text corrections. @@ -26,10 +26,10 @@ at a time. For parallel processing, use multiple Python processes. Example: import ocrmypdf - + # Configure logging (optional) ocrmypdf.configure_logging(ocrmypdf.Verbosity.default) - + # Perform OCR ocrmypdf.ocr('input.pdf', 'output.pdf', language='eng')