diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py index 0358dc78..e2fa9726 100644 --- a/src/ocrmypdf/_exec/ghostscript.py +++ b/src/ocrmypdf/_exec/ghostscript.py @@ -81,8 +81,8 @@ def rasterize_pdf( raster_device: str, raster_dpi: Resolution, pageno: int = 1, - page_dpi: Resolution = None, - rotation: int = None, + page_dpi: Optional[Resolution] = None, + rotation: Optional[int] = None, filter_vector: bool = False, ): """Rasterize one page of a PDF at resolution raster_dpi in canvas units.""" diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py index a37963e3..67bb26c8 100644 --- a/src/ocrmypdf/_pipeline.py +++ b/src/ocrmypdf/_pipeline.py @@ -332,8 +332,10 @@ def rasterize_preview(input_file: Path, page_context: PageContext): output_file=output_file, raster_device='jpeggray', raster_dpi=canvas_dpi, - page_dpi=page_dpi, pageno=page_context.pageinfo.pageno + 1, + page_dpi=page_dpi, + rotation=0, + filter_vector=False, ) return output_file @@ -433,7 +435,7 @@ def rasterize( device = colorspaces[device_idx] - log.debug(f"Rasterize with {device}") + log.debug(f"Rasterize with {device}, rotation {correction}") # Produce the page image with square resolution or else deskew and OCR # will not work properly. @@ -534,6 +536,9 @@ def create_ocr_image(image: Path, page_context: PageContext): # Pillow requires integer DPI dpi = tuple(round(coord) for coord in im.info['dpi']) + if page_context.pageinfo.rotation != 0: + log.info(f"Rotating {page_context.pageinfo.rotation}") + im = im.rotate(page_context.pageinfo.rotation) im.save(output_file, dpi=dpi) return output_file diff --git a/src/ocrmypdf/builtin_plugins/ghostscript.py b/src/ocrmypdf/builtin_plugins/ghostscript.py index de21fe30..3be822d0 100644 --- a/src/ocrmypdf/builtin_plugins/ghostscript.py +++ b/src/ocrmypdf/builtin_plugins/ghostscript.py @@ -61,9 +61,9 @@ def rasterize_pdf_page( raster_device, raster_dpi, pageno, - page_dpi=None, - rotation=None, - filter_vector=False, + page_dpi, + rotation, + filter_vector, ): ghostscript.rasterize_pdf( input_file, diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py index 469b183f..b34e467f 100644 --- a/src/ocrmypdf/pluginspec.py +++ b/src/ocrmypdf/pluginspec.py @@ -89,9 +89,9 @@ def rasterize_pdf_page( raster_device: str, raster_dpi: Resolution, pageno: int, - page_dpi: Optional[Resolution] = None, - rotation: Optional[int] = None, - filter_vector: bool = False, + page_dpi: Optional[Resolution], + rotation: Optional[int], + filter_vector: bool, ) -> Path: """Rasterize one page of a PDF at resolution raster_dpi in canvas units.