mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-04 20:54:18 -04:00
Fix page rotation issue (again)
Commit 1327ab3 introduced a fix for a regression, which was reported in #581, #634. It appears that the actual cause of this issue was default parameters to rasterize_pdf_page in pluggy not working as expected, causing a default rotation=0 even when a rotation was needed. As such, the OCR image was generated with the wrong orientation, causing the initial regression and the fix in commit 1327ab3. Now that the real problem is identified, it's apparent that the logic prior to 1327ab3 was sound, and we can revert 1327ab3 since the earlier logic fixes all known cases including #658. This reverts 1327ab3 except for retaining improvements to rotation output.
This commit is contained in:
@@ -109,6 +109,7 @@ class OcrGrafter:
|
||||
if textpdf and not self.font:
|
||||
self.font, self.font_key = self._find_font(textpdf)
|
||||
|
||||
emplaced_page = False
|
||||
content_rotation = self.pdfinfo[pageno].rotation
|
||||
path_image = Path(image).resolve() if image else None
|
||||
if path_image is not None and path_image != self.path_base:
|
||||
@@ -122,24 +123,21 @@ class OcrGrafter:
|
||||
local_image_page = self.pdf_base.pages[-1]
|
||||
self.pdf_base.pages[pageno].emplace(local_image_page)
|
||||
del self.pdf_base.pages[-1]
|
||||
# The pdf_image_page will always be created with any /Rotate applied
|
||||
# already
|
||||
content_rotation = 0
|
||||
emplaced_page = True
|
||||
|
||||
if content_rotation != 0:
|
||||
# Text can be misaligned on a /Rotate'd page.
|
||||
# That is because we rasterize pages with /Rotate applied,
|
||||
# so that the OCR image text is upright and comes back upright.
|
||||
text_misaligned = (autorotate_correction - content_rotation) % 360
|
||||
log.debug(
|
||||
f"Text rotation: (autorotate, content) -> text misalignment = "
|
||||
f"({autorotate_correction}, {content_rotation}) -> {text_misaligned}"
|
||||
)
|
||||
else:
|
||||
text_misaligned = 0
|
||||
# Calculate if the text is misaligned compared to the content
|
||||
if emplaced_page:
|
||||
content_rotation = autorotate_correction
|
||||
text_rotation = autorotate_correction
|
||||
text_misaligned = (text_rotation - content_rotation) % 360
|
||||
log.debug(
|
||||
f"Text rotation: (text, autorotate, content) -> text misalignment = "
|
||||
f"({text_rotation}, {autorotate_correction}, {content_rotation}) -> {text_misaligned}"
|
||||
)
|
||||
|
||||
if textpdf and self.font:
|
||||
# Graft the text layer onto this page, whether new or old
|
||||
# Graft the text layer onto this page, whether new or old, possibly
|
||||
# rotating the text layer by the amount it is misaligned.
|
||||
strip_old = self.context.options.redo_ocr
|
||||
self._graft_text_layer(
|
||||
page_num=pageno + 1,
|
||||
@@ -151,14 +149,14 @@ class OcrGrafter:
|
||||
strip_old_text=strip_old,
|
||||
)
|
||||
|
||||
# Correct the page rotation
|
||||
# Correct the overall page rotation if needed, now that the text and content
|
||||
# are aligned
|
||||
page_rotation = (content_rotation - autorotate_correction) % 360
|
||||
self.pdf_base.pages[pageno].Rotate = page_rotation
|
||||
log.debug(
|
||||
f"Page rotation: (content, auto) -> page = "
|
||||
f"({content_rotation}, {autorotate_correction}) -> {page_rotation}"
|
||||
)
|
||||
|
||||
if self.emplacements % MAX_REPLACE_PAGES == 0:
|
||||
self.save_and_reload()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user