Files
OCRmyPDF/tests/test_page_boxes.py
2025-12-23 11:20:41 -08:00

123 lines
3.0 KiB
Python

# SPDX-FileCopyrightText: 2025 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations
import pikepdf
import pytest
from .conftest import check_ocrmypdf
# Page-box rectangles, written in PDF rectangle order [llx, lly, urx, ury].
# 612 x 792 box anchored at the origin.
page_rect = [0, 0, 612, 792]
# Same upper-right corner as page_rect, lower-left corner moved to (200, 200).
inset_rect = [200, 200, 612, 792]
# Width and height of inset_rect (412 x 592), re-anchored at the origin.
wh_rect = [0, 0, 412, 592]
# 612 x 792 box whose lower-left corner sits at (-100, -100).
neg_rect = [-100, -100, 512, 692]

# Cases for test_media_box. Each tuple is
# (renderer, output_type, in_pdf, mode, crop_to, crop_expected).
mediabox_testdata = [
    # No extra mode flag: the pdfa cases expect the box re-anchored at the
    # origin, the pdf cases expect the box to come back unchanged.
    ('hocr', 'pdfa', 'ccitt.pdf', None, inset_rect, wh_rect),
    ('sandwich', 'pdfa', 'ccitt.pdf', None, inset_rect, wh_rect),
    ('hocr', 'pdf', 'ccitt.pdf', None, inset_rect, inset_rect),
    ('sandwich', 'pdf', 'ccitt.pdf', None, inset_rect, inset_rect),
    # --force-ocr with the inset box: both output types expect the
    # origin-anchored box.
    ('hocr', 'pdfa', 'ccitt.pdf', '--force-ocr', inset_rect, wh_rect),
    ('hocr', 'pdf', 'ccitt.pdf', '--force-ocr', inset_rect, wh_rect),
    # --force-ocr with a negative-origin box: expect the origin-anchored
    # box of the same size.
    ('hocr', 'pdfa', 'ccitt.pdf', '--force-ocr', neg_rect, page_rect),
    ('hocr', 'pdf', 'ccitt.pdf', '--force-ocr', neg_rect, page_rect),
]
@pytest.mark.parametrize(
    'renderer, output_type, in_pdf, mode, crop_to, crop_expected', mediabox_testdata
)
def test_media_box(
    resources, outdir, renderer, output_type, in_pdf, mode, crop_to, crop_expected
):
    """Verify the first page's MediaBox after processing a re-boxed input.

    The first page of ``resources / in_pdf`` has its MediaBox replaced with
    ``crop_to`` and is saved as ``cropped.pdf`` in ``outdir``. That file is
    passed to ``check_ocrmypdf`` with the selected renderer and output type,
    plus ``mode`` when it is truthy. The first page of the resulting
    ``processed.pdf`` must then have a MediaBox equal to ``crop_expected``.
    """
    cropped_path = outdir / 'cropped.pdf'
    processed_path = outdir / 'processed.pdf'

    # Build the input: same document, first page given the requested MediaBox.
    with pikepdf.open(resources / in_pdf) as source:
        source.pages[0].MediaBox = crop_to
        source.save(cropped_path)

    cli_args = ['--jobs', '1']
    cli_args += ['--pdf-renderer', renderer]
    cli_args += ['--output-type', output_type]
    if mode:
        # Optional extra flag (e.g. '--force-ocr'); None means no flag.
        cli_args += [mode]

    check_ocrmypdf(cropped_path, processed_path, *cli_args)

    # Compare while the output is still open so the page object stays valid.
    with pikepdf.open(processed_path) as result:
        assert result.pages[0].MediaBox == crop_expected
# Cases for test_crop_box. Each tuple is
# (renderer, output_type, in_pdf, mode, crop_to, crop_expected).
# Every case expects the CropBox to come back exactly as it was set.
cropbox_testdata = [
    # No extra mode flag.
    ('hocr', 'pdfa', 'ccitt.pdf', None, inset_rect, inset_rect),
    ('sandwich', 'pdfa', 'ccitt.pdf', None, inset_rect, inset_rect),
    ('hocr', 'pdf', 'ccitt.pdf', None, inset_rect, inset_rect),
    ('sandwich', 'pdf', 'ccitt.pdf', None, inset_rect, inset_rect),
    # --force-ocr.
    ('hocr', 'pdfa', 'ccitt.pdf', '--force-ocr', inset_rect, inset_rect),
    ('hocr', 'pdf', 'ccitt.pdf', '--force-ocr', inset_rect, inset_rect),
]
@pytest.mark.parametrize(
    'renderer, output_type, in_pdf, mode, crop_to, crop_expected', cropbox_testdata
)
def test_crop_box(
    resources, outdir, renderer, output_type, in_pdf, mode, crop_to, crop_expected
):
    """Verify the first page's CropBox after processing a cropped input.

    The first page of ``resources / in_pdf`` is given a CropBox of ``crop_to``
    and saved as ``cropped.pdf`` in ``outdir``. That file is passed to
    ``check_ocrmypdf`` with the selected renderer and output type and with
    ``--optimize 0``, plus ``mode`` when it is truthy. The first page of the
    resulting ``processed.pdf`` must then have a CropBox equal to
    ``crop_expected``.
    """
    input_pdf = outdir / 'cropped.pdf'
    output_pdf = outdir / 'processed.pdf'

    # Build the input: same document, first page given the requested CropBox.
    with pikepdf.open(resources / in_pdf) as original:
        first_page = original.pages[0]
        first_page.CropBox = crop_to
        original.save(input_pdf)

    base_args = [
        '--jobs',
        '1',
        '--pdf-renderer',
        renderer,
        '--output-type',
        output_type,
        '--optimize',
        '0',
    ]
    # Optional extra flag (e.g. '--force-ocr'); None contributes nothing.
    mode_args = [mode] if mode else []

    check_ocrmypdf(input_pdf, output_pdf, *base_args, *mode_args)

    # Compare while the output is still open so the page object stays valid.
    with pikepdf.open(output_pdf) as processed:
        assert processed.pages[0].CropBox == crop_expected