diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
index 97d5b718..6de1f2e9 100644
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -206,17 +206,26 @@ def validate_pdfinfo_options(context: PdfContext):
     context.plugin_manager.hook.validate(pdfinfo=pdfinfo, options=options)
 
 
+def _vector_page_dpi(pageinfo):
+    """Return the DPI floor imposed by vector or text content on a page.
+
+    Gives VECTOR_PAGE_DPI when the page reports vector drawings or text,
+    and 0.0 otherwise so the other candidates passed to max() decide.
+    """
+    return VECTOR_PAGE_DPI if pageinfo.has_vector or pageinfo.has_text else 0.0
+
+
 def get_page_dpi(pageinfo, options):
     "Get the DPI when nonsquare DPI is tolerable"
     xres = max(
         pageinfo.dpi.x or VECTOR_PAGE_DPI,
         options.oversample or 0.0,
-        VECTOR_PAGE_DPI if pageinfo.has_vector else 0.0,
+        _vector_page_dpi(pageinfo),
     )
     yres = max(
         pageinfo.dpi.y or VECTOR_PAGE_DPI,
         options.oversample or 0,
-        VECTOR_PAGE_DPI if pageinfo.has_vector else 0.0,
+        _vector_page_dpi(pageinfo),
     )
     return Resolution(float(xres), float(yres))
 
@@ -230,7 +239,7 @@ def get_page_square_dpi(pageinfo, options) -> Resolution:
         max(
             (xres * userunit) or VECTOR_PAGE_DPI,
             (yres * userunit) or VECTOR_PAGE_DPI,
-            VECTOR_PAGE_DPI if pageinfo.has_vector else 0.0,
+            _vector_page_dpi(pageinfo),
             options.oversample or 0.0,
         )
     )
@@ -243,7 +252,7 @@ def get_canvas_square_dpi(pageinfo, options) -> Resolution:
         max(
             (pageinfo.dpi.x) or VECTOR_PAGE_DPI,
             (pageinfo.dpi.y) or VECTOR_PAGE_DPI,
-            VECTOR_PAGE_DPI if pageinfo.has_vector else 0.0,
+            _vector_page_dpi(pageinfo),
             options.oversample or 0.0,
         )
     )
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 00000000..64254e84
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,63 @@
+# © 2021 James R. Barlow: github.com/jbarlow83
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from unittest.mock import Mock
+
+import pytest
+from PIL import Image
+from reportlab.lib.units import inch
+from reportlab.lib.utils import ImageReader
+from reportlab.pdfgen.canvas import Canvas
+
+from ocrmypdf import _pipeline, pdfinfo
+from ocrmypdf.helpers import Resolution
+
+
+@pytest.fixture(scope='session')
+def rgb_image():
+    im = Image.new('RGB', (8, 8))
+    im.putpixel((4, 4), (255, 0, 0))
+    im.putpixel((5, 5), (0, 255, 0))
+    im.putpixel((6, 6), (0, 0, 255))
+    return ImageReader(im)
+
+
+DUMMY_OVERSAMPLE_RESOLUTION = Resolution(42.0, 42.0)
+VECTOR_RESOLUTION = Resolution(_pipeline.VECTOR_PAGE_DPI, _pipeline.VECTOR_PAGE_DPI)
+
+
+@pytest.mark.parametrize(
+    'image, text, vector, result',
+    [
+        (False, False, False, VECTOR_RESOLUTION),
+        (False, True, False, VECTOR_RESOLUTION),
+        (True, False, False, DUMMY_OVERSAMPLE_RESOLUTION),
+        (True, True, False, VECTOR_RESOLUTION),
+        (False, False, True, VECTOR_RESOLUTION),
+        (False, True, True, VECTOR_RESOLUTION),
+        (True, False, True, VECTOR_RESOLUTION),
+        (True, True, True, VECTOR_RESOLUTION),
+    ],
+)
+def test_dpi_needed(image, text, vector, result, rgb_image, outdir):
+    """Check the square DPI chosen for each mix of image, text and vector."""
+    c = Canvas(str(outdir / 'dpi.pdf'), pagesize=(5 * inch, 5 * inch))
+    if image:
+        c.drawImage(rgb_image, 1 * inch, 1 * inch, width=1 * inch, height=1 * inch)
+    if text:
+        c.drawString(1 * inch, 4 * inch, "Actual text")
+    if vector:
+        c.ellipse(3 * inch, 3 * inch, 4 * inch, 4 * inch)
+    c.showPage()
+    c.save()
+
+    mock = Mock()
+    mock.oversample = DUMMY_OVERSAMPLE_RESOLUTION[0]
+
+    pi = pdfinfo.PdfInfo(outdir / 'dpi.pdf')
+
+    assert _pipeline.get_canvas_square_dpi(pi[0], mock) == result
+    assert _pipeline.get_page_square_dpi(pi[0], mock) == result