pageinfo: fix “decimal.InvalidOperation: quantize result has too many digits”

And add new test case for this.
2026-05-24 22:46:07 -04:00 · 2016-12-08 16:04:14 -08:00
parent 1ae1d116c7
commit e57aa0eee2
5 changed files with 14 additions and 4 deletions
--- a/ocrmypdf/pageinfo.py
+++ b/ocrmypdf/pageinfo.py
@@ -2,7 +2,7 @@
 # © 2015 James R. Barlow: github.com/jbarlow83

 from subprocess import Popen, PIPE
-from decimal import Decimal, getcontext
+from decimal import Decimal
 from math import hypot
 import re
 import sys
@@ -330,9 +330,9 @@ def _find_page_regular_images(page, pageinfo, contentsinfo):
            image['dpi_h'] = max(dpi_h, image.get('dpi_h', 0))

        DPI_PREC = Decimal('1.000')
+        dpi = Decimal(image['dpi_w'] * image['dpi_h']).sqrt()
        image['dpi_w'] = Decimal(image['dpi_w']).quantize(DPI_PREC)
        image['dpi_h'] = Decimal(image['dpi_h']).quantize(DPI_PREC)
-        dpi = Decimal(image['dpi_w'] * image['dpi_h']).sqrt()
        image['dpi'] = dpi.quantize(DPI_PREC)
        yield image

@@ -407,7 +407,6 @@ def _pdf_get_pageinfo(infile, pageno: int):

 def pdf_get_all_pageinfo(infile):
    pdf = pypdf.PdfFileReader(infile)
-    getcontext().prec = 6
    return [_pdf_get_pageinfo(infile, n) for n in range(pdf.numPages)]


--- a/tests/resources/2400dpi.pdf
+++ b/tests/resources/2400dpi.pdf
--- a/tests/resources/README.rst
+++ b/tests/resources/README.rst
@@ -31,6 +31,9 @@ In some cases they were converted from one image format to another without other
    *   - LinnSequencer.jpg, linn.pdf, linn.txt
        - `Wikimedia: LinnSequencer`_
        - Creative Commons BY-SA 3.0
+    *   - typewriter.png, 2400dpi.pdf
+        - `Wikimedia: Triumph typewrtier text Linzensoep`_
+        - Creative Commons BY-SA 2.5


 Files generated for this project
@@ -104,4 +107,6 @@ These test resources are assemblies from other previously mentioned files, relea

 .. _`Wikimedia: JPEG2000 Lichtenstein`: https://en.wikipedia.org/wiki/JPEG_2000#/media/File:Jpeg2000_2-level_wavelet_transform-lichtenstein.png

-.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux 
+.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux 
+
+.. _`Wikimedia: Triumph typewrtier text Linzensoep`: https://commons.wikimedia.org/wiki/File:Triumph.typewriter_text_Linzensoep.gif
--- a/tests/resources/typewriter.png
+++ b/tests/resources/typewriter.png
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -748,3 +748,9 @@ def test_ghostscript_pdfa_failure(spoof_no_tess_no_pdfa):
 def test_ghostscript_feature_elision(spoof_no_tess_pdfa_warning):
    check_ocrmypdf('ccitt.pdf', 'test_feature_elision.pdf',
                   env=spoof_no_tess_pdfa_warning)
+
+
+def test_very_high_dpi(spoof_tesseract_cache):
+    "Checks for a Decimal quantize error with high DPI, etc"
+    check_ocrmypdf('2400dpi.pdf', 'test_2400dpi.pdf',
+                   env=spoof_tesseract_cache)