pageinfo: fix “decimal.InvalidOperation: quantize result has too many digits”

And add new test case for this.
This commit is contained in:
James R. Barlow
2016-12-08 16:04:14 -08:00
parent 1ae1d116c7
commit e57aa0eee2
5 changed files with 14 additions and 4 deletions

View File

@@ -2,7 +2,7 @@
# © 2015 James R. Barlow: github.com/jbarlow83
from subprocess import Popen, PIPE
from decimal import Decimal, getcontext
from decimal import Decimal
from math import hypot
import re
import sys
@@ -330,9 +330,9 @@ def _find_page_regular_images(page, pageinfo, contentsinfo):
image['dpi_h'] = max(dpi_h, image.get('dpi_h', 0))
DPI_PREC = Decimal('1.000')
dpi = Decimal(image['dpi_w'] * image['dpi_h']).sqrt()
image['dpi_w'] = Decimal(image['dpi_w']).quantize(DPI_PREC)
image['dpi_h'] = Decimal(image['dpi_h']).quantize(DPI_PREC)
dpi = Decimal(image['dpi_w'] * image['dpi_h']).sqrt()
image['dpi'] = dpi.quantize(DPI_PREC)
yield image
@@ -407,7 +407,6 @@ def _pdf_get_pageinfo(infile, pageno: int):
def pdf_get_all_pageinfo(infile):
pdf = pypdf.PdfFileReader(infile)
getcontext().prec = 6
return [_pdf_get_pageinfo(infile, n) for n in range(pdf.numPages)]

BIN
tests/resources/2400dpi.pdf Normal file
View File

Binary file not shown.

View File

@@ -31,6 +31,9 @@ In some cases they were converted from one image format to another without other
* - LinnSequencer.jpg, linn.pdf, linn.txt
- `Wikimedia: LinnSequencer`_
- Creative Commons BY-SA 3.0
* - typewriter.png, 2400dpi.pdf
- `Wikimedia: Triumph typewriter text Linzensoep`_
- Creative Commons BY-SA 2.5
Files generated for this project
@@ -104,4 +107,6 @@ These test resources are assemblies from other previously mentioned files, relea
.. _`Wikimedia: JPEG2000 Lichtenstein`: https://en.wikipedia.org/wiki/JPEG_2000#/media/File:Jpeg2000_2-level_wavelet_transform-lichtenstein.png
.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux
.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux
.. _`Wikimedia: Triumph typewriter text Linzensoep`: https://commons.wikimedia.org/wiki/File:Triumph.typewriter_text_Linzensoep.gif

View File

Binary file not shown.

View File

@@ -748,3 +748,9 @@ def test_ghostscript_pdfa_failure(spoof_no_tess_no_pdfa):
def test_ghostscript_feature_elision(spoof_no_tess_pdfa_warning):
check_ocrmypdf('ccitt.pdf', 'test_feature_elision.pdf',
env=spoof_no_tess_pdfa_warning)
def test_very_high_dpi(spoof_tesseract_cache):
"Checks for a Decimal quantize error with high DPI, etc"
check_ocrmypdf('2400dpi.pdf', 'test_2400dpi.pdf',
env=spoof_tesseract_cache)