mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-19 12:04:44 -04:00
pageinfo: fix “decimal.InvalidOperation: quantize result has too many digits”
And add new test case for this.
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
# © 2015 James R. Barlow: github.com/jbarlow83
|
||||
|
||||
from subprocess import Popen, PIPE
|
||||
from decimal import Decimal, getcontext
|
||||
from decimal import Decimal
|
||||
from math import hypot
|
||||
import re
|
||||
import sys
|
||||
@@ -330,9 +330,9 @@ def _find_page_regular_images(page, pageinfo, contentsinfo):
|
||||
image['dpi_h'] = max(dpi_h, image.get('dpi_h', 0))
|
||||
|
||||
DPI_PREC = Decimal('1.000')
|
||||
dpi = Decimal(image['dpi_w'] * image['dpi_h']).sqrt()
|
||||
image['dpi_w'] = Decimal(image['dpi_w']).quantize(DPI_PREC)
|
||||
image['dpi_h'] = Decimal(image['dpi_h']).quantize(DPI_PREC)
|
||||
dpi = Decimal(image['dpi_w'] * image['dpi_h']).sqrt()
|
||||
image['dpi'] = dpi.quantize(DPI_PREC)
|
||||
yield image
|
||||
|
||||
@@ -407,7 +407,6 @@ def _pdf_get_pageinfo(infile, pageno: int):
|
||||
|
||||
def pdf_get_all_pageinfo(infile):
|
||||
pdf = pypdf.PdfFileReader(infile)
|
||||
getcontext().prec = 6
|
||||
return [_pdf_get_pageinfo(infile, n) for n in range(pdf.numPages)]
|
||||
|
||||
|
||||
|
||||
BIN
tests/resources/2400dpi.pdf
Normal file
BIN
tests/resources/2400dpi.pdf
Normal file
Binary file not shown.
@@ -31,6 +31,9 @@ In some cases they were converted from one image format to another without other
|
||||
* - LinnSequencer.jpg, linn.pdf, linn.txt
|
||||
- `Wikimedia: LinnSequencer`_
|
||||
- Creative Commons BY-SA 3.0
|
||||
* - typewriter.png, 2400dpi.pdf
|
||||
- `Wikimedia: Triumph typewrtier text Linzensoep`_
|
||||
- Creative Commons BY-SA 2.5
|
||||
|
||||
|
||||
Files generated for this project
|
||||
@@ -104,4 +107,6 @@ These test resources are assemblies from other previously mentioned files, relea
|
||||
|
||||
.. _`Wikimedia: JPEG2000 Lichtenstein`: https://en.wikipedia.org/wiki/JPEG_2000#/media/File:Jpeg2000_2-level_wavelet_transform-lichtenstein.png
|
||||
|
||||
.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux
|
||||
.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux
|
||||
|
||||
.. _`Wikimedia: Triumph typewrtier text Linzensoep`: https://commons.wikimedia.org/wiki/File:Triumph.typewriter_text_Linzensoep.gif
|
||||
BIN
tests/resources/typewriter.png
Normal file
BIN
tests/resources/typewriter.png
Normal file
Binary file not shown.
@@ -748,3 +748,9 @@ def test_ghostscript_pdfa_failure(spoof_no_tess_no_pdfa):
|
||||
def test_ghostscript_feature_elision(spoof_no_tess_pdfa_warning):
|
||||
check_ocrmypdf('ccitt.pdf', 'test_feature_elision.pdf',
|
||||
env=spoof_no_tess_pdfa_warning)
|
||||
|
||||
|
||||
def test_very_high_dpi(spoof_tesseract_cache):
|
||||
"Checks for a Decimal quantize error with high DPI, etc"
|
||||
check_ocrmypdf('2400dpi.pdf', 'test_2400dpi.pdf',
|
||||
env=spoof_tesseract_cache)
|
||||
|
||||
Reference in New Issue
Block a user