Refactor ImageInfo access from indexing to attribute access

This commit is contained in:
James R. Barlow
2017-05-18 18:39:14 -07:00
parent 127706153d
commit 56d2aae963
5 changed files with 33 additions and 27 deletions

View File

@@ -619,6 +619,10 @@ class PageInfo(MutableMapping):
def yres(self):
return self._pageinfo['yres']
@property
def images(self):
return self._pageinfo['images']
def __getitem__(self, item):
warnings.warn("pageinfo[item] is deprecated", DeprecationWarning)
return self._pageinfo[item]

View File

@@ -397,13 +397,13 @@ def rasterize_with_ghostscript(
device = 'png16m' # 24-bit
if pageinfo.images:
if all(image['comp'] == 1 for image in pageinfo.images):
if all(image['bpc'] == 1 for image in pageinfo.images):
if all(image.comp == 1 for image in pageinfo.images):
if all(image.bpc == 1 for image in pageinfo.images):
device = 'pngmono'
elif all(image['bpc'] > 1 and image['color'] == 'index'
elif all(image.bpc > 1 and image.color == 'index'
for image in pageinfo.images):
device = 'png256'
elif all(image['bpc'] > 1 and image['color'] == 'gray'
elif all(image.bpc > 1 and image.color == 'gray'
for image in pageinfo.images):
device = 'pnggray'
@@ -430,7 +430,7 @@ def preprocess_remove_background(
pageinfo = get_pageinfo(input_file, context)
if any(image['bpc'] > 1 for image in pageinfo.images):
if any(image.bpc > 1 for image in pageinfo.images):
leptonica.remove_background(input_file, output_file)
else:
log.info("{0:4d}: background removal skipped on mono page".format(

View File

@@ -12,6 +12,7 @@ from ocrmypdf import leptonica
from ocrmypdf.pdfa import file_claims_pdfa
from ocrmypdf.exec import ghostscript
import logging
from math import isclose
check_ocrmypdf = pytest.helpers.check_ocrmypdf
@@ -629,7 +630,7 @@ def test_jbig2_passthrough(spoof_tesseract_cache, resources, outpdf):
env=spoof_tesseract_cache)
out_pageinfo = PdfInfo(out)
assert out_pageinfo[0].images[0]['enc'] == 'jbig2'
assert out_pageinfo[0].images[0].enc == 'jbig2'
def test_stdin(spoof_tesseract_noop, ocrmypdf_exec, resources, outpdf):
@@ -746,8 +747,8 @@ def test_very_high_dpi(spoof_tesseract_cache, resources, outpdf):
pdfinfo = PdfInfo(outpdf)
image = pdfinfo[0].images[0]
assert image['dpi_w'] == image['dpi_h']
assert image['dpi_w'] == 2400
assert isclose(image.xres, image.yres)
assert isclose(image.xres, 2400)
def test_overlay(spoof_tesseract_noop, resources, outpdf):
@@ -905,16 +906,16 @@ def test_compression_preserved(spoof_tesseract_noop, ocrmypdf_exec,
pdfimage = pdfinfo[0].images[0]
if input_file.endswith('.png'):
assert pdfimage['enc'] != 'jpeg', \
assert pdfimage.enc != 'jpeg', \
"Lossless compression changed to lossy!"
elif input_file.endswith('.jpg'):
assert pdfimage['enc'] == 'jpeg', \
assert pdfimage.enc == 'jpeg', \
"Lossy compression changed to lossless!"
if im.mode.startswith('RGB') or im.mode.startswith('BGR'):
assert pdfimage['color'] == 'rgb', \
assert pdfimage.color == 'rgb', \
"Colorspace changed"
elif im.mode.startswith('L'):
assert pdfimage['color'] == 'gray', \
assert pdfimage.color == 'gray', \
"Colorspace changed"
@@ -950,15 +951,15 @@ def test_compression_changed(spoof_tesseract_noop, ocrmypdf_exec,
pdfimage = pdfinfo[0].images[0]
if compression == 'jpeg':
assert pdfimage['enc'] == 'jpeg'
assert pdfimage.enc == 'jpeg'
elif compression == 'lossless':
assert pdfimage['enc'] == 'image'
assert pdfimage.enc == 'image'
if im.mode.startswith('RGB') or im.mode.startswith('BGR'):
assert pdfimage['color'] == 'rgb', \
assert pdfimage.color == 'rgb', \
"Colorspace changed"
elif im.mode.startswith('L'):
assert pdfimage['color'] == 'gray', \
assert pdfimage.color == 'gray', \
"Colorspace changed"

View File

@@ -5,6 +5,7 @@ from ocrmypdf import pageinfo
from reportlab.pdfgen.canvas import Canvas
from PIL import Image
from tempfile import NamedTemporaryFile
from math import isclose
from contextlib import suppress
import os
import shutil
@@ -62,12 +63,12 @@ def test_single_page_image(outdir):
assert len(page.images) == 1
pdfimage = page.images[0]
assert pdfimage['width'] == 8
assert pdfimage['color'] == 'gray'
assert pdfimage.width == 8
assert pdfimage.color == 'gray'
# DPI in a 1"x1" is the image width
assert abs(pdfimage['dpi_w'] - 8) < 1e-5
assert abs(pdfimage['dpi_h'] - 8) < 1e-5
assert isclose(pdfimage.xres, 8)
assert isclose(pdfimage.yres, 8)
def test_single_page_inline_image(outdir):
@@ -86,9 +87,9 @@ def test_single_page_inline_image(outdir):
pdfinfo = pageinfo.PdfInfo(filename)
print(pdfinfo)
pdfimage = pdfinfo[0].images[0]
assert (pdfimage['dpi_w'] - 8) < 1e-5
assert pdfimage['color'] != '-'
assert pdfimage['width'] == 8
assert isclose(pdfimage.xres, 8)
assert pdfimage.color != '-'
assert pdfimage.width == 8
def test_jpeg(resources, outdir):
@@ -97,8 +98,8 @@ def test_jpeg(resources, outdir):
pdfinfo = pageinfo.PdfInfo(filename)
pdfimage = pdfinfo[0].images[0]
assert pdfimage['enc'] == 'jpeg'
assert (pdfimage['dpi_w'] - 150) < 1e-5
assert pdfimage.enc == 'jpeg'
assert isclose(pdfimage.xres, 150)
def test_form_xobject(resources):
@@ -106,7 +107,7 @@ def test_form_xobject(resources):
pdfinfo = pageinfo.PdfInfo(filename)
pdfimage = pdfinfo[0].images[0]
assert pdfimage['width'] == 50
assert pdfimage.width == 50
def test_no_contents(resources):

View File

@@ -117,4 +117,4 @@ def test_content_preservation(ensure_tess4, resources, outpdf):
info = pageinfo.PdfInfo(outpdf)
page = info[0]
assert len(page['images']) > 1, "masked were rasterized"
assert len(page.images) > 1, "masked were rasterized"