mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-18 19:47:48 -04:00
Refactor ImageInfo access from index-style lookups to attribute access
This commit is contained in:
@@ -619,6 +619,10 @@ class PageInfo(MutableMapping):
|
||||
def yres(self):
|
||||
return self._pageinfo['yres']
|
||||
|
||||
@property
|
||||
def images(self):
|
||||
return self._pageinfo['images']
|
||||
|
||||
def __getitem__(self, item):
|
||||
warnings.warn("pageinfo[item] is deprecated", DeprecationWarning)
|
||||
return self._pageinfo[item]
|
||||
|
||||
@@ -397,13 +397,13 @@ def rasterize_with_ghostscript(
|
||||
|
||||
device = 'png16m' # 24-bit
|
||||
if pageinfo.images:
|
||||
if all(image['comp'] == 1 for image in pageinfo.images):
|
||||
if all(image['bpc'] == 1 for image in pageinfo.images):
|
||||
if all(image.comp == 1 for image in pageinfo.images):
|
||||
if all(image.bpc == 1 for image in pageinfo.images):
|
||||
device = 'pngmono'
|
||||
elif all(image['bpc'] > 1 and image['color'] == 'index'
|
||||
elif all(image.bpc > 1 and image.color == 'index'
|
||||
for image in pageinfo.images):
|
||||
device = 'png256'
|
||||
elif all(image['bpc'] > 1 and image['color'] == 'gray'
|
||||
elif all(image.bpc > 1 and image.color == 'gray'
|
||||
for image in pageinfo.images):
|
||||
device = 'pnggray'
|
||||
|
||||
@@ -430,7 +430,7 @@ def preprocess_remove_background(
|
||||
|
||||
pageinfo = get_pageinfo(input_file, context)
|
||||
|
||||
if any(image['bpc'] > 1 for image in pageinfo.images):
|
||||
if any(image.bpc > 1 for image in pageinfo.images):
|
||||
leptonica.remove_background(input_file, output_file)
|
||||
else:
|
||||
log.info("{0:4d}: background removal skipped on mono page".format(
|
||||
|
||||
@@ -12,6 +12,7 @@ from ocrmypdf import leptonica
|
||||
from ocrmypdf.pdfa import file_claims_pdfa
|
||||
from ocrmypdf.exec import ghostscript
|
||||
import logging
|
||||
from math import isclose
|
||||
|
||||
|
||||
check_ocrmypdf = pytest.helpers.check_ocrmypdf
|
||||
@@ -629,7 +630,7 @@ def test_jbig2_passthrough(spoof_tesseract_cache, resources, outpdf):
|
||||
env=spoof_tesseract_cache)
|
||||
|
||||
out_pageinfo = PdfInfo(out)
|
||||
assert out_pageinfo[0].images[0]['enc'] == 'jbig2'
|
||||
assert out_pageinfo[0].images[0].enc == 'jbig2'
|
||||
|
||||
|
||||
def test_stdin(spoof_tesseract_noop, ocrmypdf_exec, resources, outpdf):
|
||||
@@ -746,8 +747,8 @@ def test_very_high_dpi(spoof_tesseract_cache, resources, outpdf):
|
||||
pdfinfo = PdfInfo(outpdf)
|
||||
|
||||
image = pdfinfo[0].images[0]
|
||||
assert image['dpi_w'] == image['dpi_h']
|
||||
assert image['dpi_w'] == 2400
|
||||
assert isclose(image.xres, image.yres)
|
||||
assert isclose(image.xres, 2400)
|
||||
|
||||
|
||||
def test_overlay(spoof_tesseract_noop, resources, outpdf):
|
||||
@@ -905,16 +906,16 @@ def test_compression_preserved(spoof_tesseract_noop, ocrmypdf_exec,
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
|
||||
if input_file.endswith('.png'):
|
||||
assert pdfimage['enc'] != 'jpeg', \
|
||||
assert pdfimage.enc != 'jpeg', \
|
||||
"Lossless compression changed to lossy!"
|
||||
elif input_file.endswith('.jpg'):
|
||||
assert pdfimage['enc'] == 'jpeg', \
|
||||
assert pdfimage.enc == 'jpeg', \
|
||||
"Lossy compression changed to lossless!"
|
||||
if im.mode.startswith('RGB') or im.mode.startswith('BGR'):
|
||||
assert pdfimage['color'] == 'rgb', \
|
||||
assert pdfimage.color == 'rgb', \
|
||||
"Colorspace changed"
|
||||
elif im.mode.startswith('L'):
|
||||
assert pdfimage['color'] == 'gray', \
|
||||
assert pdfimage.color == 'gray', \
|
||||
"Colorspace changed"
|
||||
|
||||
|
||||
@@ -950,15 +951,15 @@ def test_compression_changed(spoof_tesseract_noop, ocrmypdf_exec,
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
|
||||
if compression == 'jpeg':
|
||||
assert pdfimage['enc'] == 'jpeg'
|
||||
assert pdfimage.enc == 'jpeg'
|
||||
elif compression == 'lossless':
|
||||
assert pdfimage['enc'] == 'image'
|
||||
assert pdfimage.enc == 'image'
|
||||
|
||||
if im.mode.startswith('RGB') or im.mode.startswith('BGR'):
|
||||
assert pdfimage['color'] == 'rgb', \
|
||||
assert pdfimage.color == 'rgb', \
|
||||
"Colorspace changed"
|
||||
elif im.mode.startswith('L'):
|
||||
assert pdfimage['color'] == 'gray', \
|
||||
assert pdfimage.color == 'gray', \
|
||||
"Colorspace changed"
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from ocrmypdf import pageinfo
|
||||
from reportlab.pdfgen.canvas import Canvas
|
||||
from PIL import Image
|
||||
from tempfile import NamedTemporaryFile
|
||||
from math import isclose
|
||||
from contextlib import suppress
|
||||
import os
|
||||
import shutil
|
||||
@@ -62,12 +63,12 @@ def test_single_page_image(outdir):
|
||||
assert len(page.images) == 1
|
||||
|
||||
pdfimage = page.images[0]
|
||||
assert pdfimage['width'] == 8
|
||||
assert pdfimage['color'] == 'gray'
|
||||
assert pdfimage.width == 8
|
||||
assert pdfimage.color == 'gray'
|
||||
|
||||
# DPI in a 1"x1" is the image width
|
||||
assert abs(pdfimage['dpi_w'] - 8) < 1e-5
|
||||
assert abs(pdfimage['dpi_h'] - 8) < 1e-5
|
||||
assert isclose(pdfimage.xres, 8)
|
||||
assert isclose(pdfimage.yres, 8)
|
||||
|
||||
|
||||
def test_single_page_inline_image(outdir):
|
||||
@@ -86,9 +87,9 @@ def test_single_page_inline_image(outdir):
|
||||
pdfinfo = pageinfo.PdfInfo(filename)
|
||||
print(pdfinfo)
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
assert (pdfimage['dpi_w'] - 8) < 1e-5
|
||||
assert pdfimage['color'] != '-'
|
||||
assert pdfimage['width'] == 8
|
||||
assert isclose(pdfimage.xres, 8)
|
||||
assert pdfimage.color != '-'
|
||||
assert pdfimage.width == 8
|
||||
|
||||
|
||||
def test_jpeg(resources, outdir):
|
||||
@@ -97,8 +98,8 @@ def test_jpeg(resources, outdir):
|
||||
pdfinfo = pageinfo.PdfInfo(filename)
|
||||
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
assert pdfimage['enc'] == 'jpeg'
|
||||
assert (pdfimage['dpi_w'] - 150) < 1e-5
|
||||
assert pdfimage.enc == 'jpeg'
|
||||
assert isclose(pdfimage.xres, 150)
|
||||
|
||||
|
||||
def test_form_xobject(resources):
|
||||
@@ -106,7 +107,7 @@ def test_form_xobject(resources):
|
||||
|
||||
pdfinfo = pageinfo.PdfInfo(filename)
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
assert pdfimage['width'] == 50
|
||||
assert pdfimage.width == 50
|
||||
|
||||
|
||||
def test_no_contents(resources):
|
||||
|
||||
@@ -117,4 +117,4 @@ def test_content_preservation(ensure_tess4, resources, outpdf):
|
||||
|
||||
info = pageinfo.PdfInfo(outpdf)
|
||||
page = info[0]
|
||||
assert len(page['images']) > 1, "masked were rasterized"
|
||||
assert len(page.images) > 1, "masked were rasterized"
|
||||
Reference in New Issue
Block a user