pdfinfo: replace most remaining dict-style access

This commit is contained in:
James R. Barlow
2017-05-19 16:17:36 -07:00
parent 3e73fa81bf
commit d9005a1074
5 changed files with 54 additions and 45 deletions

View File

@@ -734,7 +734,7 @@ def run_pipeline():
180: 's', 270: 'w'}
orientations = []
for n, page in enumerate(pdfinfo):
angle = pdfinfo[n].get('rotated', 0)
angle = pdfinfo[n].rotation or 0
if angle != 0:
orientations.append('{0}{1}'.format(
n + 1,

View File

@@ -592,14 +592,14 @@ def _pdf_get_pageinfo(pdf, pageno: int):
return pageinfo
def pdf_get_all_pageinfo(infile):
def _pdf_get_all_pageinfo(infile):
    """Build one PageInfo per page of the PDF at *infile*.

    A ``Path`` argument is converted to ``str`` before it is handed to
    ``pypdf.PdfFileReader``; any other value is passed through unchanged.
    Returns a list of ``PageInfo`` objects, one per page index reported
    by the reader's ``numPages``.
    """
    source = str(infile) if isinstance(infile, Path) else infile
    reader = pypdf.PdfFileReader(source)
    page_count = reader.numPages
    return [PageInfo(reader, pageno) for pageno in range(page_count)]
class PageInfo(MutableMapping):
class PageInfo:
def __init__(self, infile, pageno):
self._infile = infile
self._pageno = pageno
@@ -631,7 +631,14 @@ class PageInfo(MutableMapping):
@property
def rotation(self):
return self._pageinfo['rotate']
return self._pageinfo.get('rotate', None)
@rotation.setter
def rotation(self, value):
    """Record the page rotation under the ``'rotate'`` key.

    Only the angles listed below are accepted; the value is stored as
    given, without normalisation.

    Raises:
        ValueError: if *value* is not one of the accepted angles.
    """
    cardinal_angles = (0, 90, 180, 270, 360, -90, -180, -270)
    if value not in cardinal_angles:
        raise ValueError("rotation must be a cardinal angle")
    self._pageinfo['rotate'] = value
@property
def images(self):
@@ -645,27 +652,21 @@ class PageInfo(MutableMapping):
def yres(self):
return self._pageinfo.get('yres', None)
@property
def userunit(self):
    """Return the page's ``'userunit'`` entry, or None when it is absent."""
    info = self._pageinfo
    return info.get('userunit')
@property
def min_version(self):
    """Return the lowest PDF version string this page requires.

    A page that has a ``userunit`` value needs '1.6'; every other page
    is reported as '1.5'.
    """
    # NOTE(review): presumably '1.6' because UserUnit first appeared in
    # PDF 1.6 -- confirm against the PDF reference.
    return '1.5' if self.userunit is None else '1.6'
@property
def images(self):
return self._pageinfo['images']
def __getitem__(self, item):
warnings.warn("pageinfo[item] is deprecated", DeprecationWarning)
return self._pageinfo[item]
def __len__(self):
return len(self._pageinfo)
def __iter__(self):
return iter(self._pageinfo)
def __setitem__(self, key, value):
warnings.warn("pageinfo[item] is deprecated", DeprecationWarning)
self._pageinfo[key] = value
def __delitem__(self, key):
del self._pageinfo[key]
def __repr__(self):
return (
'<PageInfo '
@@ -677,13 +678,21 @@ class PageInfo(MutableMapping):
class PdfInfo:
"""Get summary information about a PDF
"""
def __init__(self, infile):
self._pages = pdf_get_all_pageinfo(infile)
self._pages = _pdf_get_all_pageinfo(infile)
@property
def pages(self):
    """Return the list of per-page info objects built in ``__init__``."""
    return self._pages
@property
def min_version(self):
    """Return the PDF version string the document as a whole requires.

    Raises:
        ValueError: if there are no pages (``max`` of an empty sequence).
    """
    # The document's minimum PDF version is the highest version that any
    # single page needs. Versions are compared as strings.
    per_page_versions = [page.min_version for page in self.pages]
    return max(per_page_versions)
def __getitem__(self, item):
return self._pages[item]
@@ -699,7 +708,7 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument('infile')
args = parser.parse_args()
info = pdf_get_all_pageinfo(args.infile)
info = _pdf_get_all_pageinfo(args.infile)
from pprint import pprint
pprint(info)

View File

@@ -196,16 +196,16 @@ def get_pageinfo(input_file, context):
def get_page_dpi(pageinfo, options):
"Get the DPI when nonsquare DPI is tolerable"
xres = max(pageinfo.get('xres', VECTOR_PAGE_DPI), options.oversample or 0)
yres = max(pageinfo.get('yres', VECTOR_PAGE_DPI), options.oversample or 0)
xres = max(pageinfo.xres or VECTOR_PAGE_DPI, options.oversample or 0)
yres = max(pageinfo.yres or VECTOR_PAGE_DPI, options.oversample or 0)
return (float(xres), float(yres))
def get_page_square_dpi(pageinfo, options):
"Get the DPI when we require xres == yres"
return float(max(
pageinfo.get('xres', VECTOR_PAGE_DPI),
pageinfo.get('yres', VECTOR_PAGE_DPI),
pageinfo.xres or VECTOR_PAGE_DPI,
pageinfo.yres or VECTOR_PAGE_DPI,
options.oversample or 0))
@@ -383,7 +383,7 @@ def orient_page(
pageno = int(os.path.basename(page_pdf)[0:6]) - 1
pdfinfo = context.get_pdfinfo()
pdfinfo[pageno]['rotated'] = orient_conf.angle
pdfinfo[pageno].rotation = orient_conf.angle
context.set_pdfinfo(pdfinfo)

View File

@@ -132,4 +132,4 @@ def first_page_dimensions(pdf):
from ocrmypdf import pdfinfo
info = pdfinfo.PdfInfo(pdf)
page0 = info[0]
return (page0['width_inches'], page0['height_inches'])
return (page0.width_inches, page0.height_inches)

View File

@@ -28,10 +28,10 @@ def test_single_page_text(outdir):
pdf.showPage()
pdf.save()
pdfinfo = pdfinfo.PdfInfo(filename)
info = pdfinfo.PdfInfo(filename)
assert len(pdfinfo) == 1
page = pdfinfo[0]
assert len(info) == 1
page = info[0]
assert page.has_text
assert len(page.images) == 0
@@ -55,10 +55,10 @@ def test_single_page_image(outdir):
layout_fun=layout_fun)
filename.write_bytes(pdf_bytes)
pdfinfo = pdfinfo.PdfInfo(filename)
info = pdfinfo.PdfInfo(filename)
assert len(pdfinfo) == 1
page = pdfinfo[0]
assert len(info) == 1
page = info[0]
assert not page.has_text
assert len(page.images) == 1
@@ -85,9 +85,9 @@ def test_single_page_inline_image(outdir):
pdf.showPage()
pdf.save()
pdfinfo = pdfinfo.PdfInfo(filename)
print(pdfinfo)
pdfimage = pdfinfo[0].images[0]
pdf = pdfinfo.PdfInfo(filename)
print(pdf)
pdfimage = pdf[0].images[0]
assert isclose(pdfimage.xres, 8)
assert pdfimage.color == Colorspace.rgb # reportlab produces color image
assert pdfimage.width == 8
@@ -96,9 +96,9 @@ def test_single_page_inline_image(outdir):
def test_jpeg(resources, outdir):
filename = resources / 'c02-22.pdf'
pdfinfo = pdfinfo.PdfInfo(filename)
pdf = pdfinfo.PdfInfo(filename)
pdfimage = pdfinfo[0].images[0]
pdfimage = pdf[0].images[0]
assert pdfimage.enc == Encoding.jpeg
assert isclose(pdfimage.xres, 150)
@@ -106,14 +106,14 @@ def test_jpeg(resources, outdir):
def test_form_xobject(resources):
filename = resources / 'formxobject.pdf'
pdfinfo = pdfinfo.PdfInfo(filename)
pdfimage = pdfinfo[0].images[0]
pdf = pdfinfo.PdfInfo(filename)
pdfimage = pdf[0].images[0]
assert pdfimage.width == 50
def test_no_contents(resources):
filename = resources / 'no_contents.pdf'
pdfinfo = pdfinfo.PdfInfo(filename)
assert len(pdfinfo[0].images) == 0
assert pdfinfo[0].has_text == False
pdf = pdfinfo.PdfInfo(filename)
assert len(pdf[0].images) == 0
assert pdf[0].has_text == False