mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-05 13:16:55 -04:00
pdfinfo: replace most remaining dict-style access
This commit is contained in:
@@ -734,7 +734,7 @@ def run_pipeline():
|
||||
180: 's', 270: 'w'}
|
||||
orientations = []
|
||||
for n, page in enumerate(pdfinfo):
|
||||
angle = pdfinfo[n].get('rotated', 0)
|
||||
angle = pdfinfo[n].rotation or 0
|
||||
if angle != 0:
|
||||
orientations.append('{0}{1}'.format(
|
||||
n + 1,
|
||||
|
||||
@@ -592,14 +592,14 @@ def _pdf_get_pageinfo(pdf, pageno: int):
|
||||
return pageinfo
|
||||
|
||||
|
||||
def pdf_get_all_pageinfo(infile):
|
||||
def _pdf_get_all_pageinfo(infile):
|
||||
if isinstance(infile, Path):
|
||||
infile = str(infile)
|
||||
pdf = pypdf.PdfFileReader(infile)
|
||||
return [PageInfo(pdf, n) for n in range(pdf.numPages)]
|
||||
|
||||
|
||||
class PageInfo(MutableMapping):
|
||||
class PageInfo:
|
||||
def __init__(self, infile, pageno):
|
||||
self._infile = infile
|
||||
self._pageno = pageno
|
||||
@@ -631,7 +631,14 @@ class PageInfo(MutableMapping):
|
||||
|
||||
@property
|
||||
def rotation(self):
|
||||
return self._pageinfo['rotate']
|
||||
return self._pageinfo.get('rotate', None)
|
||||
|
||||
@rotation.setter
|
||||
def rotation(self, value):
|
||||
if value in (0, 90, 180, 270, 360, -90, -180, -270):
|
||||
self._pageinfo['rotate'] = value
|
||||
else:
|
||||
raise ValueError("rotation must be a cardinal angle")
|
||||
|
||||
@property
|
||||
def images(self):
|
||||
@@ -645,27 +652,21 @@ class PageInfo(MutableMapping):
|
||||
def yres(self):
|
||||
return self._pageinfo.get('yres', None)
|
||||
|
||||
@property
|
||||
def userunit(self):
|
||||
return self._pageinfo.get('userunit', None)
|
||||
|
||||
@property
|
||||
def min_version(self):
|
||||
if self.userunit is not None:
|
||||
return '1.6'
|
||||
else:
|
||||
return '1.5'
|
||||
|
||||
@property
|
||||
def images(self):
|
||||
return self._pageinfo['images']
|
||||
|
||||
def __getitem__(self, item):
|
||||
warnings.warn("pageinfo[item] is deprecated", DeprecationWarning)
|
||||
return self._pageinfo[item]
|
||||
|
||||
def __len__(self):
|
||||
return len(self._pageinfo)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._pageinfo)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
warnings.warn("pageinfo[item] is deprecated", DeprecationWarning)
|
||||
self._pageinfo[key] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._pageinfo[key]
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
'<PageInfo '
|
||||
@@ -677,13 +678,21 @@ class PageInfo(MutableMapping):
|
||||
|
||||
|
||||
class PdfInfo:
|
||||
"""Get summary information about a PDF
|
||||
|
||||
"""
|
||||
def __init__(self, infile):
|
||||
self._pages = pdf_get_all_pageinfo(infile)
|
||||
self._pages = _pdf_get_all_pageinfo(infile)
|
||||
|
||||
@property
|
||||
def pages(self):
|
||||
return self._pages
|
||||
|
||||
@property
|
||||
def min_version(self):
|
||||
# The minimum PDF is the maximum version that any particular page needs
|
||||
return max(page.min_version for page in self.pages)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return self._pages[item]
|
||||
|
||||
@@ -699,7 +708,7 @@ def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('infile')
|
||||
args = parser.parse_args()
|
||||
info = pdf_get_all_pageinfo(args.infile)
|
||||
info = _pdf_get_all_pageinfo(args.infile)
|
||||
from pprint import pprint
|
||||
pprint(info)
|
||||
|
||||
|
||||
@@ -196,16 +196,16 @@ def get_pageinfo(input_file, context):
|
||||
|
||||
def get_page_dpi(pageinfo, options):
|
||||
"Get the DPI when nonsquare DPI is tolerable"
|
||||
xres = max(pageinfo.get('xres', VECTOR_PAGE_DPI), options.oversample or 0)
|
||||
yres = max(pageinfo.get('yres', VECTOR_PAGE_DPI), options.oversample or 0)
|
||||
xres = max(pageinfo.xres or VECTOR_PAGE_DPI, options.oversample or 0)
|
||||
yres = max(pageinfo.yres or VECTOR_PAGE_DPI, options.oversample or 0)
|
||||
return (float(xres), float(yres))
|
||||
|
||||
|
||||
def get_page_square_dpi(pageinfo, options):
|
||||
"Get the DPI when we require xres == yres"
|
||||
return float(max(
|
||||
pageinfo.get('xres', VECTOR_PAGE_DPI),
|
||||
pageinfo.get('yres', VECTOR_PAGE_DPI),
|
||||
pageinfo.xres or VECTOR_PAGE_DPI,
|
||||
pageinfo.yres or VECTOR_PAGE_DPI,
|
||||
options.oversample or 0))
|
||||
|
||||
|
||||
@@ -383,7 +383,7 @@ def orient_page(
|
||||
|
||||
pageno = int(os.path.basename(page_pdf)[0:6]) - 1
|
||||
pdfinfo = context.get_pdfinfo()
|
||||
pdfinfo[pageno]['rotated'] = orient_conf.angle
|
||||
pdfinfo[pageno].rotation = orient_conf.angle
|
||||
context.set_pdfinfo(pdfinfo)
|
||||
|
||||
|
||||
|
||||
@@ -132,4 +132,4 @@ def first_page_dimensions(pdf):
|
||||
from ocrmypdf import pdfinfo
|
||||
info = pdfinfo.PdfInfo(pdf)
|
||||
page0 = info[0]
|
||||
return (page0['width_inches'], page0['height_inches'])
|
||||
return (page0.width_inches, page0.height_inches)
|
||||
|
||||
@@ -28,10 +28,10 @@ def test_single_page_text(outdir):
|
||||
pdf.showPage()
|
||||
pdf.save()
|
||||
|
||||
pdfinfo = pdfinfo.PdfInfo(filename)
|
||||
info = pdfinfo.PdfInfo(filename)
|
||||
|
||||
assert len(pdfinfo) == 1
|
||||
page = pdfinfo[0]
|
||||
assert len(info) == 1
|
||||
page = info[0]
|
||||
|
||||
assert page.has_text
|
||||
assert len(page.images) == 0
|
||||
@@ -55,10 +55,10 @@ def test_single_page_image(outdir):
|
||||
layout_fun=layout_fun)
|
||||
filename.write_bytes(pdf_bytes)
|
||||
|
||||
pdfinfo = pdfinfo.PdfInfo(filename)
|
||||
info = pdfinfo.PdfInfo(filename)
|
||||
|
||||
assert len(pdfinfo) == 1
|
||||
page = pdfinfo[0]
|
||||
assert len(info) == 1
|
||||
page = info[0]
|
||||
|
||||
assert not page.has_text
|
||||
assert len(page.images) == 1
|
||||
@@ -85,9 +85,9 @@ def test_single_page_inline_image(outdir):
|
||||
pdf.showPage()
|
||||
pdf.save()
|
||||
|
||||
pdfinfo = pdfinfo.PdfInfo(filename)
|
||||
print(pdfinfo)
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
pdf = pdfinfo.PdfInfo(filename)
|
||||
print(pdf)
|
||||
pdfimage = pdf[0].images[0]
|
||||
assert isclose(pdfimage.xres, 8)
|
||||
assert pdfimage.color == Colorspace.rgb # reportlab produces color image
|
||||
assert pdfimage.width == 8
|
||||
@@ -96,9 +96,9 @@ def test_single_page_inline_image(outdir):
|
||||
def test_jpeg(resources, outdir):
|
||||
filename = resources / 'c02-22.pdf'
|
||||
|
||||
pdfinfo = pdfinfo.PdfInfo(filename)
|
||||
pdf = pdfinfo.PdfInfo(filename)
|
||||
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
pdfimage = pdf[0].images[0]
|
||||
assert pdfimage.enc == Encoding.jpeg
|
||||
assert isclose(pdfimage.xres, 150)
|
||||
|
||||
@@ -106,14 +106,14 @@ def test_jpeg(resources, outdir):
|
||||
def test_form_xobject(resources):
|
||||
filename = resources / 'formxobject.pdf'
|
||||
|
||||
pdfinfo = pdfinfo.PdfInfo(filename)
|
||||
pdfimage = pdfinfo[0].images[0]
|
||||
pdf = pdfinfo.PdfInfo(filename)
|
||||
pdfimage = pdf[0].images[0]
|
||||
assert pdfimage.width == 50
|
||||
|
||||
|
||||
def test_no_contents(resources):
|
||||
filename = resources / 'no_contents.pdf'
|
||||
|
||||
pdfinfo = pdfinfo.PdfInfo(filename)
|
||||
assert len(pdfinfo[0].images) == 0
|
||||
assert pdfinfo[0].has_text == False
|
||||
pdf = pdfinfo.PdfInfo(filename)
|
||||
assert len(pdf[0].images) == 0
|
||||
assert pdf[0].has_text == False
|
||||
Reference in New Issue
Block a user