Fix compatibility with pikepdf 0.9.0

This commit is contained in:
James R. Barlow
2018-12-14 23:21:13 -08:00
parent 5a7a8e573b
commit ed9bb985e2
4 changed files with 12 additions and 11 deletions

View File

@@ -798,7 +798,7 @@ def ocr_tesseract_textonly_pdf(
def get_docinfo(base_pdf, options):
def from_document_info(key):
try:
s = base_pdf.metadata[key]
s = base_pdf.docinfo[key]
return str(s)
except (KeyError, TypeError):
return ''

View File

@@ -299,8 +299,9 @@ def convert_to_jbig2(pike, jbig2_groups, root, log, options):
jbig2_im_data = jbig2_im_file.read_bytes()
im_obj = pike.get_object(xref, 0)
im_obj.write(
jbig2_im_data, pikepdf.Name('/JBIG2Decode'),
jbig2_globals_dict
jbig2_im_data,
filter=pikepdf.Name('/JBIG2Decode'),
decode_parms=jbig2_globals_dict
)

View File

@@ -545,7 +545,7 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile, xmltext):
fspath(infile), pageno, xmltext=xmltext, height=height_pt)
pageinfo['bboxes'] = bboxes
else:
pscript5_mode = str(pdf.metadata.get('/Creator')).startswith('PScript5')
pscript5_mode = str(pdf.docinfo.get('/Creator')).startswith('PScript5')
miner = get_page_analysis(infile, pageno, pscript5_mode)
pageinfo['textboxes'] = list(simplify_textboxes(miner))
bboxes = (box.bbox for box in pageinfo['textboxes'])

View File

@@ -25,7 +25,7 @@ from unittest.mock import patch
import datetime
import pikepdf
from pikepdf.models.metadata import encode_pdf_date, decode_pdf_date
from pikepdf.models.metadata import decode_pdf_date
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.helpers import fspath
@@ -67,7 +67,7 @@ def test_preserve_metadata(spoof_tesseract_noop, output_type,
pdf_after = pikepdf.open(output)
for key in ('/Title', '/Author'):
assert pdf_before.metadata[key] == pdf_after.metadata[key]
assert pdf_before.docinfo[key] == pdf_after.docinfo[key]
pdfa_info = file_claims_pdfa(str(output))
assert pdfa_info['output'] == output_type
@@ -94,12 +94,12 @@ def test_override_metadata(spoof_tesseract_noop, output_type, resources,
before = pikepdf.open(input_file)
after = pikepdf.open(outpdf)
assert after.metadata.Title == german, after.metadata
assert after.metadata.Author == chinese, after.metadata
assert after.metadata.get('/Keywords', '') == ''
assert after.docinfo.Title == german, after.docinfo
assert after.docinfo.Author == chinese, after.docinfo
assert after.docinfo.get('/Keywords', '') == ''
before_date = decode_pdf_date(str(before.metadata.CreationDate))
after_date = decode_pdf_date(str(after.metadata.CreationDate))
before_date = decode_pdf_date(str(before.docinfo.CreationDate))
after_date = decode_pdf_date(str(after.docinfo.CreationDate))
assert before_date == after_date
pdfa_info = file_claims_pdfa(outpdf)