mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-04 20:54:18 -04:00
Fix pikepdf 0.9.0
This commit is contained in:
@@ -798,7 +798,7 @@ def ocr_tesseract_textonly_pdf(
|
||||
def get_docinfo(base_pdf, options):
|
||||
def from_document_info(key):
|
||||
try:
|
||||
s = base_pdf.metadata[key]
|
||||
s = base_pdf.docinfo[key]
|
||||
return str(s)
|
||||
except (KeyError, TypeError):
|
||||
return ''
|
||||
|
||||
@@ -299,8 +299,9 @@ def convert_to_jbig2(pike, jbig2_groups, root, log, options):
|
||||
jbig2_im_data = jbig2_im_file.read_bytes()
|
||||
im_obj = pike.get_object(xref, 0)
|
||||
im_obj.write(
|
||||
jbig2_im_data, pikepdf.Name('/JBIG2Decode'),
|
||||
jbig2_globals_dict
|
||||
jbig2_im_data,
|
||||
filter=pikepdf.Name('/JBIG2Decode'),
|
||||
decode_parms=jbig2_globals_dict
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -545,7 +545,7 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile, xmltext):
|
||||
fspath(infile), pageno, xmltext=xmltext, height=height_pt)
|
||||
pageinfo['bboxes'] = bboxes
|
||||
else:
|
||||
pscript5_mode = str(pdf.metadata.get('/Creator')).startswith('PScript5')
|
||||
pscript5_mode = str(pdf.docinfo.get('/Creator')).startswith('PScript5')
|
||||
miner = get_page_analysis(infile, pageno, pscript5_mode)
|
||||
pageinfo['textboxes'] = list(simplify_textboxes(miner))
|
||||
bboxes = (box.bbox for box in pageinfo['textboxes'])
|
||||
|
||||
@@ -25,7 +25,7 @@ from unittest.mock import patch
|
||||
import datetime
|
||||
|
||||
import pikepdf
|
||||
from pikepdf.models.metadata import encode_pdf_date, decode_pdf_date
|
||||
from pikepdf.models.metadata import decode_pdf_date
|
||||
|
||||
from ocrmypdf.exceptions import ExitCode
|
||||
from ocrmypdf.helpers import fspath
|
||||
@@ -67,7 +67,7 @@ def test_preserve_metadata(spoof_tesseract_noop, output_type,
|
||||
pdf_after = pikepdf.open(output)
|
||||
|
||||
for key in ('/Title', '/Author'):
|
||||
assert pdf_before.metadata[key] == pdf_after.metadata[key]
|
||||
assert pdf_before.docinfo[key] == pdf_after.docinfo[key]
|
||||
|
||||
pdfa_info = file_claims_pdfa(str(output))
|
||||
assert pdfa_info['output'] == output_type
|
||||
@@ -94,12 +94,12 @@ def test_override_metadata(spoof_tesseract_noop, output_type, resources,
|
||||
before = pikepdf.open(input_file)
|
||||
after = pikepdf.open(outpdf)
|
||||
|
||||
assert after.metadata.Title == german, after.metadata
|
||||
assert after.metadata.Author == chinese, after.metadata
|
||||
assert after.metadata.get('/Keywords', '') == ''
|
||||
assert after.docinfo.Title == german, after.docinfo
|
||||
assert after.docinfo.Author == chinese, after.docinfo
|
||||
assert after.docinfo.get('/Keywords', '') == ''
|
||||
|
||||
before_date = decode_pdf_date(str(before.metadata.CreationDate))
|
||||
after_date = decode_pdf_date(str(after.metadata.CreationDate))
|
||||
before_date = decode_pdf_date(str(before.docinfo.CreationDate))
|
||||
after_date = decode_pdf_date(str(after.docinfo.CreationDate))
|
||||
assert before_date == after_date
|
||||
|
||||
pdfa_info = file_claims_pdfa(outpdf)
|
||||
|
||||
Reference in New Issue
Block a user