From eecab9b95d64cf684147ec6edc594f6a4df1a18a Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Wed, 9 Nov 2016 05:41:12 -0800 Subject: [PATCH] =?UTF-8?q?pdfa:=20fix=20KeyError=20on=20pdfa=5Fdict=20if?= =?UTF-8?q?=20document=20has=20some=20xmp=20metadata=20but=20not=20exactly?= =?UTF-8?q?=20what=20we=E2=80=99re=20looking=20for?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrmypdf/pdfa.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/ocrmypdf/pdfa.py b/ocrmypdf/pdfa.py index 115de315..4b1e7d2a 100644 --- a/ocrmypdf/pdfa.py +++ b/ocrmypdf/pdfa.py @@ -140,21 +140,20 @@ def file_claims_pdfa(filename): 'conformance': 'No XMP metadata'} pdfa_dict = {attr.localName: attr.value for attr in pdfa_nodes} - pdfa_dict['pass'] = False - pdfa_dict['output'] = 'pdf' - if pdfa_dict: - part_conformance = pdfa_dict['part'] + pdfa_dict['conformance'] - valid_part_conforms = {'1A', '1B', '2A', '2B', '2U', '3A', '3B', '3U'} + if not pdfa_dict: + return {'pass': False, 'output': 'pdf', + 'conformance': 'No XMP metadata'} - conformance = 'PDF/A-{}'.format( - part_conformance) + part_conformance = pdfa_dict['part'] + pdfa_dict['conformance'] + valid_part_conforms = {'1A', '1B', '2A', '2B', '2U', '3A', '3B', '3U'} - if part_conformance in valid_part_conforms: - pdfa_dict['pass'] = True - pdfa_dict['output'] = 'pdfa' - pdfa_dict['conformance'] = conformance - else: - pdfa_dict['conformance'] = 'PDF' + conformance = 'PDF/A-{}'.format( + part_conformance) + + if part_conformance in valid_part_conforms: + pdfa_dict['pass'] = True + pdfa_dict['output'] = 'pdfa' + pdfa_dict['conformance'] = conformance return pdfa_dict