Refuse to OCR digitally signed PDFs.

validate_pdfinfo_options() now raises the new DigitalSignatureError (exit code
ExitCode.input_file) when a PDF with an AcroForm reports a signature; a test
builds such a file by setting /SigFlags on an existing AcroForm fixture.

diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
index 01f346b8..aa27fd86 100644
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -27,6 +27,7 @@ from ocrmypdf._jobcontext import PageContext, PdfContext
 from ocrmypdf._version import PROGRAM_NAME
 from ocrmypdf._version import __version__ as VERSION
 from ocrmypdf.exceptions import (
+    DigitalSignatureError,
     DpiError,
     EncryptedPdfError,
     InputFileError,
@@ -197,6 +198,8 @@ def validate_pdfinfo_options(context: PdfContext) -> None:
             "Designer and can only be read by Adobe Acrobat or Adobe Reader."
         )
     if pdfinfo.has_acroform:
+        if pdfinfo.has_signature:
+            raise DigitalSignatureError()
         if options.redo_ocr:
             raise InputFileError(
                 "This PDF has a user fillable form. --redo-ocr is not "
diff --git a/src/ocrmypdf/exceptions.py b/src/ocrmypdf/exceptions.py
index 3db7328f..7c1a6414 100644
--- a/src/ocrmypdf/exceptions.py
+++ b/src/ocrmypdf/exceptions.py
@@ -108,6 +108,18 @@ class EncryptedPdfError(ExitCodeException):
     )
 
 
+class DigitalSignatureError(ExitCodeException):
+    """PDF has a digital signature."""
+
+    exit_code = ExitCode.input_file
+    message = dedent(
+        """\
+        Input PDF has a digital signature. OCR would alter the document,
+        invalidating the signature.
+        """
+    )
+
+
 class TesseractConfigError(ExitCodeException):
     """Tesseract config can't be parsed."""
 
diff --git a/tests/test_acroform.py b/tests/test_acroform.py
index b45d2d4e..90e885a0 100644
--- a/tests/test_acroform.py
+++ b/tests/test_acroform.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 
 import logging
 
+import pikepdf
 import pytest
 
 import ocrmypdf
@@ -31,3 +32,17 @@ def test_acroform_message(acroform, caplog, outpdf):
     check_ocrmypdf(acroform, outpdf, '--plugin', 'tests/plugins/tesseract_noop.py')
     assert 'fillable form' in caplog.text
     assert '--force-ocr' in caplog.text
+
+
+@pytest.fixture
+def digitally_signed(acroform, outdir):
+    out = outdir / 'acroform_signed.pdf'
+    with pikepdf.open(acroform) as pdf:
+        pdf.Root.AcroForm.SigFlags = 3
+        pdf.save(out)
+    yield out
+
+
+def test_digital_signature(digitally_signed, no_outpdf):
+    with pytest.raises(ocrmypdf.exceptions.DigitalSignatureError):
+        check_ocrmypdf(digitally_signed, no_outpdf)