Implement digital signature detection

This commit is contained in:
James R. Barlow
2023-08-12 01:20:20 -07:00
parent 6e20439c91
commit 45added738
3 changed files with 30 additions and 0 deletions

View File

@@ -27,6 +27,7 @@ from ocrmypdf._jobcontext import PageContext, PdfContext
from ocrmypdf._version import PROGRAM_NAME
from ocrmypdf._version import __version__ as VERSION
from ocrmypdf.exceptions import (
DigitalSignatureError,
DpiError,
EncryptedPdfError,
InputFileError,
@@ -197,6 +198,8 @@ def validate_pdfinfo_options(context: PdfContext) -> None:
"Designer and can only be read by Adobe Acrobat or Adobe Reader."
)
if pdfinfo.has_acroform:
if pdfinfo.has_signature:
raise DigitalSignatureError()
if options.redo_ocr:
raise InputFileError(
"This PDF has a user fillable form. --redo-ocr is not "

View File

@@ -108,6 +108,18 @@ class EncryptedPdfError(ExitCodeException):
)
class DigitalSignatureError(ExitCodeException):
    """Error for an input PDF that carries a digital signature.

    Uses ``ExitCode.input_file`` as its exit code; the message explains
    that OCR would modify the document and so invalidate the signature.
    """

    exit_code = ExitCode.input_file
    message = dedent(
        """\
        Input PDF has a digital signature. OCR would alter the document,
        invalidating the signature.
        """
    )
class TesseractConfigError(ExitCodeException):
"""Tesseract config can't be parsed."""

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import logging
import pikepdf
import pytest
import ocrmypdf
@@ -31,3 +32,17 @@ def test_acroform_message(acroform, caplog, outpdf):
check_ocrmypdf(acroform, outpdf, '--plugin', 'tests/plugins/tesseract_noop.py')
assert 'fillable form' in caplog.text
assert '--force-ocr' in caplog.text
@pytest.fixture
def digitally_signed(acroform, outdir):
    """Yield the path of a copy of the ``acroform`` PDF marked as signed.

    The copy is saved as ``acroform_signed.pdf`` inside ``outdir`` with the
    AcroForm ``SigFlags`` entry set to 3.
    """
    signed_path = outdir / 'acroform_signed.pdf'
    with pikepdf.open(acroform) as source_pdf:
        # 3 sets both SigFlags bits (SignaturesExist | AppendOnly per the
        # PDF specification); no actual signature field is added here.
        source_pdf.Root.AcroForm.SigFlags = 3
        source_pdf.save(signed_path)
    yield signed_path
def test_digital_signature(digitally_signed, no_outpdf):
    """check_ocrmypdf on the signed fixture must raise DigitalSignatureError."""
    expected_error = ocrmypdf.exceptions.DigitalSignatureError
    with pytest.raises(expected_error):
        check_ocrmypdf(digitally_signed, no_outpdf)