mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-12-23 22:28:05 -05:00
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
|
# SPDX-License-Identifier: MPL-2.0
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
|
|
import pikepdf
|
|
import pytest
|
|
|
|
from ocrmypdf.exceptions import MissingDependencyError
|
|
|
|
from .conftest import check_ocrmypdf
|
|
|
|
|
|
@pytest.mark.parametrize('optimize', (0, 3))
@pytest.mark.parametrize('pdfa_level', (1, 2, 3))
def test_pdfa(resources, outpdf, optimize, pdfa_level):
    """Check object-stream usage and PDF/A status for each PDF/A output type.

    Invokes ``check_ocrmypdf`` on ``francais.pdf`` with the no-op Tesseract
    plugin, requesting ``--output-type=pdfa-{pdfa_level}`` and
    ``--optimize={optimize}``, then inspects the file at ``outpdf``:

    * levels 2 and 3 must contain ``/ObjStm``; level 1 must not;
    * the metadata's ``pdfa_status`` must equal ``'{pdfa_level}B'``.

    A missing-``pngquant`` error on Windows is reported as an expected
    failure; any other ``MissingDependencyError`` is propagated.
    """
    try:
        check_ocrmypdf(
            resources / 'francais.pdf',
            outpdf,
            '--plugin',
            'tests/plugins/tesseract_noop.py',
            f'--output-type=pdfa-{pdfa_level}',
            f'--optimize={optimize}',
        )
    except MissingDependencyError as e:
        if 'pngquant' in str(e) and optimize in (2, 3) and os.name == 'nt':
            pytest.xfail("pngquant currently not available on Windows")
        # Any other missing dependency is a genuine failure. Re-raise so the
        # real cause is reported instead of a confusing error further down
        # when the output file turns out to be missing.
        raise

    # Read the output once; both branches search the same raw bytes.
    pdf_bytes = outpdf.read_bytes()
    if pdfa_level in (2, 3):
        # PDF/A-2 allows ObjStm
        assert b'/ObjStm' in pdf_bytes
    elif pdfa_level == 1:
        # PDF/A-1 might allow ObjStm, but Acrobat does not approve it, so
        # we don't use it
        assert b'/ObjStm' not in pdf_bytes

    with pikepdf.open(outpdf) as pdf, pdf.open_metadata() as m:
        assert m.pdfa_status == f'{pdfa_level}B'
|