mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-02-09 05:43:10 -05:00
- Add new '--output-type auto' option (now the default) that produces best-effort PDF/A without requiring Ghostscript - When verapdf is available, use speculative PDF/A conversion - Without verapdf, pass through as PDF/A if safe (input claims PDF/A or --force-ocr was used), otherwise output as regular PDF - Make Ghostscript check conditional - only required for pdfa* output types - Update soft error tests to explicitly use --output-type pdfa since they exercise Ghostscript failure modes - Fix Tesseract OSD error handling to check both stdout and stderr for known non-fatal messages like "Too few characters"
68 lines
1.8 KiB
Python
68 lines
1.8 KiB
Python
# SPDX-FileCopyrightText: 2023 James R. Barlow
|
|
# SPDX-License-Identifier: MPL-2.0
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from ocrmypdf.exceptions import ExitCode
|
|
|
|
from .conftest import run_ocrmypdf_api
|
|
|
|
|
|
def test_raster_continue_on_soft_error(resources, outpdf):
    """Expect ``ExitCode.ok`` when soft render errors are tolerated.

    Runs OCRmyPDF on ``francais.pdf`` with the ``tesseract_noop`` and
    ``gs_raster_soft_error`` test plugins loaded and
    ``--continue-on-soft-render-error`` enabled.
    """
    # NOTE(review): the plugin name suggests it injects a soft Ghostscript
    # rasterization error -- confirm in tests/plugins/gs_raster_soft_error.py.
    cli_args = [
        '--continue-on-soft-render-error',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_raster_soft_error.py',
    ]
    result = run_ocrmypdf_api(resources / 'francais.pdf', outpdf, *cli_args)
    assert result == ExitCode.ok
|
|
|
|
|
|
def test_raster_stop_on_soft_error(resources, outpdf):
    """Expect ``ExitCode.child_process_error`` without the continue flag.

    Runs OCRmyPDF on ``francais.pdf`` with the ``tesseract_noop`` and
    ``gs_raster_soft_error`` test plugins loaded, and deliberately omits
    ``--continue-on-soft-render-error``.
    """
    # NOTE(review): the plugin name suggests it injects a soft Ghostscript
    # rasterization error -- confirm in tests/plugins/gs_raster_soft_error.py.
    cli_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_raster_soft_error.py',
    ]
    result = run_ocrmypdf_api(resources / 'francais.pdf', outpdf, *cli_args)
    assert result == ExitCode.child_process_error
|
|
|
|
|
|
def test_render_continue_on_soft_error(resources, outpdf):
    """Expect ``ExitCode.ok`` for PDF/A output when soft errors are tolerated.

    Runs OCRmyPDF on ``francais.pdf`` with the ``tesseract_noop`` and
    ``gs_render_soft_error`` test plugins loaded and
    ``--continue-on-soft-render-error`` enabled.
    """
    cli_args = [
        # An explicit 'pdfa' output type is required to trigger Ghostscript
        # PDF/A generation, which is the step this test exercises.
        '--output-type',
        'pdfa',
        '--continue-on-soft-render-error',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_render_soft_error.py',
    ]
    result = run_ocrmypdf_api(resources / 'francais.pdf', outpdf, *cli_args)
    assert result == ExitCode.ok
|
|
|
|
|
|
@pytest.mark.skipif(os.name == 'nt', reason='Ghostscript on Windows errors out')
def test_render_stop_on_soft_error(resources, outpdf):
    """Expect ``ExitCode.child_process_error`` for PDF/A without the continue flag.

    Runs OCRmyPDF on ``francais.pdf`` with the ``tesseract_noop`` and
    ``gs_render_soft_error`` test plugins loaded, and deliberately omits
    ``--continue-on-soft-render-error``. Skipped when ``os.name == 'nt'``.
    """
    cli_args = [
        # An explicit 'pdfa' output type is required to trigger Ghostscript
        # PDF/A generation, which is the step this test exercises.
        '--output-type',
        'pdfa',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_render_soft_error.py',
    ]
    result = run_ocrmypdf_api(resources / 'francais.pdf', outpdf, *cli_args)
    assert result == ExitCode.child_process_error
|