Files
OCRmyPDF/tests/test_soft_error.py
James R. Barlow 0c4ee5af4e Add 'auto' output type for best-effort PDF/A without Ghostscript
- Add new '--output-type auto' option (now the default) that produces
  best-effort PDF/A without requiring Ghostscript
- When verapdf is available, use speculative PDF/A conversion
- Without verapdf, pass through as PDF/A if safe (input claims PDF/A
  or --force-ocr was used), otherwise output as regular PDF
- Make Ghostscript check conditional - only required for pdfa* output types
- Update soft error tests to explicitly use --output-type pdfa since they
  exercise Ghostscript failure modes
- Fix Tesseract OSD error handling to check both stdout and stderr for
  known non-fatal messages like "Too few characters"
2026-01-09 00:56:00 -08:00

68 lines
1.8 KiB
Python

# SPDX-FileCopyrightText: 2023 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
from __future__ import annotations
import os
import pytest
from ocrmypdf.exceptions import ExitCode
from .conftest import run_ocrmypdf_api
def test_raster_continue_on_soft_error(resources, outpdf):
    """Expect success when a raster soft error is explicitly tolerated.

    Runs OCRmyPDF on ``francais.pdf`` with the ``tesseract_noop`` and
    ``gs_raster_soft_error`` test plugins loaded and with
    ``--continue-on-soft-render-error`` set; the run must report
    ``ExitCode.ok``.
    """
    # NOTE(review): presumably gs_raster_soft_error simulates a recoverable
    # Ghostscript rasterization failure - confirm against the plugin source.
    plugin_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_raster_soft_error.py',
    ]
    source_pdf = resources / 'francais.pdf'

    result = run_ocrmypdf_api(
        source_pdf,
        outpdf,
        '--continue-on-soft-render-error',
        *plugin_args,
    )

    assert result == ExitCode.ok
def test_raster_stop_on_soft_error(resources, outpdf):
    """Expect a child-process failure when a raster soft error is not tolerated.

    Same setup as the "continue" variant of this test, but without
    ``--continue-on-soft-render-error``; the run must report
    ``ExitCode.child_process_error``.
    """
    cli_args = (
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_raster_soft_error.py',
    )
    source_pdf = resources / 'francais.pdf'

    outcome = run_ocrmypdf_api(source_pdf, outpdf, *cli_args)

    assert outcome == ExitCode.child_process_error
def test_render_continue_on_soft_error(resources, outpdf):
    """Expect success when a render soft error is explicitly tolerated.

    Runs OCRmyPDF on ``francais.pdf`` with PDF/A output requested, the
    ``tesseract_noop`` and ``gs_render_soft_error`` test plugins loaded, and
    ``--continue-on-soft-render-error`` set; the run must report
    ``ExitCode.ok``.
    """
    # Required to trigger Ghostscript PDF/A generation
    output_type_args = ['--output-type', 'pdfa']
    plugin_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_render_soft_error.py',
    ]
    source_pdf = resources / 'francais.pdf'

    result = run_ocrmypdf_api(
        source_pdf,
        outpdf,
        *output_type_args,
        '--continue-on-soft-render-error',
        *plugin_args,
    )

    assert result == ExitCode.ok
@pytest.mark.skipif(os.name == 'nt', reason='Ghostscript on Windows errors out')
def test_render_stop_on_soft_error(resources, outpdf):
    """Expect a child-process failure when a render soft error is not tolerated.

    Runs OCRmyPDF on ``francais.pdf`` with PDF/A output requested and the
    ``tesseract_noop`` and ``gs_render_soft_error`` test plugins loaded, but
    without ``--continue-on-soft-render-error``; the run must report
    ``ExitCode.child_process_error``. Skipped on Windows.
    """
    source_pdf = resources / 'francais.pdf'
    cli_args = (
        # Required to trigger Ghostscript PDF/A generation
        '--output-type',
        'pdfa',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--plugin',
        'tests/plugins/gs_render_soft_error.py',
    )

    outcome = run_ocrmypdf_api(source_pdf, outpdf, *cli_args)

    assert outcome == ExitCode.child_process_error