From fb8b161f6c0faaea66cb9f4e00d416983a5130d4 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Wed, 3 Oct 2018 17:39:50 -0700 Subject: [PATCH] Fix suppression of tesseract config error messages --- src/ocrmypdf/__main__.py | 15 +++++++++------ src/ocrmypdf/exec/tesseract.py | 7 ++++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/ocrmypdf/__main__.py b/src/ocrmypdf/__main__.py index a9d0e9ff..47fa4c63 100755 --- a/src/ocrmypdf/__main__.py +++ b/src/ocrmypdf/__main__.py @@ -624,6 +624,11 @@ def do_ruffus_exception(ruffus_five_tuple, options, log): # rather than a str. So reach into the object and get its name. exc_name = exc_name.__name__ + if exc_name.startswith('ocrmypdf.exceptions.'): + base_exc_name = exc_name.replace('ocrmypdf.exceptions.', '') + exc_class = getattr(ocrmypdf_exceptions, base_exc_name) + exit_code = getattr(exc_class, 'exit_code', ExitCode.other_error) + if exc_name in ('builtins.SystemExit', 'SystemExit'): match = re.search(r"\.(.+?)\)", exc_value) exit_code_name = match.groups()[0] @@ -652,7 +657,6 @@ def do_ruffus_exception(ruffus_five_tuple, options, log): qpdf --decrypt [--password=[password]] infilename """)) - exit_code = ExitCode.encrypted_pdf elif exc_name == 'ocrmypdf.exceptions.PdfMergeFailedError': log.error(textwrap.dedent("""\ Failed to merge PDF image layer with OCR layer @@ -663,11 +667,10 @@ def do_ruffus_exception(ruffus_five_tuple, options, log): Try using ocrmypdf --pdf-renderer sandwich [..other args..] """)) - exit_code = ExitCode.input_file - elif exc_name.startswith('ocrmypdf.exceptions.'): - base_exc_name = exc_name.replace('ocrmypdf.exceptions.', '') - exc_class = getattr(ocrmypdf_exceptions, base_exc_name) - exit_code = exc_class.exit_code + elif exc_name == 'ocrmypdf.exceptions.TesseractConfigError': + log.error(textwrap.dedent("""\ + Error occurred while parsing a tesseract configuration file + """)) elif exc_name == 'PIL.Image.DecompressionBombError': msg = cleanup_ruffus_error_message(exc_value) msg += ("\nUse the --max-image-mpixels argument to set increase the " diff --git a/src/ocrmypdf/exec/tesseract.py b/src/ocrmypdf/exec/tesseract.py index 867c86cc..1dce012c 100644 --- a/src/ocrmypdf/exec/tesseract.py +++ b/src/ocrmypdf/exec/tesseract.py @@ -190,13 +190,14 @@ def tesseract_log_output(log, stdout, input_file): pass # Appears to be spurious/problem with nonwhite borders elif 'Error in boxClipToRectangle' in line: pass # Always appears with pixScanForForeground message + elif 'parameter not found: ' in line.lower(): + log.error(prefix + line.strip()) + problem = line.split('found: ')[1] + raise TesseractConfigError(problem) elif 'error' in line.lower() or 'exception' in line.lower(): log.error(prefix + line.strip()) elif 'warning' in line.lower(): log.warning(prefix + line.strip()) - elif 'parameter not found: ' in line.lower(): - problem = line.split('found: ')[1] - raise TesseractConfigError(problem) elif 'read_params_file' in line.lower(): log.error(prefix + line.strip()) else: