Fix suppression of tesseract config error messages

This commit is contained in:
James R. Barlow
2018-10-03 17:39:50 -07:00
parent baddd6d233
commit fb8b161f6c
2 changed files with 13 additions and 9 deletions

View File

@@ -624,6 +624,11 @@ def do_ruffus_exception(ruffus_five_tuple, options, log):
# rather than a str. So reach into the object and get its name.
exc_name = exc_name.__name__
if exc_name.startswith('ocrmypdf.exceptions.'):
base_exc_name = exc_name.replace('ocrmypdf.exceptions.', '')
exc_class = getattr(ocrmypdf_exceptions, base_exc_name)
exit_code = getattr(exc_class, 'exit_code', ExitCode.other_error)
if exc_name in ('builtins.SystemExit', 'SystemExit'):
match = re.search(r"\.(.+?)\)", exc_value)
exit_code_name = match.groups()[0]
@@ -652,7 +657,6 @@ def do_ruffus_exception(ruffus_five_tuple, options, log):
qpdf --decrypt [--password=[password]] infilename
"""))
exit_code = ExitCode.encrypted_pdf
elif exc_name == 'ocrmypdf.exceptions.PdfMergeFailedError':
log.error(textwrap.dedent("""\
Failed to merge PDF image layer with OCR layer
@@ -663,11 +667,10 @@ def do_ruffus_exception(ruffus_five_tuple, options, log):
Try using
ocrmypdf --pdf-renderer sandwich [..other args..]
"""))
exit_code = ExitCode.input_file
elif exc_name.startswith('ocrmypdf.exceptions.'):
base_exc_name = exc_name.replace('ocrmypdf.exceptions.', '')
exc_class = getattr(ocrmypdf_exceptions, base_exc_name)
exit_code = exc_class.exit_code
elif exc_name == 'ocrmypdf.exceptions.TesseractConfigError':
log.error(textwrap.dedent("""\
Error occurred while parsing a tesseract configuration file
"""))
elif exc_name == 'PIL.Image.DecompressionBombError':
msg = cleanup_ruffus_error_message(exc_value)
msg += ("\nUse the --max-image-mpixels argument to set increase the "

View File

@@ -190,13 +190,14 @@ def tesseract_log_output(log, stdout, input_file):
pass # Appears to be spurious/problem with nonwhite borders
elif 'Error in boxClipToRectangle' in line:
pass # Always appears with pixScanForForeground message
elif 'parameter not found: ' in line.lower():
log.error(prefix + line.strip())
problem = line.split('found: ')[1]
raise TesseractConfigError(problem)
elif 'error' in line.lower() or 'exception' in line.lower():
log.error(prefix + line.strip())
elif 'warning' in line.lower():
log.warning(prefix + line.strip())
elif 'parameter not found: ' in line.lower():
problem = line.split('found: ')[1]
raise TesseractConfigError(problem)
elif 'read_params_file' in line.lower():
log.error(prefix + line.strip())
else: