From 53a7c0e66892acff8afeee41fd4c846738ca160f Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Thu, 17 Dec 2015 08:19:53 -0800 Subject: [PATCH] Refactor qpdf subprocess calls into module --- ocrmypdf/main.py | 24 ++---------------------- ocrmypdf/qpdf.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 ocrmypdf/qpdf.py diff --git a/ocrmypdf/main.py b/ocrmypdf/main.py index 199bb657..85a0a068 100755 --- a/ocrmypdf/main.py +++ b/ocrmypdf/main.py @@ -35,6 +35,7 @@ from .pageinfo import pdf_get_all_pageinfo from .pdfa import generate_pdfa_def from . import ghostscript from . import tesseract +from . import qpdf from . import ExitCode warnings.simplefilter('ignore', pypdf.utils.PdfReadWarning) @@ -352,29 +353,8 @@ def repair_pdf( log, pdfinfo, pdfinfo_lock): - args_qpdf = [ - 'qpdf', input_file, output_file - ] - try: - out = check_output(args_qpdf, stderr=STDOUT, universal_newlines=True) - except CalledProcessError as e: - exit_with_error = True - if e.returncode == 2: - print("{0}: not a valid PDF, and could not repair it.".format( - options.input_file)) - print("Details:") - print(e.output) - elif e.returncode == 3 and e.output.find("operation succeeded"): - exit_with_error = False - out = e.output - print(e.output) - else: - print(e.output) - if exit_with_error: - sys.exit(ExitCode.input_file) - - log.debug(out) + qpdf.repair(input_file, output_file, log) with pdfinfo_lock: pdfinfo.extend(pdf_get_all_pageinfo(output_file)) log.info(pdfinfo) diff --git a/ocrmypdf/qpdf.py b/ocrmypdf/qpdf.py new file mode 100644 index 00000000..481a1adf --- /dev/null +++ b/ocrmypdf/qpdf.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# © 2015 James R. Barlow: github.com/jbarlow83 + +from subprocess import CalledProcessError, check_output, STDOUT + +from . 
import ExitCode


def repair(input_file, output_file, log):
    """Rewrite *input_file* to *output_file* using the ``qpdf`` executable.

    ``qpdf input_file output_file`` is run with stderr merged into stdout.
    A zero exit status is treated as success and nothing is reported.

    Args:
        input_file: Path of the PDF handed to ``qpdf`` as its input.
        output_file: Path ``qpdf`` is asked to write.
        log: Logger-like object; only ``log.debug`` is called.

    Returns:
        None, on a zero exit status or on exit status 3 whose output
        contains "operation succeeded".

    Raises:
        SystemExit: with ``ExitCode.input_file`` when ``qpdf`` exits with
            status 2, or with ``ExitCode.unknown`` for any other failure.
    """
    # Imported locally because this function calls ``sys.exit`` while the
    # module-level imports do not provide ``sys``.
    import sys

    args_qpdf = ['qpdf', input_file, output_file]
    try:
        check_output(args_qpdf, stderr=STDOUT, universal_newlines=True)
    except CalledProcessError as e:
        # Use ``in`` rather than ``str.find``: ``find`` returns -1 (truthy)
        # when the text is missing and 0 (falsy) when it is at the start,
        # so it cannot be used directly as a boolean.
        if e.returncode == 3 and "operation succeeded" in e.output:
            log.debug('qpdf found and fixed errors:')
            log.debug(e.output)
            print(e.output)
            return

        if e.returncode == 2:
            print("{0}: not a valid PDF, and could not repair it.".format(
                input_file))
            print("Details:")
            print(e.output)
            sys.exit(ExitCode.input_file)
        else:
            # NOTE(review): assumes ExitCode defines ``unknown`` -- confirm
            # against the package's ExitCode definition.
            print("{0}: unknown error".format(input_file))
            print(e.output)
            sys.exit(ExitCode.unknown)