diff --git a/ocrmypdf/exec/__init__.py b/ocrmypdf/exec/__init__.py
index 9959e87a..2a5c63af 100644
--- a/ocrmypdf/exec/__init__.py
+++ b/ocrmypdf/exec/__init__.py
@@ -4,9 +4,49 @@
 """Wrappers to manage subprocess calls"""
 
 import os
+import re
+import sys
+from subprocess import run, STDOUT, PIPE, CalledProcessError
+from ..exceptions import MissingDependencyError
 
 
 def get_program(name):
     "Check environment variables for overrides to this program"
     envvar = 'OCRMYPDF_' + name.upper()
     return os.environ.get(envvar, name)
+
+
+def get_version(program, *,
+                version_arg='--version', regex=r'(\d+(\.\d+)*)'):
+    """Get the version of the specified program.
+
+    Runs the program (honoring any OCRMYPDF_<NAME> override applied by
+    get_program) with version_arg and returns group 1 of regex, matched
+    at the start of the stripped, combined stdout/stderr output.
+
+    Raises MissingDependencyError if the executable is not found, exits
+    with a nonzero status, or prints output that regex does not match.
+    """
+    args_prog = [
+        get_program(program),
+        version_arg
+    ]
+    try:
+        proc = run(
+            args_prog, close_fds=True, universal_newlines=True,
+            stdout=PIPE, stderr=STDOUT, check=True)
+    except (FileNotFoundError, CalledProcessError) as e:
+        # run() raises FileNotFoundError when the executable is absent;
+        # check=True raises CalledProcessError only for a nonzero exit.
+        raise MissingDependencyError(
+            "Could not find program '{}' on the PATH".format(program)) from e
+
+    output = proc.stdout
+    match = re.match(regex, output.strip())
+    if match is None:
+        raise MissingDependencyError(
+            ("The program '{}' did not report its version. "
+             "Message was:\n{}").format(program, output)
+        )
+
+    return match.group(1)
diff --git a/ocrmypdf/exec/ghostscript.py b/ocrmypdf/exec/ghostscript.py
index 5035abc9..b94c56fb 100644
--- a/ocrmypdf/exec/ghostscript.py
+++ b/ocrmypdf/exec/ghostscript.py
@@ -8,28 +8,14 @@ from functools import lru_cache
 import re
 import sys
 from PIL import Image
-from . import get_program
+from . import get_program, get_version
 from ..exceptions import SubprocessOutputError, MissingDependencyError
 from ..helpers import fspath
 
 
 @lru_cache(maxsize=1)
 def version():
-    args_gs = [
-        get_program('gs'),
-        '--version'
-    ]
-    try:
-        proc = run(
-            args_gs, close_fds=True, universal_newlines=True,
-            stdout=PIPE, stderr=STDOUT, check=True)
-        ver = proc.stdout
-    except CalledProcessError as e:
-        print("Could not find Ghostscript executable on system PATH.",
-              file=sys.stderr)
-        raise MissingDependencyError from e
-
-    return ver.strip()
+    return get_version('gs')
 
 
 def _gs_error_reported(stream):
diff --git a/ocrmypdf/exec/qpdf.py b/ocrmypdf/exec/qpdf.py
index d989a7bf..d05d63e2 100644
--- a/ocrmypdf/exec/qpdf.py
+++ b/ocrmypdf/exec/qpdf.py
@@ -9,25 +9,12 @@ import re
 
 from ..exceptions import InputFileError, SubprocessOutputError, \
     MissingDependencyError, EncryptedPdfError
-from . import get_program
+from . import get_program, get_version
 
 
 @lru_cache(maxsize=1)
 def version():
-    args_qpdf = [
-        get_program('qpdf'),
-        '--version'
-    ]
-    try:
-        p = run(args_qpdf, universal_newlines=True, stderr=STDOUT,
-                stdout=PIPE)
-    except CalledProcessError as e:
-        print("Could not find qpdf executable on system PATH.",
-              file=sys.stderr)
-        raise MissingDependencyError() from e
-
-    qpdf_version = re.match(r'qpdf version (.+)', p.stdout).group(1)
-    return qpdf_version
+    return get_version('qpdf', regex=r'qpdf version (.+)')
 
 
 def check(input_file, log=None):
diff --git a/ocrmypdf/exec/tesseract.py b/ocrmypdf/exec/tesseract.py
index b488b22d..352046e1 100644
--- a/ocrmypdf/exec/tesseract.py
+++ b/ocrmypdf/exec/tesseract.py
@@ -14,7 +14,7 @@ from subprocess import PIPE, CalledProcessError, \
 
 from ..exceptions import MissingDependencyError, TesseractConfigError
 from ..helpers import page_number
-from . import get_program
+from . import get_program, get_version
 
 OrientationConfidence = namedtuple(
     'OrientationConfidence',
@@ -40,21 +40,7 @@ HOCR_TEMPLATE = """
 
 @lru_cache(maxsize=1)
 def version():
-    args_tess = [
-        get_program('tesseract'),
-        '--version'
-    ]
-    try:
-        versions = check_output(
-            args_tess, close_fds=True, universal_newlines=True,
-            stderr=STDOUT)
-    except CalledProcessError as e:
-        print("Could not find Tesseract executable on system PATH.",
-              file=sys.stderr)
-        raise MissingDependencyError from e
-
-    tesseract_version = re.match(r'tesseract\s(.+)', versions).group(1)
-    return tesseract_version
+    return get_version('tesseract', regex=r'tesseract\s(.+)')
 
 
 def v4():
diff --git a/ocrmypdf/exec/unpaper.py b/ocrmypdf/exec/unpaper.py
index e1ceade8..eea9099a 100644
--- a/ocrmypdf/exec/unpaper.py
+++ b/ocrmypdf/exec/unpaper.py
@@ -9,19 +9,7 @@ import sys
 import os
 from functools import lru_cache
 from ..exceptions import MissingDependencyError
-from . import get_program
-
-
-@lru_cache(maxsize=1)
-def version():
-    args_unpaper = [
-        get_program('unpaper'),
-        '--version'
-    ]
-    ver = check_output(
-        args_unpaper, close_fds=True, universal_newlines=True,
-        stderr=STDOUT, timeout=5)
-    return ver.strip()
+from . import get_program, get_version
 
 
 try:
@@ -31,6 +19,11 @@ except ImportError:
     raise
 
 
+@lru_cache(maxsize=1)
+def version():
+    return get_version('unpaper')
+
+
 def run(input_file, output_file, dpi, log, mode_args):
     args_unpaper = [
         get_program('unpaper'),