Files
OCRmyPDF/src/ocrmypdf/subprocess/__init__.py
James R. Barlow b9bffa97ba Fix ABCMeta typing
2022-07-12 02:20:22 -07:00

359 lines
12 KiB
Python

# © 2020 James R. Barlow: github.com/jbarlow83
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Wrappers to manage subprocess calls"""
import logging
import os
import re
import sys
from contextlib import suppress
from functools import lru_cache
from pathlib import Path
from subprocess import PIPE, STDOUT, CalledProcessError, CompletedProcess, Popen
from subprocess import run as subprocess_run
from typing import Callable, Mapping, Optional, Sequence, Tuple, Type, Union
from packaging.version import Version
from ocrmypdf.exceptions import MissingDependencyError
# pylint: disable=logging-format-interpolation
# Module-level logger. Each subprocess gets a child of this logger named after
# the program's basename (see _fix_process_args).
log = logging.getLogger(__name__)
# A command line: the program (first element) followed by its arguments.
Args = Sequence[Union[Path, str]]
# Alias for the private class of ``os.environ``, used in type annotations.
OsEnviron = os._Environ # pylint: disable=protected-access
def run(
    args: Args,
    *,
    env: Optional[OsEnviron] = None,
    logs_errors_to_stdout: bool = False,
    check: bool = False,
    **kwargs,
) -> CompletedProcess:
    """Wrapper around :py:func:`subprocess.run`

    The main purpose of this wrapper is to log subprocess output in an orderly
    fashion that identifies the responsible subprocess: output is written to
    a child logger named after the program. An additional task is that, on
    Windows, the arguments are adjusted to help locate our dependencies when
    they are not on the system PATH.

    Arguments should be identical to ``subprocess.run``, except for following:

    Arguments:
        logs_errors_to_stdout: If True, indicates that the process writes its error
            messages to stdout rather than stderr, so stdout should be logged
            if there is an error. If False, stderr is logged. Could be used with
            stderr=STDOUT, stdout=PIPE for example.

    Returns:
        The ``CompletedProcess`` produced by ``subprocess.run``.

    Raises:
        CalledProcessError: Propagated from ``subprocess.run`` when ``check``
            is true and the process fails; its captured output is logged
            before the exception continues to the caller.
    """
    args, env, process_log, _text = _fix_process_args(args, env, kwargs)

    stream_attr = 'stdout' if logs_errors_to_stdout else 'stderr'
    log_template = "stdout/stderr = %s" if logs_errors_to_stdout else "stderr = %s"

    def _log_captured(holder) -> None:
        # ``holder`` is either the CompletedProcess or the CalledProcessError;
        # both may carry the captured stream under the same attribute name.
        captured = getattr(holder, stream_attr, None)
        if not (process_log.isEnabledFor(logging.DEBUG) and captured):
            return
        # Text-mode output is already str and has no .decode(); leave it as is.
        with suppress(AttributeError, UnicodeDecodeError):
            captured = captured.decode('utf-8', 'replace')
        process_log.debug(log_template, captured)

    try:
        proc = subprocess_run(args, env=env, check=check, **kwargs)
    except CalledProcessError as e:
        _log_captured(e)
        raise
    _log_captured(proc)
    return proc
def run_polling_stderr(
    args: Args,
    *,
    callback: Callable[[str], None],
    check: bool = False,
    env: Optional[OsEnviron] = None,
    **kwargs,
) -> CompletedProcess:
    """Run a process like ``ocrmypdf.subprocess.run``, and poll stderr.

    Every line produced on stderr will be forwarded to the callback function.
    The intended use is monitoring progress of subprocesses that output their
    own progress indicators. In addition, each line will be logged if debug
    logging is enabled.

    Requires stderr to be opened in text mode for ease of handling errors. In
    addition the expected encoding= and errors= arguments should be set. Note
    that if stdout is already set up, it need not be binary.

    This function does not set ``stderr=PIPE`` itself; if the caller does not
    request a pipe, no lines are forwarded. stdout is never read here and is
    reported as ``None`` in the result.

    Arguments:
        args: The command line to execute.
        callback: Called once with each line read from stderr (line ending
            included).
        check: If True, raise ``CalledProcessError`` on a nonzero exit code.
        env: Environment for the child process; ``os.environ`` if omitted.
        **kwargs: Passed through to ``subprocess.Popen``; must include
            ``text=True``.

    Returns:
        A ``CompletedProcess`` whose ``stderr`` is the concatenation of all
        lines read and whose ``stdout`` is ``None``.

    Raises:
        CalledProcessError: If ``check`` is true and the exit code is nonzero.
    """
    args, env, process_log, text = _fix_process_args(args, env, kwargs)
    assert text, "Must use text=True"

    with Popen(args, env=env, **kwargs) as proc:
        lines = []
        # Read stderr until EOF regardless of whether the child has already
        # exited. Gating the read on ``proc.poll() is None`` would drop all
        # output from a child that finishes before the first poll.
        if proc.stderr is not None:
            for msg in iter(proc.stderr.readline, ''):
                if process_log.isEnabledFor(logging.DEBUG):
                    process_log.debug(msg.strip())
                callback(msg)
                lines.append(msg)
        # Block until exit (instead of spinning on poll()) so that returncode
        # is populated before it is inspected below.
        proc.wait()
        stderr = ''.join(lines)

    if check and proc.returncode != 0:
        raise CalledProcessError(proc.returncode, args, output=None, stderr=stderr)
    return CompletedProcess(args, proc.returncode, None, stderr=stderr)
def _fix_process_args(
    args: Args, env: Optional[OsEnviron], kwargs
) -> Tuple[Args, OsEnviron, logging.Logger, bool]:
    """Normalize subprocess arguments and pick the logger for the child.

    Arguments:
        args: The command line; its first element is taken as the program.
        env: Environment to use; a falsy value selects ``os.environ``.
        kwargs: The keyword arguments destined for ``subprocess``; only
            inspected here, never modified.

    Returns:
        A tuple of (possibly rewritten args, effective environment, a child
        logger named after the program's basename, whether ``text`` was
        requested in kwargs).
    """
    assert 'universal_newlines' not in kwargs, "Use text= instead of universal_newlines"

    effective_env = env if env else os.environ

    # Search in spoof path if necessary
    executable = str(args[0])
    if os.name == 'nt':
        # pylint: disable=import-outside-toplevel
        from ocrmypdf.subprocess._windows import fix_windows_args

        args = fix_windows_args(executable, args, effective_env)

    log.debug("Running: %s", args)
    child_logger = log.getChild(os.path.basename(executable))
    wants_text = bool(kwargs.get('text', False))
    return args, effective_env, child_logger, wants_text
@lru_cache(maxsize=None)
def get_version(
    program: str,
    *,
    version_arg: str = '--version',
    regex=r'(\d+(\.\d+)*)',
    env: Optional[OsEnviron] = None,
) -> str:
    """Get the version of the specified program

    The program is run with ``version_arg``; stderr is merged into stdout and
    the combined, stripped output is matched against ``regex`` from its start.
    Results are memoized per argument combination.

    Arguments:
        program: The program to version check.
        version_arg: The argument needed to ask for its version, e.g. ``--version``.
        regex: A regular expression to parse the program's output and obtain the
            version. Its first capture group is returned.
        env: Custom ``os.environ`` in which to run program.

    Returns:
        The text captured by the first group of ``regex``.

    Raises:
        MissingDependencyError: If the program cannot be found, exits with an
            error, or prints output that ``regex`` does not match.
    """
    # NOTE(review): lru_cache hashes every argument, so a non-hashable ``env``
    # would be rejected before the body runs -- confirm callers rely only on
    # env=None or a hashable value.
    command = [program, version_arg]
    try:
        completed = run(
            command,
            env=env,
            check=True,
            text=True,
            stdout=PIPE,
            stderr=STDOUT,
            close_fds=True,
        )
    except FileNotFoundError as e:
        raise MissingDependencyError(
            f"Could not find program '{program}' on the PATH"
        ) from e
    except CalledProcessError as e:
        if e.returncode != 0:
            message = (
                f"Ran program '{program}' but it exited with an error:\n{e.output}"
            )
        else:
            message = f"Could not find program '{program}' on the PATH"
        raise MissingDependencyError(message) from e

    output: str = completed.stdout
    found = re.match(regex, output.strip())
    if found is None:
        raise MissingDependencyError(
            f"The program '{program}' did not report its version. "
            f"Message was:\n{output}"
        )
    return found.group(1)
# User-facing message templates. Each is filled in with str.format() by the
# _error_* helpers in this module before being logged.

# Logged as an error by _error_missing_program when the program is neither
# merely recommended nor tied to specific arguments.
MISSING_PROGRAM = '''
The program '{program}' could not be executed or was not found on your
system PATH.
'''
# Logged as an error by _error_missing_program when ``required_for`` is set.
MISSING_OPTIONAL_PROGRAM = '''
The program '{program}' could not be executed or was not found on your
system PATH. This program is required when you use the
{required_for} arguments. You could try omitting these arguments, or install
the package.
'''
# Logged as a warning by _error_missing_program when the program is only
# recommended.
MISSING_RECOMMEND_PROGRAM = '''
The program '{program}' could not be executed or was not found on your
system PATH. This program is recommended when using the {required_for} arguments,
but not required, so we will proceed. For best results, install the program.
'''
# Logged as an error by _error_old_version when ``required_for`` is not set.
OLD_VERSION = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system appears
to have {found_version}. Please update this program.
'''
# Logged as an error by _error_old_version when ``required_for`` is set.
OLD_VERSION_REQUIRED_FOR = '''
OCRmyPDF requires '{program}' {need_version} or higher when run with the
{required_for} arguments. If you omit these arguments, OCRmyPDF may be able to
proceed. For best results, install the program.
'''
# Installation advice logged at info level by _error_trailer; which template
# is used depends on the value returned by _get_platform().
OSX_INSTALL_ADVICE = '''
If you have homebrew installed, try these command to install the missing
package:
brew install {package}
'''
LINUX_INSTALL_ADVICE = '''
On systems with the aptitude package manager (Debian, Ubuntu), try these
commands:
sudo apt-get update
sudo apt-get install {package}
On RPM-based systems (Red Hat, Fedora), search for instructions on
installing the RPM for {program}.
'''
WINDOWS_INSTALL_ADVICE = '''
If not already installed, install the Chocolatey package manager. Then use
a command prompt to install the missing package:
choco install {package}
'''
def _get_platform() -> str:
    """Return a normalized platform name derived from ``sys.platform``.

    Values starting with ``freebsd``, ``linux`` or ``win`` are collapsed to
    ``'freebsd'``, ``'linux'`` and ``'windows'`` respectively; anything else
    is returned unchanged.
    """
    # (prefix of sys.platform, normalized name), checked in order.
    known_prefixes = (
        ('freebsd', 'freebsd'),
        ('linux', 'linux'),
        ('win', 'windows'),
    )
    for prefix, normalized in known_prefixes:
        if sys.platform.startswith(prefix):
            return normalized
    return sys.platform
def _error_trailer(
    program: str, package: Union[str, Mapping[str, str]], **kwargs
) -> None:
    """Log installation advice appropriate to the current platform.

    Arguments:
        program: Name of the missing or outdated program.
        package: Package that supplies the program, or a mapping from platform
            name (as returned by ``_get_platform``) to package name. When the
            mapping has no entry for this platform, ``program`` is used.
        **kwargs: Ignored; accepted so callers can forward extra keywords.

    Advice is logged at info level on darwin, linux and windows; nothing is
    logged on other platforms.
    """
    del kwargs

    platform = _get_platform()
    if isinstance(package, Mapping):
        package = package.get(platform, program)

    advice_by_platform = {
        'darwin': OSX_INSTALL_ADVICE,
        'linux': LINUX_INSTALL_ADVICE,
        'windows': WINDOWS_INSTALL_ADVICE,
    }
    advice = advice_by_platform.get(platform)
    if advice is not None:
        log.info(advice.format(program=program, package=package))
def _error_missing_program(
    program: str, package: str, required_for: Optional[str], recommended: bool
) -> None:
    """Log that ``program`` is missing, followed by installation advice.

    Arguments:
        program: Name of the program that could not be run.
        package: Package that supplies the program; forwarded to
            ``_error_trailer``.
        required_for: Name of the arguments that need the program, if any.
        recommended: If True the message is a warning rather than an error.
    """
    if recommended:
        emit, template = log.warning, MISSING_RECOMMEND_PROGRAM
    elif required_for:
        emit, template = log.error, MISSING_OPTIONAL_PROGRAM
    else:
        emit, template = log.error, MISSING_PROGRAM
    emit(template.format(program=program, required_for=required_for))

    _error_trailer(
        program=program,
        package=package,
        required_for=required_for,
        recommended=recommended,
    )
def _error_old_version(
    program: str,
    package: str,
    need_version: str,
    found_version: str,
    required_for: Optional[str],
) -> None:
    """Log that ``program`` is too old, followed by installation advice.

    Arguments:
        program: Name of the outdated program.
        package: Package that supplies the program; forwarded to
            ``_error_trailer``.
        need_version: Minimum version required.
        found_version: Version that was detected.
        required_for: Name of the arguments that need the program, if any;
            selects which message template is used.
    """
    template = OLD_VERSION_REQUIRED_FOR if required_for else OLD_VERSION
    log.error(
        template.format(
            program=program,
            need_version=need_version,
            found_version=found_version,
            required_for=required_for,
        )
    )
    _error_trailer(
        program=program,
        package=package,
        need_version=need_version,
        found_version=found_version,
        required_for=required_for,
    )
def _remove_leading_v(s: str) -> str:
    """Return ``s`` without one leading ``'v'``, if it starts with one."""
    return s[1:] if s.startswith('v') else s
def check_external_program(
    *,
    program: str,
    package: str,
    version_checker: Callable[[], str],
    need_version: str,
    required_for: Optional[str] = None,
    recommended: bool = False,
    version_parser: Type[Version] = Version,
) -> None:
    """Check for required version of external program and raise exception if not.

    Args:
        program: The name of the program to test.
        package: The name of a software package that typically supplies this program.
            Usually the same as program.
        version_checker: A callable without arguments that retrieves the installed
            version of program. (Passing the version itself instead of a
            callable is still accepted but deprecated.)
        need_version: The minimum required version.
        required_for: The name of an argument or feature that requires this program.
        recommended: If this external program is recommended, instead of raising
            an exception, log a warning and allow execution to continue.
        version_parser: A class that should be used to parse and compare version
            numbers. Used when version numbers do not follow standard conventions.

    Raises:
        MissingDependencyError: If the program is missing or older than
            ``need_version`` and ``recommended`` is false.
    """
    try:
        if callable(version_checker):
            found_version = version_checker()
        else:  # deprecated
            found_version = version_checker
    except (CalledProcessError, FileNotFoundError, MissingDependencyError) as e:
        _error_missing_program(program, package, required_for, recommended)
        if recommended:
            return
        # Low-level failures are wrapped; an existing MissingDependencyError
        # is passed through unchanged.
        if isinstance(e, (CalledProcessError, FileNotFoundError)):
            raise MissingDependencyError(program) from e
        raise

    # A leading "v" (as in "v1.2") is dropped from both versions before parsing.
    found_version = _remove_leading_v(found_version)
    need_version = _remove_leading_v(need_version)

    too_old = bool(found_version) and (
        version_parser(found_version) < version_parser(need_version)
    )
    if too_old:
        _error_old_version(program, package, need_version, found_version, required_for)
        if not recommended:
            raise MissingDependencyError(program)

    log.debug('Found %s %s', program, found_version)