Configure pylint in pyproject and delint

2026-05-04 04:35:49 -04:00 · 2022-06-11 01:15:30 -07:00
parent d640c2ded3
commit b17fb61389
33 changed files with 323 additions and 157 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,3 +99,8 @@ module = [
  'libxmp.utils'
 ]
 ignore_missing_imports = true
+
+[tool.pylint.basic]
+good-names = ["i", "j", "k", "ex", "Run", "_", "e", "p", "im", "w", "h", "m", "x", "y", "a", "b", "fp", "n", "f", "s", "v", "q", "dx", "dy"]
+logging-format-style = "old"
+disable = ["raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", "suppressed-message", "useless-suppression", "deprecated-pragma", "use-symbolic-message-instead", "logging-fstring-interpolation", "missing-function-docstring", "too-few-public-methods"]
--- a/setup.py
+++ b/setup.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""setup.py to support older setuptools and pip."""

 from setuptools import setup

--- a/src/ocrmypdf/__init__.py
+++ b/src/ocrmypdf/__init__.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Adds OCR layer to PDFs."""

 from pluggy import HookimplMarker as _HookimplMarker

--- a/src/ocrmypdf/__main__.py
+++ b/src/ocrmypdf/__main__.py
@@ -5,6 +5,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""ocrmypdf command line entrypoint."""

 import logging
 import os
--- a/src/ocrmypdf/_concurrent.py
+++ b/src/ocrmypdf/_concurrent.py
@@ -4,6 +4,8 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""OCRmyPDF concurrency abstractions."""
+
 import threading
 from abc import ABC, abstractmethod
 from typing import Callable, Iterable, Optional
@@ -14,6 +16,8 @@ def _task_noop(*_args, **_kwargs):


 class NullProgressBar:
+    """Progress bar API that takes no actions."""
+
    def __init__(self, **kwargs):
        pass

@@ -28,6 +32,8 @@ class NullProgressBar:


 class Executor(ABC):
+    """Abstract concurrent executor."""
+
    pool_lock = threading.Lock()
    pbar_class = NullProgressBar

--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@@ -14,13 +14,12 @@ import sys
 from io import BytesIO
 from os import fspath
 from pathlib import Path
-from shutil import which
 from subprocess import PIPE, CalledProcessError
 from typing import Optional

 from PIL import Image, UnidentifiedImageError

-from ocrmypdf.exceptions import MissingDependencyError, SubprocessOutputError
+from ocrmypdf.exceptions import SubprocessOutputError
 from ocrmypdf.helpers import Resolution
 from ocrmypdf.subprocess import get_version, run, run_polling_stderr

@@ -33,29 +32,18 @@ except AttributeError:

 log = logging.getLogger(__name__)

-missing_gs_error = """
---------------------------------------------------------------------
-This error normally occurs when ocrmypdf find can't Ghostscript.
-Please ensure Ghostscript is installed and its location is added to
-the system PATH environment variable.
-
-For details see:
-    https://ocrmypdf.readthedocs.io/en/latest/installation.html
---------------------------------------------------------------------
-"""
-
 # Most reliable what to get the bitness of Python interpreter, according to Python docs
-_is_64bit = sys.maxsize > 2**32
+_IS_64BIT = sys.maxsize > 2**32

-_gswin = None
+_GSWIN = None
 if os.name == 'nt':
-    if _is_64bit:
-        _gswin = 'gswin64c'
+    if _IS_64BIT:
+        _GSWIN = 'gswin64c'
    else:
-        _gswin = 'gswin32c'
+        _GSWIN = 'gswin32c'

-GS = _gswin if _gswin else 'gs'
-del _gswin
+GS = _GSWIN if _GSWIN else 'gs'
+del _GSWIN


 def version():
@@ -126,7 +114,7 @@ def rasterize_pdf(
        p = run(args_gs, stdout=PIPE, stderr=PIPE, check=True)
    except CalledProcessError as e:
        log.error(e.stderr.decode(errors='replace'))
-        raise SubprocessOutputError('Ghostscript rasterizing failed')
+        raise SubprocessOutputError('Ghostscript rasterizing failed') from e
    else:
        stderr = p.stderr.decode(errors='replace')
        if _gs_error_reported(stderr):
@@ -156,6 +144,8 @@ def rasterize_pdf(


 class GhostscriptFollower:
+    """Parses the output of Ghostscript and uses it to update the progress bar."""
+
    re_process = re.compile(r"Processing pages \d+ through (\d+).")
    re_page = re.compile(r"Page (\d+)")

--- a/src/ocrmypdf/_exec/tesseract.py
+++ b/src/ocrmypdf/_exec/tesseract.py
@@ -13,7 +13,7 @@ from math import pi
 from os import fspath
 from pathlib import Path
 from subprocess import PIPE, STDOUT, CalledProcessError, TimeoutExpired
-from typing import Dict, Iterator, List, Optional
+from typing import Dict, List, Optional

 from packaging.version import Version
 from PIL import Image
@@ -55,6 +55,8 @@ TESSERACT_THRESHOLDING_METHODS: Dict[str, int] = {


 class TesseractLoggerAdapter(logging.LoggerAdapter):
+    "Prepend [tesseract] to messages emitted from tesseract"
+
    def process(self, msg, kwargs):
        kwargs['extra'] = self.extra
        return f'[tesseract] {msg}', kwargs
@@ -105,6 +107,7 @@ TESSERACT_VERSION_PATTERN = r"""


 class TesseractVersion(Version):
+    "Modify standard packaging.Version regex to support Tesseract idiosyncrasies."
    _regex = re.compile(
        r"^\s*" + TESSERACT_VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE
    )
@@ -169,14 +172,14 @@ def tess_base_args(langs: List[str], engine_mode: Optional[int]) -> List[str]:


 def _parse_tesseract_output(binary_output: bytes) -> Dict[str, str]:
-    def g():
+    def gen():
        for line in binary_output.decode().splitlines():
            line = line.strip()
            parts = line.split(':', maxsplit=2)
            if len(parts) == 2:
                yield parts[0].strip(), parts[1].strip()

-    return {k: v for k, v in g()}
+    return dict(gen())


 def get_orientation(
@@ -205,10 +208,10 @@ def get_orientation(

    osd = _parse_tesseract_output(p.stdout)
    angle = int(osd.get('Orientation in degrees', 0))
-    oc = OrientationConfidence(
+    orient_conf = OrientationConfidence(
        angle=angle, confidence=float(osd.get('Orientation confidence', 0))
    )
-    return oc
+    return orient_conf


 def get_deskew(
--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@@ -30,12 +30,16 @@ if sys.version_info >= (3, 10):
 else:
    from tempfile import TemporaryDirectory as _TemporaryDirectory

-    # Consume the ignore_cleanup_errors kwarg in Python 3.9 and older, without acting
-    # on this keyword. Users who need this issue full resolved should upgrade to Python
-    # 3.10.
-    # See: https://github.com/python/cpython/pull/24793
-
    class TemporaryDirectory(_TemporaryDirectory):
+        """Shim to consume ignore_cleanup_errors kwarg on Python 3.9 and older.
+
+        The argument is consumed without action. If users are getting errors related
+        to temporary file cleanup, they should upgrade to Python 3.10 which properly
+        cleans up temporary directories on Windows.
+
+        See: https://github.com/python/cpython/pull/24793
+        """
+
        def __init__(self, ignore_cleanup_errors=False, **kwargs):
            super().__init__(**kwargs)

@@ -50,6 +54,8 @@ log = logging.getLogger(__name__)


 class UnpaperImageTooLargeError(Exception):
+    """To capture details when an image is too large for unpaper."""
+
    def __init__(
        self,
        w,
@@ -66,8 +72,10 @@ def version() -> str:
    return get_version('unpaper')


+SUFFIXES = {'1': '.pbm', 'L': '.pgm', 'RGB': '.ppm'}
+
+
 def _convert_image(im: Image.Image) -> Tuple[Image.Image, bool, str]:
-    SUFFIXES = {'1': '.pbm', 'L': '.pgm', 'RGB': '.ppm'}
    im_modified = False

    if im.mode not in SUFFIXES:
--- a/src/ocrmypdf/_graft.py
+++ b/src/ocrmypdf/_graft.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""For grafting text-only PDF pages onto freeform PDF pages."""

 import logging
 import uuid
@@ -16,7 +17,6 @@ from pikepdf import (
    Name,
    Object,
    Operator,
-    Page,
    Pdf,
    PdfError,
    PdfMatrix,
@@ -81,6 +81,8 @@ def strip_invisible_text(pdf, page):


 class OcrGrafter:
+    """Manages grafting text-only PDFs onto regular PDFs."""
+
    def __init__(self, context):
        self.context = context
        self.path_base = context.origin
@@ -236,6 +238,8 @@ class OcrGrafter:
    ):
        """Insert the text layer from text page 0 on to pdf_base at page_num"""

+        # pylint: disable=invalid-name
+
        log.debug("Grafting")
        if Path(textpdf).stat().st_size == 0:
            return
--- a/src/ocrmypdf/_jobcontext.py
+++ b/src/ocrmypdf/_jobcontext.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Defines context objects that are passed to child processes/threads."""

 import os
 import shutil
--- a/src/ocrmypdf/_logging.py
+++ b/src/ocrmypdf/_logging.py
@@ -4,15 +4,17 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Logging support classes."""

 import logging
-import sys
 from contextlib import suppress

 from tqdm import tqdm


 class PageNumberFilter(logging.Filter):
+    """Insert the number of the PDF page that emitted a log message into its log record."""
+
    def filter(self, record):
        pageno = getattr(record, 'pageno', None)
        if isinstance(pageno, int):
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""OCRmyPDF page processing pipeline functions."""

 import logging
 import os
@@ -332,7 +333,8 @@ def is_ocr_required(page_context: PageContext):
            ocr_required = False
            log.warning(
                "page too big, skipping OCR "
-                f"({(pixel_count / 1_000_000):.1f} MPixels > {options.skip_big:.1f} MPixels --skip-big)"
+                f"({(pixel_count / 1_000_000):.1f} MPixels > "
+                f"{options.skip_big:.1f} MPixels --skip-big)"
            )
    return ocr_required

@@ -430,8 +432,8 @@ def rasterize(
    output_file = page_context.get_path(f'rasterize{output_tag}.png')
    pageinfo = page_context.pageinfo

-    def at_least(cs):
-        return max(device_idx, colorspaces.index(cs))
+    def at_least(colorspace):
+        return max(device_idx, colorspaces.index(colorspace))

    for image in pageinfo.images:
        if image.type_ != 'image':
@@ -471,10 +473,10 @@ def rasterize(

 def preprocess_remove_background(input_file: Path, page_context: PageContext):
    if any(image.bpc > 1 for image in page_context.pageinfo.images):
-        output_file = page_context.get_path('pp_rm_bg.png')
-        # leptonica.remove_background(input_file, output_file)
        raise NotImplementedError("--remove-background is temporarily not implemented")
-        return output_file
+        # output_file = page_context.get_path('pp_rm_bg.png')
+        # leptonica.remove_background(input_file, output_file)
+        # return output_file
    else:
        log.info("background removal skipped on mono page")
        return input_file
@@ -858,8 +860,8 @@ def enumerate_compress_ranges(iterable):
 def merge_sidecars(txt_files: Iterable[Optional[Path]], context: PdfContext):
    output_file = context.get_path('sidecar.txt')
    with open(output_file, 'w', encoding="utf-8") as stream:
-        for (frm, to), txt_file in enumerate_compress_ranges(txt_files):
-            if frm != 1:
+        for (from_, to_), txt_file in enumerate_compress_ranges(txt_files):
+            if from_ != 1:
                stream.write('\f')  # Form feed between pages
            if txt_file:
                with open(txt_file, encoding="utf-8") as in_:
@@ -872,10 +874,10 @@ def merge_sidecars(txt_files: Iterable[Optional[Path]], context: PdfContext):
                    else:
                        stream.write(txt)
            else:
-                if frm != to:
-                    pages = f'{frm}-{to}'
+                if from_ != to_:
+                    pages = f'{from_}-{to_}'
                else:
-                    pages = f'{frm}'
+                    pages = f'{from_}'
                stream.write(f'[OCR skipped on page(s) {pages}]')
    return output_file

--- a/src/ocrmypdf/_plugin_manager.py
+++ b/src/ocrmypdf/_plugin_manager.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Plugin manager using pluggy."""

 import argparse
 import importlib
@@ -101,12 +102,11 @@ class OcrmypdfPluginManager(pluggy.PluginManager):


 def get_plugin_manager(plugins: List[Union[str, Path]], builtins=True):
-    pm = OcrmypdfPluginManager(
+    return OcrmypdfPluginManager(
        project_name='ocrmypdf',
        plugins=plugins,
        builtins=builtins,
    )
-    return pm


 def get_parser_options_plugins(
--- a/src/ocrmypdf/_sync.py
+++ b/src/ocrmypdf/_sync.py
@@ -4,6 +4,8 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Implements the concurrent and page synchronous parts of the pipeline."""
+

 import argparse
 import logging
@@ -68,7 +70,9 @@ from ocrmypdf.pdfa import file_claims_pdfa
 log = logging.getLogger(__name__)


-class PageResult(NamedTuple):  # pylint: disable=inherit-non-class
+class PageResult(NamedTuple):
+    """Result when a page is finished processing."""
+
    pageno: int
    pdf_page_from_image: Optional[Path]
    ocr: Optional[Path]
@@ -425,7 +429,7 @@ def run_pipeline(
        else:
            log.error(type(e).__name__)
        return e.exit_code
-    except (PIL.Image.DecompressionBombError if not api else NeverRaise) as e:
+    except (PIL.Image.DecompressionBombError if not api else NeverRaise):
        log.exception(
            "A decompression bomb error was encountered while executing the "
            "pipeline. Use the argument --max-image-mpixels to raise the maximum "
@@ -435,7 +439,7 @@ def run_pipeline(
    except (
        BrokenProcessPool if not api else NeverRaise,
        BrokenThreadPool if not api else NeverRaise,
-    ) as e:
+    ):
        log.exception(
            "A worker process was terminated unexpectedly. This is known to occur if "
            "processing your file takes all available swap space and RAM. It may "
--- a/src/ocrmypdf/_validation.py
+++ b/src/ocrmypdf/_validation.py
@@ -5,6 +5,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Validate a work order from API or command line."""

 import locale
 import logging
@@ -25,7 +26,7 @@ from ocrmypdf.exceptions import (
    MissingDependencyError,
    OutputFileAccessError,
 )
-from ocrmypdf.helpers import is_file_writable, monotonic, safe_symlink, samefile
+from ocrmypdf.helpers import is_file_writable, monotonic, safe_symlink
 from ocrmypdf.hocrtransform import HOCR_OK_LANGS
 from ocrmypdf.subprocess import check_external_program

@@ -146,13 +147,13 @@ def check_options_preprocessing(options):
 def _pages_from_ranges(ranges: str) -> Set[int]:
    pages: List[int] = []
    page_groups = ranges.replace(' ', '').split(',')
-    for g in page_groups:
-        if not g:
+    for group in page_groups:
+        if not group:
            continue
        try:
-            start, end = g.split('-')
+            start, end = group.split('-')
        except ValueError:
-            pages.append(int(g) - 1)
+            pages.append(int(group) - 1)
        else:
            try:
                new_pages = list(range(int(start) - 1, int(end)))
@@ -162,7 +163,7 @@ def _pages_from_ranges(ranges: str) -> Set[int]:
                    ) from None
                pages.extend(new_pages)
            except ValueError:
-                raise BadArgsError(f"invalid page subrange '{g}'") from None
+                raise BadArgsError(f"invalid page subrange '{group}'") from None

    if not pages:
        raise BadArgsError(
@@ -237,13 +238,13 @@ def check_options_advanced(options):
 def check_options_metadata(options):
    docinfo = [options.title, options.author, options.keywords, options.subject]
    for s in (m for m in docinfo if m):
-        for c in s:
-            if unicodedata.category(c) == 'Co' or ord(c) >= 0x10000:
+        for char in s:
+            if unicodedata.category(char) == 'Co' or ord(char) >= 0x10000:
+                hexchar = hex(ord(char))[2:].upper()
                raise ValueError(
                    "One of the metadata strings contains "
-                    "an unsupported Unicode character: '{}' (U+{})".format(
-                        c, hex(ord(c))[2:].upper()
-                    )
+                    "an unsupported Unicode character: "
+                    f"{char} (U+{hexchar})"
                )


@@ -293,7 +294,7 @@ def create_input_file(options, work_folder: Path) -> Tuple[Path, str]:
            target = work_folder / 'origin'
            safe_symlink(options.input_file, target)
            return target, os.fspath(options.input_file)
-        except FileNotFoundError:
+        except FileNotFoundError as e:
            msg = f"File not found - {options.input_file}"
            if Path('/.dockerenv').exists():  # pragma: no cover
                msg += (
@@ -304,7 +305,7 @@ def create_input_file(options, work_folder: Path) -> Tuple[Path, str]:
                    "\n"
                    "\tdocker run -i --rm jbarlow83/ocrmypdf - - <input.pdf >output.pdf\n"
                )
-            raise InputFileError(msg)
+            raise InputFileError(msg) from e


 def check_requested_output_file(options):
@@ -324,7 +325,9 @@ def check_requested_output_file(options):
        )


-def report_output_file_size(options, input_file, output_file):
+def report_output_file_size(
+    options, input_file, output_file, file_overhead=4000, page_overhead=3000
+):
    try:
        output_size = Path(output_file).stat().st_size
        input_size = Path(input_file).stat().st_size
@@ -333,9 +336,7 @@ def report_output_file_size(options, input_file, output_file):
    with pikepdf.open(output_file) as p:
        # Overhead constants obtained by estimating amount of data added by OCR
        # PDF/A conversion, and possible XMP metadata addition, with compression
-        FILE_OVERHEAD = 4000
-        OCR_PER_PAGE_OVERHEAD = 3000
-        reasonable_overhead = FILE_OVERHEAD + OCR_PER_PAGE_OVERHEAD * len(p.pages)
+        reasonable_overhead = file_overhead + page_overhead * len(p.pages)
    ratio = output_size / input_size
    reasonable_ratio = output_size / (input_size + reasonable_overhead)
    if reasonable_ratio < 1.35 or input_size < 25000:
--- a/src/ocrmypdf/_version.py
+++ b/src/ocrmypdf/_version.py
@@ -4,6 +4,10 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Get version by introspecting package information.
+
+OCRmyPDF uses setuptools_scm to derive version from git tags.
+"""

 try:
    from importlib_metadata import version as _package_version
--- a/src/ocrmypdf/builtin_plugins/concurrency.py
+++ b/src/ocrmypdf/builtin_plugins/concurrency.py
@@ -11,6 +11,8 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.


+"""OCRmyPDF's multiprocessing/multithreading abstraction layer."""
+
 import logging
 import logging.handlers
 import multiprocessing
@@ -21,7 +23,6 @@ import sys
 import threading
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
 from contextlib import suppress
-from multiprocessing.pool import Pool, ThreadPool
 from typing import Callable, Iterable, Type, Union

 from tqdm import tqdm
@@ -44,7 +45,8 @@ def log_listener(q: Queue):
    should actually write to sys.stderr or whatever we're using, so if this is
    made into a process the main application needs to be directed to it.

-    See https://docs.python.org/3/howto/logging-cookbook.html#logging-to-a-single-file-from-multiple-processes
+    See:
+    https://docs.python.org/3/howto/logging-cookbook.html#logging-to-a-single-file-from-multiple-processes
    """

    while True:
@@ -89,6 +91,8 @@ def process_init(q: Queue, user_init: UserInit, loglevel) -> None:


 def thread_init(q: Queue, user_init: UserInit, loglevel) -> None:
+    del q  # unused but required argument
+    del loglevel  # unused but required argument
    # As a thread, block SIGBUS so the main thread deals with it...
    with suppress(AttributeError):
        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGBUS})
@@ -98,6 +102,8 @@ def thread_init(q: Queue, user_init: UserInit, loglevel) -> None:


 class StandardExecutor(Executor):
+    """Standard OCRmyPDF concurrent task executor."""
+
    def _execute(
        self,
        *,
--- a/src/ocrmypdf/builtin_plugins/default_filters.py
+++ b/src/ocrmypdf/builtin_plugins/default_filters.py
@@ -4,6 +4,8 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""OCRmyPDF automatically installs these filters as plugins."""
+
 from ocrmypdf import hookimpl


--- a/src/ocrmypdf/builtin_plugins/ghostscript.py
+++ b/src/ocrmypdf/builtin_plugins/ghostscript.py
@@ -5,6 +5,8 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.


+"""Built-in plugin to implement PDF page rasterization and PDF/A production."""
+
 import logging

 from ocrmypdf import hookimpl
--- a/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
+++ b/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
@@ -4,6 +4,8 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Built-in plugin to implement OCR using Tesseract."""
+

 import logging
 import os
@@ -138,6 +140,8 @@ def validate(pdfinfo, options):


 class TesseractOcrEngine(OcrEngine):
+    """Implements OCR with Tesseract."""
+
    @staticmethod
    def version():
        return tesseract.version()
--- a/src/ocrmypdf/cli.py
+++ b/src/ocrmypdf/cli.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Command line interface customization and validation."""

 import argparse
 from typing import Any, Callable, Mapping, Optional, TypeVar
@@ -42,7 +43,7 @@ def str_to_int(mapping: Mapping[str, int]):
        except KeyError:
            raise argparse.ArgumentTypeError(
                f"{s!r} must be one of: {', '.join(mapping.keys())}"
-            )
+            ) from None

    return _str_to_int

@@ -51,6 +52,11 @@ class ArgumentParser(argparse.ArgumentParser):
    """Override parser's default behavior of calling sys.exit()

    https://stackoverflow.com/questions/5943249/python-argparse-and-controlling-overriding-the-exit-status-code
+
+    OCRmyPDF began as a CLI but eventually acquired an API. The API works inside out,
+    by synthesizing a command line argument. So we subclass the standard parser with
+    one that doesn't call sys.exit(). Obviously this is not the ideal way to do things
+    but it works for us.
    """

    def __init__(self, *args, **kwargs):
@@ -65,6 +71,8 @@ class ArgumentParser(argparse.ArgumentParser):


 class LanguageSetAction(argparse.Action):
+    """Manages a list of languages."""
+
    def __init__(self, option_strings, dest, default=None, **kwargs):
        if default is None:
            default = set()
--- a/src/ocrmypdf/exceptions.py
+++ b/src/ocrmypdf/exceptions.py
@@ -4,12 +4,16 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""OCRmyPDF's exceptions."""

 from enum import IntEnum
 from textwrap import dedent


 class ExitCode(IntEnum):
+    """OCRmyPDF's exit codes."""
+
+    # pylint: disable=invalid-name
    ok = 0
    bad_args = 1
    input_file = 2
@@ -26,6 +30,8 @@ class ExitCode(IntEnum):


 class ExitCodeException(Exception):
+    """An exception which should return an exit code with sys.exit()."""
+
    exit_code = ExitCode.other_error
    message = ""

@@ -37,17 +43,24 @@ class ExitCodeException(Exception):


 class BadArgsError(ExitCodeException):
+    """Invalid arguments on the command line or API."""
+
    exit_code = ExitCode.bad_args


-class PdfMergeFailedError(ExitCodeException):
+class PdfMergeFailedError(ExitCodeException):  # deprecated
+    """An intermediate PDF can't be merged.
+
+    No longer in use.
+    """
+
    exit_code = ExitCode.input_file
    message = dedent(
        '''\
        Failed to merge PDF image layer with OCR layer

        Usually this happens because the input PDF file is malformed and
-        ocrmypdf cannot automatically correct the problem on its own.
+        ocrmypdf cannot correct the problem on its own.

        Try using
            ocrmypdf --pdf-renderer sandwich  [..other args..]
@@ -56,34 +69,50 @@ class PdfMergeFailedError(ExitCodeException):


 class MissingDependencyError(ExitCodeException):
+    """A third-party dependency is missing."""
+
    exit_code = ExitCode.missing_dependency


 class UnsupportedImageFormatError(ExitCodeException):
+    """The image format is not supported."""
+
    exit_code = ExitCode.input_file


 class DpiError(ExitCodeException):
+    """Missing information about input image DPI."""
+
    exit_code = ExitCode.input_file


 class OutputFileAccessError(ExitCodeException):
+    """Cannot access the intended output file path."""
+
    exit_code = ExitCode.file_access_error


 class PriorOcrFoundError(ExitCodeException):
+    """This file already has OCR."""
+
    exit_code = ExitCode.already_done_ocr


 class InputFileError(ExitCodeException):
+    """Something is wrong with the input file."""
+
    exit_code = ExitCode.input_file


 class SubprocessOutputError(ExitCodeException):
+    """A subprocess returned an unexpected error."""
+
    exit_code = ExitCode.child_process_error


 class EncryptedPdfError(ExitCodeException):
+    """Input PDF is encrypted."""
+
    exit_code = ExitCode.encrypted_pdf
    message = dedent(
        '''\
@@ -100,5 +129,7 @@ class EncryptedPdfError(ExitCodeException):


 class TesseractConfigError(ExitCodeException):
+    """Tesseract config can't be parsed."""
+
    exit_code = ExitCode.invalid_config
    message = "Error occurred while parsing a Tesseract configuration file"
--- a/src/ocrmypdf/extra_plugins/semfree.py
+++ b/src/ocrmypdf/extra_plugins/semfree.py
@@ -37,9 +37,11 @@ from ocrmypdf.helpers import remove_all_log_handlers


 class MessageType(Enum):
-    exception = auto()
-    result = auto()
-    complete = auto()
+    """Implement basic IPC messaging."""
+
+    exception = auto()  # pylint: disable=invalid-name
+    result = auto()  # pylint: disable=invalid-name
+    complete = auto()  # pylint: disable=invalid-name


 def split_every(n: int, iterable: Iterable) -> Iterator:
@@ -59,6 +61,8 @@ def process_sigbus(*args):


 class ConnectionLogHandler(logging.handlers.QueueHandler):
+    """Handler used by child processes to forward log messages to parent."""
+
    def __init__(self, conn: Connection) -> None:
        # sets the parent's queue to None - parent only touches queue
        # in enqueue() which we override
@@ -91,7 +95,7 @@ def process_loop(
    for args in task_args:
        try:
            result = task(args)
-        except Exception as e:
+        except Exception as e:  # pylint: disable=broad-except
            conn.send((MessageType.exception, e))
            break
        else:
@@ -103,6 +107,8 @@ def process_loop(


 class LambdaExecutor(Executor):
+    """Executor for AWS Lambda or similar environments that lack semaphores."""
+
    def _execute(
        self,
        *,
@@ -153,13 +159,13 @@ class LambdaExecutor(Executor):

        with self.pbar_class(**tqdm_kwargs) as pbar:
            while connections:
-                for r in wait(connections):
-                    if not isinstance(r, Connection):
+                for result in wait(connections):
+                    if not isinstance(result, Connection):
                        raise NotImplementedError("We only support Connection()")
                    try:
-                        msg_type, msg = r.recv()
+                        msg_type, msg = result.recv()
                    except EOFError:
-                        connections.remove(r)
+                        connections.remove(result)
                        continue

                    if msg_type == MessageType.result:
@@ -170,7 +176,7 @@ class LambdaExecutor(Executor):
                        logger = logging.getLogger(record.name)
                        logger.handle(record)
                    elif msg_type == MessageType.complete:
-                        connections.remove(r)
+                        connections.remove(result)
                    elif msg_type == MessageType.exception:
                        for process in processes:
                            process.terminate()
--- a/src/ocrmypdf/helpers.py
+++ b/src/ocrmypdf/helpers.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Support functions."""

 import logging
 import multiprocessing
@@ -137,11 +138,11 @@ def safe_symlink(input_file: os.PathLike, soft_link_name: os.PathLike):
    os.symlink(os.path.abspath(input_file), soft_link_name)


-def samefile(f1: os.PathLike, f2: os.PathLike):
+def samefile(file1: os.PathLike, file2: os.PathLike):
    if os.name == 'nt':
-        return f1 == f2
+        return file1 == file2
    else:
-        return os.path.samefile(f1, f2)
+        return os.path.samefile(file1, file2)


 def is_iterable_notstr(thing: Any) -> bool:
@@ -149,9 +150,9 @@ def is_iterable_notstr(thing: Any) -> bool:
    return isinstance(thing, Iterable) and not isinstance(thing, str)


-def monotonic(L: Sequence) -> bool:
+def monotonic(seq: Sequence) -> bool:
    """Does this sequence increase monotonically?"""
-    return all(b > a for a, b in zip(L, L[1:]))
+    return all(b > a for a, b in zip(seq, seq[1:]))


 def page_number(input_file: os.PathLike) -> int:
@@ -166,7 +167,7 @@ def available_cpu_count() -> int:
    except NotImplementedError:
        pass
    warnings.warn(
-        "Could not get CPU count.  Assuming one (1) CPU." "Use -j N to set manually."
+        "Could not get CPU count. Assuming one (1) CPU. Use -j N to set manually."
    )
    return 1

@@ -190,16 +191,16 @@ def is_file_writable(test_file: os.PathLike) -> bool:
                os.W_OK,
                effective_ids=(os.access in os.supports_effective_ids),
            )
+
+        try:
+            fp = p.open('wb')
+        except OSError:
+            return False
        else:
-            try:
-                fp = p.open('wb')
-            except OSError:
-                return False
-            else:
-                fp.close()
-                with suppress(OSError):
-                    p.unlink()
-            return True
+            fp.close()
+            with suppress(OSError):
+                p.unlink()
+        return True
    except (OSError, RuntimeError) as e:
        log.debug(e)
        log.error(str(e))
--- a/src/ocrmypdf/hocrtransform.py
+++ b/src/ocrmypdf/hocrtransform.py
@@ -28,6 +28,8 @@
 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+"""Transform .hocr and page image to text PDF."""
+
 import argparse
 import os
 import re
@@ -99,7 +101,7 @@ HOCR_OK_LANGS = frozenset(
 Element = ElementTree.Element


-class Rect(NamedTuple):  # pylint: disable=inherit-non-class
+class Rect(NamedTuple):
    """A rectangle for managing PDF coordinates."""

    x1: Any
@@ -109,7 +111,7 @@ class Rect(NamedTuple):  # pylint: disable=inherit-non-class


 class HocrTransformError(Exception):
-    pass
+    """Error while applying hOCR transform."""


 class HocrTransform:
@@ -287,7 +289,7 @@ class HocrTransform:
                continue

            pxl_coords = self.element_coordinates(elem)
-            pt = self.pt_from_pixel(pxl_coords)
+            pt = self.pt_from_pixel(pxl_coords)  # pylint: disable=invalid-name

            # draw the bbox border
            if show_bounding_boxes:  # pragma: no cover
--- a/src/ocrmypdf/optimize.py
+++ b/src/ocrmypdf/optimize.py
@@ -4,6 +4,8 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""Post-processing image optimization of OCR PDFs."""
+

 import logging
 import sys
@@ -55,7 +57,9 @@ DEFAULT_PNG_QUALITY = 70
 Xref = NewType('Xref', int)


-class XrefExt(NamedTuple):  # pylint: disable=inherit-non-class
+class XrefExt(NamedTuple):
+    """A PDF xref and image extension pair."""
+
    xref: Xref
    ext: str

@@ -466,7 +470,7 @@ def _find_deflatable_jpeg(
    result = extract_image_filter(pike, root, image, xref)
    if result is None:
        return None
-    pim, filtdp = result
+    _pim, filtdp = result

    if filtdp[0] == Name.DCTDecode and not filtdp[1] and options.optimize >= 1:
        return XrefExt(xref, '.memory')
@@ -707,9 +711,9 @@ def main(infile, outfile, level, jobs=1):
        jb2lossy=False,
    )

-    with TemporaryDirectory() as td:
-        context = PdfContext(options, td, infile, None, None)
-        tmpout = Path(td) / 'out.pdf'
+    with TemporaryDirectory() as tmpdir:
+        context = PdfContext(options, tmpdir, infile, None, None)
+        tmpout = Path(tmpdir) / 'out.pdf'
        optimize(
            infile,
            tmpout,
--- a/src/ocrmypdf/pdfa.py
+++ b/src/ocrmypdf/pdfa.py
@@ -97,7 +97,8 @@ def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'):
        target_filename: filename to save
        icc: ICC identifier such as 'sRGB'
    References:
-        Adobe PDFMARK Reference: https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdfmark_reference.pdf
+        Adobe PDFMARK Reference:
+        https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdfmark_reference.pdf
    """
    if icc != 'sRGB':
        raise NotImplementedError("Only supporting sRGB")
@@ -105,11 +106,11 @@ def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'):
    bytes_icc_profile = (
        package_files('ocrmypdf.data') / SRGB_ICC_PROFILE_NAME
    ).read_bytes()
-    ps = '\n'.join(_make_postscript(icc, bytes_icc_profile, 3))
+    postscript = '\n'.join(_make_postscript(icc, bytes_icc_profile, 3))

    # We should have encoded everything to pure ASCII by this point, and
    # to be safe, only allow ASCII in PostScript
-    Path(target_filename).write_text(ps, encoding='ascii')
+    Path(target_filename).write_text(postscript, encoding='ascii')
    return target_filename


--- a/src/ocrmypdf/pdfinfo/__init__.py
+++ b/src/ocrmypdf/pdfinfo/__init__.py
@@ -6,4 +6,6 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.


+"""For extracting information about PDFs prior to OCR."""
+
 from ocrmypdf.pdfinfo.info import Colorspace, Encoding, PdfInfo
--- a/src/ocrmypdf/pdfinfo/info.py
+++ b/src/ocrmypdf/pdfinfo/info.py
@@ -6,13 +6,15 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.


+"""Extract information about the content of a PDF."""
+
 import atexit
 import logging
 import re
 from collections import defaultdict
 from contextlib import ExitStack
 from decimal import Decimal
-from enum import Enum
+from enum import Enum, auto
 from functools import partial
 from math import hypot, inf, isclose
 from os import PathLike
@@ -20,6 +22,7 @@ from pathlib import Path
 from typing import (
    Container,
    Dict,
+    Iterable,
    Iterator,
    List,
    Mapping,
@@ -48,11 +51,39 @@ from ocrmypdf.pdfinfo.layout import get_page_analysis, get_text_boxes

 logger = logging.getLogger()

-Colorspace = Enum('Colorspace', 'gray rgb cmyk lab icc index sep devn pattern jpeg2000')

-Encoding = Enum(
-    'Encoding', 'ccitt jpeg jpeg2000 jbig2 asciihex ascii85 lzw flate runlength'
-)
+class Colorspace(Enum):
+    """Description of common image colorspaces in a PDF."""
+
+    # pylint: disable=invalid-name
+    gray = auto()
+    rgb = auto()
+    cmyk = auto()
+    lab = auto()
+    icc = auto()
+    index = auto()
+    sep = auto()
+    devn = auto()
+    pattern = auto()
+    jpeg2000 = auto()
+
+
+class Encoding(Enum):
+    """Description of common image encodings in a PDF."""
+
+    # pylint: disable=invalid-name
+    ccitt = auto()
+    jpeg = auto()
+    jpeg2000 = auto()
+    jbig2 = auto()
+    asciihex = auto()
+    ascii85 = auto()
+    lzw = auto()
+    flate = auto()
+    runlength = auto()
+
+
+FloatRect = Tuple[float, float, float, float]

 FRIENDLY_COLORSPACE: Dict[str, Colorspace] = {
    '/DeviceGray': Colorspace.gray,
@@ -105,18 +136,24 @@ def _is_unit_square(shorthand):


 class XobjectSettings(NamedTuple):
+    """Info about an XObject found in a PDF."""
+
    name: str
    shorthand: Tuple[float, float, float, float, float, float]
    stack_depth: int


 class InlineSettings(NamedTuple):
+    """Info about an inline image found in a PDF."""
+
    iimage: PdfInlineImage
    shorthand: Tuple[float, float, float, float, float, float]
    stack_depth: int


 class ContentsInfo(NamedTuple):
+    """Info about various objects found in a PDF."""
+
    xobject_settings: List[XobjectSettings]
    inline_images: List[InlineSettings]
    found_vector: bool
@@ -125,17 +162,19 @@ class ContentsInfo(NamedTuple):


 class TextboxInfo(NamedTuple):
+    """Info about a text box found in a PDF."""
+
    bbox: Tuple[float, float, float, float]
    is_visible: bool
    is_corrupt: bool


 class VectorMarker:
-    pass
+    """Sentinel indicating vector drawing operations were found on a page."""


 class TextMarker:
-    pass
+    """Sentinel indicating text drawing operations were found on a page."""


 def _normalize_stack(graphobjs):
@@ -197,7 +236,7 @@ def _interpret_contents(contentstream: Object, initial_shorthand=UNIT_SQUARE):
            if len(stack) > 32:  # See docstring
                if len(stack) > 128:
                    raise RuntimeError(
-                        "PDF graphics stack overflowed hard limit, operator %i" % n
+                        f"PDF graphics stack overflowed hard limit at operator {n}"
                    )
                warn("PDF graphics stack overflowed spec limit")
        elif operator == 'Q':
@@ -283,7 +322,7 @@ def _get_dpi(ctm_shorthand, image_size) -> Resolution:

    """

-    a, b, c, d, _, _ = ctm_shorthand
+    a, b, c, d, _, _ = ctm_shorthand  # pylint: disable=invalid-name

    # Calculate the width and height of the image in PDF units
    image_drawn = hypot(a, b), hypot(c, d)
@@ -299,6 +338,8 @@ def _get_dpi(ctm_shorthand, image_size) -> Resolution:


 class ImageInfo:
+    """Information about an image found in a PDF."""
+
    DPI_PREC = Decimal('1.000')

    _comp: Optional[int]
@@ -428,7 +469,7 @@ def _find_inline_images(contentsinfo: ContentsInfo) -> Iterator[ImageInfo]:

    for n, inline in enumerate(contentsinfo.inline_images):
        yield ImageInfo(
-            name='inline-%02d' % n, shorthand=inline.shorthand, inline=inline.iimage
+            name=f'inline-{n:02d}', shorthand=inline.shorthand, inline=inline.iimage
        )


@@ -569,10 +610,10 @@ def _process_content_streams(
    yield from _find_form_xobject_images(pdf, container, contentsinfo)


-def _page_has_text(text_blocks, page_width, page_height) -> bool:
+def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) -> bool:
    """Smarter text detection that ignores text in margins"""

-    pw, ph = float(page_width), float(page_height)
+    pw, ph = float(page_width), float(page_height)  # pylint: disable=invalid-name

    margin_ratio = 0.125
    interior_bbox = (
@@ -582,7 +623,7 @@ def _page_has_text(text_blocks, page_width, page_height) -> bool:
        margin_ratio * ph,  # bottom  (first quadrant: bottom < top)
    )

-    def rects_intersect(a, b) -> bool:
+    def rects_intersect(a: FloatRect, b: FloatRect) -> bool:
        """
        Where (a,b) are 4-tuple rects (left-0, top-1, right-2, bottom-3)
        https://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
@@ -604,19 +645,19 @@ def simplify_textboxes(miner, textbox_getter) -> Iterator[TextboxInfo]:
    We do this to save memory and ensure that our objects are pickleable.
    """
    for box in textbox_getter(miner):
-        first_line = box._objs[0]
-        first_char = first_line._objs[0]
+        first_line = box._objs[0]  # pylint: disable=protected-access
+        first_char = first_line._objs[0]  # pylint: disable=protected-access

        visible = first_char.rendermode != 3
        corrupt = first_char.get_text() == '\ufffd'
        yield TextboxInfo(box.bbox, visible, corrupt)


-worker_pdf = None
+worker_pdf = None  # pylint: disable=invalid-name


 def _pdf_pageinfo_sync_init(pdf: Pdf, infile: Path, pdfminer_loglevel):
-    global worker_pdf  # pylint: disable=global-statement
+    global worker_pdf  # pylint: disable=global-statement,invalid-name
    pikepdf_enable_mmap()

    logging.getLogger('pdfminer').setLevel(pdfminer_loglevel)
@@ -701,6 +742,8 @@ def _pdf_pageinfo_concurrent(


 class PageInfo:
+    """Information about type of contents on each page in a PDF."""
+
    _has_text: Optional[bool]
    _has_vector: Optional[bool]
    _images: List[ImageInfo]
@@ -762,15 +805,15 @@ class PageInfo:
            self._has_vector = False
            self._has_text = False
            self._images = []
-            for ci in _process_content_streams(
+            for info in _process_content_streams(
                pdf=pdf, container=page, shorthand=userunit_shorthand
            ):
-                if isinstance(ci, VectorMarker):
+                if isinstance(info, VectorMarker):
                    self._has_vector = True
-                elif isinstance(ci, TextMarker):
+                elif isinstance(info, TextMarker):
                    self._has_text = True
-                elif isinstance(ci, ImageInfo):
-                    self._images.append(ci)
+                elif isinstance(info, ImageInfo):
+                    self._images.append(info)
                else:
                    raise NotImplementedError()
        else:
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@@ -4,6 +4,7 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

+"""OCRmyPDF pluggy plugin specification."""

 from abc import ABC, abstractmethod
 from argparse import ArgumentParser, Namespace
--- a/src/ocrmypdf/subprocess/__init__.py
+++ b/src/ocrmypdf/subprocess/__init__.py
@@ -28,7 +28,12 @@ log = logging.getLogger(__name__)


 def run(
-    args, *, env=None, logs_errors_to_stdout: bool = False, **kwargs
+    args,
+    *,
+    env=None,
+    logs_errors_to_stdout: bool = False,
+    check: bool = False,
+    **kwargs,
 ) -> CompletedProcess:
    """Wrapper around :py:func:`subprocess.run`

@@ -50,7 +55,7 @@ def run(
    stderr = None
    stderr_name = 'stderr' if not logs_errors_to_stdout else 'stdout'
    try:
-        proc = subprocess_run(args, env=env, **kwargs)
+        proc = subprocess_run(args, env=env, check=check, **kwargs)
    except CalledProcessError as e:
        stderr = getattr(e, stderr_name, None)
        raise
@@ -111,6 +116,7 @@ def _fix_process_args(args, env, kwargs):
    program = str(args[0])

    if os.name == 'nt':
+        # pylint: disable=import-outside-toplevel
        from ocrmypdf.subprocess._windows import fix_windows_args

        args = fix_windows_args(program, args, env)
@@ -171,42 +177,42 @@ def get_version(
    return version


-missing_program = '''
+MISSING_PROGRAM = '''
 The program '{program}' could not be executed or was not found on your
 system PATH.
 '''

-missing_optional_program = '''
+MISSING_OPTIONAL_PROGRAM = '''
 The program '{program}' could not be executed or was not found on your
 system PATH.  This program is required when you use the
 {required_for} arguments.  You could try omitting these arguments, or install
 the package.
 '''

-missing_recommend_program = '''
+MISSING_RECOMMEND_PROGRAM = '''
 The program '{program}' could not be executed or was not found on your
 system PATH.  This program is recommended when using the {required_for} arguments,
 but not required, so we will proceed.  For best results, install the program.
 '''

-old_version = '''
+OLD_VERSION = '''
 OCRmyPDF requires '{program}' {need_version} or higher.  Your system appears
 to have {found_version}.  Please update this program.
 '''

-old_version_required_for = '''
+OLD_VERSION_REQUIRED_FOR = '''
 OCRmyPDF requires '{program}' {need_version} or higher when run with the
 {required_for} arguments.  If you omit these arguments, OCRmyPDF may be able to
 proceed.  For best results, install the program.
 '''

-osx_install_advice = '''
+OSX_INSTALL_ADVICE = '''
 If you have homebrew installed, try these command to install the missing
 package:
    brew install {package}
 '''

-linux_install_advice = '''
+LINUX_INSTALL_ADVICE = '''
 On systems with the aptitude package manager (Debian, Ubuntu), try these
 commands:
    sudo apt-get update
@@ -216,7 +222,7 @@ On RPM-based systems (Red Hat, Fedora), search for instructions on
 installing the RPM for {program}.
 '''

-windows_install_advice = '''
+WINDOWS_INSTALL_ADVICE = '''
 If not already installed, install the Chocolatey package manager. Then use
 a command prompt to install the missing package:
    choco install {package}
@@ -234,32 +240,35 @@ def _get_platform():


 def _error_trailer(program, package, **kwargs):
+    del kwargs
    if isinstance(package, Mapping):
        package = package.get(_get_platform(), program)

    if _get_platform() == 'darwin':
-        log.info(osx_install_advice.format(**locals()))
+        log.info(OSX_INSTALL_ADVICE.format(**locals()))
    elif _get_platform() == 'linux':
-        log.info(linux_install_advice.format(**locals()))
+        log.info(LINUX_INSTALL_ADVICE.format(**locals()))
    elif _get_platform() == 'windows':
-        log.info(windows_install_advice.format(**locals()))
+        log.info(WINDOWS_INSTALL_ADVICE.format(**locals()))


 def _error_missing_program(program, package, required_for, recommended):
+    # pylint: disable=unused-argument
    if recommended:
-        log.warning(missing_recommend_program.format(**locals()))
+        log.warning(MISSING_RECOMMEND_PROGRAM.format(**locals()))
    elif required_for:
-        log.error(missing_optional_program.format(**locals()))
+        log.error(MISSING_OPTIONAL_PROGRAM.format(**locals()))
    else:
-        log.error(missing_program.format(**locals()))
+        log.error(MISSING_PROGRAM.format(**locals()))
    _error_trailer(**locals())


 def _error_old_version(program, package, need_version, found_version, required_for):
+    # pylint: disable=unused-argument
    if required_for:
-        log.error(old_version_required_for.format(**locals()))
+        log.error(OLD_VERSION_REQUIRED_FOR.format(**locals()))
    else:
-        log.error(old_version.format(**locals()))
+        log.error(OLD_VERSION.format(**locals()))
    _error_trailer(**locals())


@@ -294,10 +303,15 @@ def check_external_program(
            found_version = version_checker()
        else:  # deprecated
            found_version = version_checker
-    except (CalledProcessError, FileNotFoundError, MissingDependencyError):
+    except (CalledProcessError, FileNotFoundError) as e:
        _error_missing_program(program, package, required_for, recommended)
        if not recommended:
-            raise MissingDependencyError(program)
+            raise MissingDependencyError(program) from e
+        return
+    except MissingDependencyError:
+        _error_missing_program(program, package, required_for, recommended)
+        if not recommended:
+            raise
        return

    def remove_leading_v(s):
--- a/src/ocrmypdf/subprocess/_windows.py
+++ b/src/ocrmypdf/subprocess/_windows.py
@@ -7,6 +7,8 @@
 # type: ignore
 # Non-Windows mypy now breaks when trying to typecheck winreg

+"""Find Tesseract and Ghostscript binaries on Windows using the registry."""
+
 import logging
 import os
 import shutil
@@ -17,9 +19,9 @@ from typing import Any, Callable, Iterable, Iterator, Set, Tuple, TypeVar

 try:
    import winreg
-except ModuleNotFoundError as e:
-    raise ModuleNotFoundError("This module is for Windows only") from e
-
+except ModuleNotFoundError as _notfound_ex:
+    raise ModuleNotFoundError("This module is for Windows only") from _notfound_ex
+

 log = logging.getLogger(__name__)

@@ -40,15 +42,15 @@ def ghostscript_version_key(s: str) -> Tuple[int, int, int]:
 def registry_enum(
    key: winreg.HKEYType, enum_fn: Callable[[winreg.HKEYType, int], T]
 ) -> Iterator[T]:
-    LIMIT = 999
+    limit = 999
    n = 0
-    while n < LIMIT:
+    while n < limit:
        try:
            yield enum_fn(key, n)
            n += 1
        except OSError:
            break
-    if n == LIMIT:
+    if n == limit:
        raise ValueError(f"Too many registry keys under {key}")


@@ -61,6 +63,7 @@ def registry_values(key: winreg.HKEYType) -> Iterator[Tuple[str, Any, int]]:


 def registry_path_ghostscript(env=None) -> Iterator[Path]:
+    del env  # unused (but needed for protocol)
    try:
        with winreg.OpenKey(
            winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Artifex\GPL Ghostscript"
@@ -78,6 +81,7 @@ def registry_path_ghostscript(env=None) -> Iterator[Path]:


 def registry_path_tesseract(env=None) -> Iterator[Path]:
+    del env  # unused (but needed for protocol)
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR") as k:
            for subkey, val, _valtype in registry_values(k):
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -8,7 +8,6 @@
 import datetime
 import warnings
 from datetime import timezone
-from os import fspath
 from shutil import copyfile

 import pikepdf
@@ -326,6 +325,9 @@ def test_metadata_fixup_warning(resources, outdir, caplog):
    assert any(record.levelname == 'WARNING' for record in caplog.records)


+XMP_MAGIC = b'W5M0MpCehiHzreSzNTczkc9d'
+
+
 def test_prevent_gs_invalid_xml(resources, outdir):
    generate_pdfa_ps(outdir / 'pdfa.ps')
    copyfile(resources / 'trivial.pdf', outdir / 'layers.rendered.pdf')
@@ -352,7 +354,7 @@ def test_prevent_gs_invalid_xml(resources, outdir):
    contents = (outdir / 'pdfa.pdf').read_bytes()
    # Since the XML may be invalid, we scan instead of actually feeding it
    # to a parser.
-    XMP_MAGIC = b'W5M0MpCehiHzreSzNTczkc9d'
+
    xmp_start = contents.find(XMP_MAGIC)
    xmp_end = contents.rfind(b'<?xpacket end', xmp_start)
    assert 0 < xmp_start < xmp_end