From 8a3b82e364aec0c55d52a697ac06745c25c871ce Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 14:46:01 -0700
Subject: [PATCH 01/20] Make Python 3.8 minimum requirement

---
 .github/workflows/build.yml         | 2 --
 .readthedocs.yaml                   | 2 +-
 README.md                           | 2 +-
 docs/api.rst                        | 8 --------
 docs/installation.rst               | 4 ++--
 setup.cfg                           | 6 ++----
 src/ocrmypdf/__main__.py            | 2 --
 src/ocrmypdf/subprocess/_windows.py | 5 -----
 tests/test_stdio.py                 | 4 ----
 9 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index a895b0fc..0615005e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -20,8 +20,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: ubuntu-18.04
-            python: "3.7"
           - os: ubuntu-20.04
             python: "3.8"
           - os: ubuntu-20.04
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 38fa8dce..cc3e49f4 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -14,7 +14,7 @@ formats:
 
 # Optionally set the version of Python and requirements required to build your docs
 python:
-  version: "3.7"
+  version: "3.8"
   install:
     - method: pip
       path: .
diff --git a/README.md b/README.md
index 6d7a0a35..e90526c8 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ Linux, Windows, macOS and FreeBSD are supported. Docker images are also availabl
 | macOS (Homebrew)              | ``brew install ocrmypdf``     |
 | macOS (nix)                   | ``nix-env -i  ocrmypdf``      |
 | LinuxBrew                     | ``brew install ocrmypdf``     |
-| FreeBSD                       | ``pkg install py37-ocrmypdf`` |
+| FreeBSD                       | ``pkg install py38-ocrmypdf`` |
 | Conda                         | ``conda install ocrmypdf``    |
 | Ubuntu Snap                   | ``snap install ocrmypdf``     |
 
diff --git a/docs/api.rst b/docs/api.rst
index 0478dd1c..1f185446 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -72,14 +72,6 @@ OCRmyPDF, use processes.
     not take at least one of these steps, process semantics will prevent
     OCRmyPDF from working correctly.
 
-.. warning::
-
-    On macOS with Python 3.7, you must call
-    :func:`multiprocessing.set_start_method("spawn")`. Without this, multiprocessing
-    will be unstable. From the command line, OCRmyPDF does this automatically,
-    but as an API user you must do this. See Python bpo-33725 for details.
-    Python 3.8+ also resolve this automatically.
-
 Logging
 -------
 
diff --git a/docs/installation.rst b/docs/installation.rst
index 79bfb7c4..dd53dc88 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -417,7 +417,7 @@ Native Windows
 
 You must install the following for Windows:
 
-* Python 3.7 (64-bit) or later
+* Python 3.8 (64-bit) or later
 * Tesseract 4.0 or later
 * Ghostscript 9.50 or later
 
@@ -649,7 +649,7 @@ unfortunately, the ``pip install`` command cannot satisfy all of them.
 Installing HEAD revision from sources
 =====================================
 
-If you have ``git`` and Python 3.7 or newer installed, you can install
+If you have ``git`` and Python 3.8 or newer installed, you can install
 from source. When the ``pip`` installer runs, it will alert you if
 dependencies are missing.
 
diff --git a/setup.cfg b/setup.cfg
index 3dc89171..20207083 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,7 +24,6 @@ classifiers =
     Operating System :: POSIX :: Linux
     Programming Language :: Python :: 3
     Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
@@ -54,10 +53,9 @@ install_requires =
     pluggy>=0.13.0
     reportlab>=3.5.66
     tqdm>=4
-    importlib-metadata>=4;python_version<'3.8'  # until Python 3.8
     importlib-resources>=5;python_version<'3.9'  # until Python 3.9
     typing-extensions>=4;python_version<'3.10'
-python_requires = >=3.7
+python_requires = >=3.8
 include_package_data = True
 package_dir =
     =src
@@ -100,7 +98,7 @@ ocrmypdf =
     py.typed
 
 [bdist_wheel]
-python-tag = py37
+python-tag = py38
 
 [aliases]
 test = pytest
diff --git a/src/ocrmypdf/__main__.py b/src/ocrmypdf/__main__.py
index 24d620c8..24a64ae9 100755
--- a/src/ocrmypdf/__main__.py
+++ b/src/ocrmypdf/__main__.py
@@ -71,6 +71,4 @@ def run(args=None):
 
 
 if __name__ == '__main__':
-    if sys.platform == 'darwin' and sys.version_info < (3, 8):
-        set_start_method('spawn')  # see python bpo-33725
     sys.exit(run())
diff --git a/src/ocrmypdf/subprocess/_windows.py b/src/ocrmypdf/subprocess/_windows.py
index 3a6c2a39..46f68931 100644
--- a/src/ocrmypdf/subprocess/_windows.py
+++ b/src/ocrmypdf/subprocess/_windows.py
@@ -171,11 +171,6 @@ SHIMS = [
 def fix_windows_args(program: str, args, env):
     """Adjust our desired program and command line arguments for use on Windows"""
 
-    if sys.version_info < (3, 8):
-        # bpo-33617 - Windows needs manual Path -> str conversion
-        args = [os.fspath(arg) for arg in args]
-        program = os.fspath(program)
-
     # If we are running a .py on Windows, ensure we call it with this Python
     # (to support test suite shims)
     if program.lower().endswith('.py'):
diff --git a/tests/test_stdio.py b/tests/test_stdio.py
index 17536145..577ec607 100644
--- a/tests/test_stdio.py
+++ b/tests/test_stdio.py
@@ -51,10 +51,6 @@ def test_stdout(ocrmypdf_exec, resources, outpdf):
     assert check_pdf(output_file)
 
 
-@pytest.mark.xfail(
-    os.name == 'nt' and sys.version_info < (3, 8),
-    reason="Windows does not like this; not sure how to fix",
-)
 def test_dev_null(resources):
     if 'COV_CORE_DATAFILE' in os.environ:
         pytest.skip("Coverage uses stdout")

From 9ffe829a107f3cabeafa793e0359aba4fdfe08ea Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 14:48:11 -0700
Subject: [PATCH 02/20] Remove external importlib_metadata since Python 3.8
 provides it directly

---
 docs/conf.py             | 6 ++----
 pyproject.toml           | 3 +--
 src/ocrmypdf/_version.py | 5 +----
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 4b6bbbb9..ee636db6 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -76,6 +76,8 @@ author = 'James R. Barlow'
 # The short X.Y version.
 
 import os
+from importlib.metadata import version as package_version
+
 
 on_rtd = os.environ.get('READTHEDOCS') == 'True'
 
@@ -96,10 +98,6 @@ if on_rtd:
     ]
     sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
 
-try:
-    from importlib_metadata import version as package_version
-except ModuleNotFoundError:
-    from importlib.metadata import version as package_version
 
 # The full version, including alpha/beta/rc tags.
 release = package_version('ocrmypdf')
diff --git a/pyproject.toml b/pyproject.toml
index 4d0aba2a..7e9ba8df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,8 +94,7 @@ module = [
   'pdfminer.*',
   'reportlab.*',
   'fitz',
-  'libxmp.utils',
-  'importlib_metadata'
+  'libxmp.utils'
 ]
 ignore_missing_imports = true
 
diff --git a/src/ocrmypdf/_version.py b/src/ocrmypdf/_version.py
index 11f8ed3f..4b368f3e 100644
--- a/src/ocrmypdf/_version.py
+++ b/src/ocrmypdf/_version.py
@@ -8,10 +8,7 @@ OCRmyPDF uses setuptools_scm to derive version from git tags.
 
 from __future__ import annotations
 
-try:
-    from importlib.metadata import version as _package_version
-except ImportError:
-    from importlib_metadata import version as _package_version  # type: ignore
+from importlib.metadata import version as _package_version
 
 PROGRAM_NAME = 'ocrmypdf'
 

From d5a9861d5c4fd043bcfa6d0893b6973ab08b65d5 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 14:49:09 -0700
Subject: [PATCH 03/20] readme: freebsd calls it py-ocrmypdf now

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e90526c8..30fe0693 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ Linux, Windows, macOS and FreeBSD are supported. Docker images are also availabl
 | macOS (Homebrew)              | ``brew install ocrmypdf``     |
 | macOS (nix)                   | ``nix-env -i  ocrmypdf``      |
 | LinuxBrew                     | ``brew install ocrmypdf``     |
-| FreeBSD                       | ``pkg install py38-ocrmypdf`` |
+| FreeBSD                       | ``pkg install py-ocrmypdf``   |
 | Conda                         | ``conda install ocrmypdf``    |
 | Ubuntu Snap                   | ``snap install ocrmypdf``     |
 

From 67773da309d54a7a9b7821a77782c94fbbef8cce Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 15:01:10 -0700
Subject: [PATCH 04/20] Drop support for Ghostscript <9.50

---
 docs/installation.rst                       |  4 +--
 docs/introduction.rst                       |  3 +--
 src/ocrmypdf/_exec/ghostscript.py           | 30 ++-------------------
 src/ocrmypdf/builtin_plugins/ghostscript.py | 22 ++-------------
 tests/plugins/gs_feature_elision.py         |  4 +--
 tests/test_main.py                          |  8 +++---
 tests/test_validation.py                    | 16 -----------
 7 files changed, 12 insertions(+), 75 deletions(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index dd53dc88..cb006566 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -616,8 +616,8 @@ manager. ``pip`` cannot provide them.
 
 The following versions are required:
 
--  Python 3.7 or newer
--  Ghostscript 9.23 or newer
+-  Python 3.8 or newer
+-  Ghostscript 9.50 or newer
 -  Tesseract 4.0.0 or newer
 -  jbig2enc 0.29 or newer
 -  pngquant 2.5 or newer
diff --git a/docs/introduction.rst b/docs/introduction.rst
index 7c11818b..563844bb 100644
--- a/docs/introduction.rst
+++ b/docs/introduction.rst
@@ -190,8 +190,7 @@ Ghostscript also imposes some limitations:
    behavior can be suppressed by setting ``--pdfa-image-compression`` to
    ``jpeg`` or ``lossless`` to set all images to one type or the other.
    Ghostscript has no option to maintain the input image's format.
-   (Ghostscript 9.25+ can copy JPEG images without transcoding them;
-   earlier versions will transcode.)
+   (Modern Ghostscript can copy JPEG images without transcoding them.)
 -  Ghostscript's PDF/A conversion removes any XMP metadata that is not
    one of the standard XMP metadata namespaces for PDFs. In particular,
    PRISM Metdata is removed.
diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
index 9e21c33c..d67044e7 100644
--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@@ -47,21 +47,6 @@ def version():
     return get_version(GS)
 
 
-def jpeg_passthrough_available() -> bool:
-    """Returns True if the installed version of Ghostscript supports JPEG passthru
-
-    Prior to 9.23, Ghostscript decoded and re-encoded JPEGs internally. In 9.23
-    it gained the ability to keep JPEGs unmodified. However, the 9.23
-    implementation was buggy and would deletes the last two bytes of images in
-    some cases, as reported here.
-    https://bugs.ghostscript.com/show_bug.cgi?id=699216
-
-    The issue was fixed for 9.24, hence that is the first version we consider
-    the feature available. (Ghostscript 9.24 has its own problems is blacklisted.)
-    """
-    return version() >= '9.24'
-
-
 def _gs_error_reported(stream) -> bool:
     match = re.search(r'error', stream, flags=re.IGNORECASE)
     return bool(match)
@@ -201,20 +186,9 @@ def generate_pdfa(
         ]
 
     strategy = 'LeaveColorUnchanged'
-    # Older versions of Ghostscript expect a leading slash in
-    # sColorConversionStrategy, newer ones should not have it. See Ghostscript
-    # git commit fe1c025d.
     gs_version = version()
-    strategy = ('/' + strategy) if gs_version < '9.19' else strategy
-
-    if gs_version == '9.23':
-        # 9.23: added JPEG passthrough as a new feature, but with a bug that
-        # incorrectly formats some images. Fixed as of 9.24. So we disable this
-        # feature for 9.23.
-        # https://bugs.ghostscript.com/show_bug.cgi?id=699216
-        compression_args.append('-dPassThroughJPEGImages=false')
-    elif gs_version == '9.56.0':
-        # 9.56.0 breaks our OCR...?
+    if gs_version == '9.56.0':
+        # 9.56.0 introduced a new rendering mode that breaks our OCR
         compression_args.append('-dNEWPDF=false')
 
     # nb no need to specify ProcessColorModel when ColorConversionStrategy
diff --git a/src/ocrmypdf/builtin_plugins/ghostscript.py b/src/ocrmypdf/builtin_plugins/ghostscript.py
index 64375ad8..7ab8d7c9 100644
--- a/src/ocrmypdf/builtin_plugins/ghostscript.py
+++ b/src/ocrmypdf/builtin_plugins/ghostscript.py
@@ -21,37 +21,19 @@ def check_options(options):
         program='gs',
         package='ghostscript',
         version_checker=ghostscript.version,
-        need_version='9.15',  # limited by Travis CI / Ubuntu 14.04 backports
+        need_version='9.50',  # Ubuntu 20.04's version
     )
     gs_version = ghostscript.version()
-    if gs_version in ('9.24', '9.51'):
+    if gs_version in ('9.51',):
         raise MissingDependencyError(
             f"Ghostscript {gs_version} contains serious regressions and is not "
             "supported. Please upgrade to a newer version, or downgrade to the "
             "previous version."
         )
 
-    # We have these constraints to check for.
-    # 1. Ghostscript < 9.20 mangles multibyte Unicode
-    # 2. hocr doesn't work on non-Latin languages (so don't select it)
-    is_latin = options.languages.issubset(HOCR_OK_LANGS)
-    if gs_version < '9.20' and options.output_type != 'pdf' and not is_latin:
-        # https://bugs.ghostscript.com/show_bug.cgi?id=696874
-        # Ghostscript < 9.20 fails to encode multibyte characters properly
-        log.warning(
-            f"The installed version of Ghostscript ({gs_version}) does not work "
-            "correctly with the OCR languages you specified. Use --output-type pdf or "
-            "upgrade to Ghostscript 9.20 or later to avoid this issue."
-        )
-
     if options.output_type == 'pdfa':
         options.output_type = 'pdfa-2'
 
-    if options.output_type == 'pdfa-3' and ghostscript.version() < '9.19':
-        raise MissingDependencyError(
-            "--output-type pdfa-3 requires Ghostscript 9.19 or later"
-        )
-
 
 @hookimpl
 def rasterize_pdf_page(
diff --git a/tests/plugins/gs_feature_elision.py b/tests/plugins/gs_feature_elision.py
index 0f63c502..ce829f5f 100644
--- a/tests/plugins/gs_feature_elision.py
+++ b/tests/plugins/gs_feature_elision.py
@@ -9,13 +9,13 @@ from ocrmypdf import hookimpl
 from ocrmypdf.builtin_plugins import ghostscript
 from ocrmypdf.subprocess import run_polling_stderr
 
-elision_warning = """GPL Ghostscript 9.20: Setting Overprint Mode to 1
+ELISION_WARNING = """GPL Ghostscript 9.50: Setting Overprint Mode to 1
 not permitted in PDF/A-2, overprint mode not set"""
 
 
 def run_append_stderr(*args, **kwargs):
     proc = run_polling_stderr(*args, **kwargs)
-    proc.stderr += '\n' + elision_warning + '\n'
+    proc.stderr += '\n' + ELISION_WARNING + '\n'
     return proc
 
 
diff --git a/tests/test_main.py b/tests/test_main.py
index 739df7b7..c975665b 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -716,11 +716,9 @@ def test_compression_changed(ocrmypdf_exec, resources, image, compression, outpd
     if compression == "jpeg":
         assert pdfimage.enc == Encoding.jpeg
     else:
-        if ghostscript.jpeg_passthrough_available():
-            # Ghostscript 9.23 adds JPEG passthrough, which allows a JPEG to be
-            # copied without transcoding - so report
-            if image.endswith('jpg'):
-                assert pdfimage.enc == Encoding.jpeg
+        if image.endswith('jpg'):
+            # Ghostscript JPEG passthrough - no issue
+            assert pdfimage.enc == Encoding.jpeg
         else:
             assert pdfimage.enc not in (Encoding.jpeg, Encoding.jpeg2000)
 
diff --git a/tests/test_validation.py b/tests/test_validation.py
index fca0cc32..ee29f312 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -48,22 +48,6 @@ def test_hocr_notlatin_warning(caplog):
     assert 'PDF renderer is known to cause' in caplog.text
 
 
-def test_old_ghostscript(caplog):
-    with patch('ocrmypdf._exec.ghostscript.version', return_value='9.19'), patch(
-        'ocrmypdf._exec.tesseract.get_languages', return_value={'eng', 'chi_sim'}
-    ):
-        vd.check_options(*make_opts_pm(language='chi_sim', output_type='pdfa'))
-        assert 'does not work correctly' in caplog.text
-
-    with patch('ocrmypdf._exec.ghostscript.version', return_value='9.18'):
-        with pytest.raises(MissingDependencyError):
-            vd.check_options(*make_opts_pm(output_type='pdfa-3'))
-
-    with patch('ocrmypdf._exec.ghostscript.version', return_value='9.24'):
-        with pytest.raises(MissingDependencyError):
-            vd.check_options(*make_opts_pm())
-
-
 def test_old_tesseract_error():
     with patch('ocrmypdf._exec.tesseract.version', return_value='4.00.00alpha'):
         with pytest.raises(MissingDependencyError):

From 8a8c06c79c52105414fe4c4f2258e976140df8bc Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 15:05:43 -0700
Subject: [PATCH 05/20] Update pre-commit for py3.8+

---
 .pre-commit-config.yaml         | 2 +-
 src/ocrmypdf/_exec/tesseract.py | 2 +-
 src/ocrmypdf/_validation.py     | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8fb0a293..b222746b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
     rev: v2.37.2
     hooks:
       - id: pyupgrade
-        args: ["--py37-plus"]
+        args: ["--py38-plus"]
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v0.971
     hooks:
diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
index ad98836a..78ec318a 100644
--- a/src/ocrmypdf/_exec/tesseract.py
+++ b/src/ocrmypdf/_exec/tesseract.py
@@ -244,7 +244,7 @@ def get_deskew(
 
 def tesseract_log_output(stream: bytes) -> None:
     tlog = TesseractLoggerAdapter(
-        log, extra=log.extra if hasattr(log, 'extra') else None
+        log, extra=log.extra if hasattr(log, 'extra') else None  # type: ignore
     )
 
     if not stream:
diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py
index 0533b928..977ea14e 100644
--- a/src/ocrmypdf/_validation.py
+++ b/src/ocrmypdf/_validation.py
@@ -134,7 +134,7 @@ def check_options_preprocessing(options: Namespace) -> None:
             package='unpaper',
             version_checker=unpaper.version,
             need_version='6.1',
-            required_for=['--clean, --clean-final'],
+            required_for="--clean, --clean-final",  # Problem arguments
         )
         try:
             if options.unpaper_args:
@@ -221,7 +221,7 @@ def check_options_metadata(options: Namespace) -> None:
 def check_options_pillow(options: Namespace) -> None:
     PIL.Image.MAX_IMAGE_PIXELS = int(options.max_image_mpixels * 1_000_000)
     if PIL.Image.MAX_IMAGE_PIXELS == 0:
-        PIL.Image.MAX_IMAGE_PIXELS = None
+        PIL.Image.MAX_IMAGE_PIXELS = None  # type: ignore
 
 
 def _check_plugin_invariant_options(options: Namespace) -> None:

From 80b7cf63307830e4dcafe7a6e71678dc8b0a8a5b Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 15:07:43 -0700
Subject: [PATCH 06/20] Update black target versions

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7e9ba8df..f6e07f29 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.black]
 line-length = 88
-target-version = ["py37", "py38"]
+target-version = ["py38", "py39", "py310", "py311"]
 skip-string-normalization = true
 include = '\.pyi?$'
 exclude = '''

From acc70036cc9ddca7986c520dc627b204ccc5b1f1 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 15:20:29 -0700
Subject: [PATCH 07/20] Set minimum Tesseract to 4.1.1

---
 README.md                                     |  5 +--
 docs/cookbook.rst                             |  2 +-
 docs/installation.rst                         |  6 +--
 src/ocrmypdf/_exec/tesseract.py               | 11 +----
 src/ocrmypdf/builtin_plugins/tesseract_ocr.py |  9 +---
 tests/plugins/tesseract_cache.py              |  2 +-
 tests/plugins/tesseract_debug_rotate.py       |  4 +-
 tests/plugins/tesseract_noop.py               |  4 +-
 tests/test_main.py                            |  1 -
 tests/test_validation.py                      | 41 ++++++-------------
 10 files changed, 26 insertions(+), 59 deletions(-)

diff --git a/README.md b/README.md
index 30fe0693..4a2b66b1 100644
--- a/README.md
+++ b/README.md
@@ -92,10 +92,7 @@ brew install tesseract-lang
 
 You can then pass the `-l LANG` argument to OCRmyPDF to give a hint as to what languages it should search for. Multiple languages can be requested.
 
-OCRmyPDF supports Tesseract 4.0 and the beta versions of Tesseract 5.0. It will
-automatically use whichever version it finds first on the `PATH` environment
-variable. On Windows, if `PATH` does not provide a Tesseract binary, we use
-the highest version number that is installed according to the Windows Registry.
+OCRmyPDF supports Tesseract 4.1.1+. It will automatically use whichever version it finds first on the `PATH` environment variable. On Windows, if `PATH` does not provide a Tesseract binary, we use the highest version number that is installed according to the Windows Registry.
 
 ## Documentation and support
 
diff --git a/docs/cookbook.rst b/docs/cookbook.rst
index 1360d17e..581c6474 100644
--- a/docs/cookbook.rst
+++ b/docs/cookbook.rst
@@ -283,7 +283,7 @@ argument. (Normally, OCRmyPDF will exit with an error if asked to modify
 a file with OCR.)
 
 This may be helpful for users who want to take advantage of accuracy
-improvements in Tesseract 4.0 for files they previously OCRed with an
+improvements in Tesseract for files they previously OCRed with an
 earlier version of Tesseract and OCRmyPDF.
 
 .. code-block:: bash
diff --git a/docs/installation.rst b/docs/installation.rst
index cb006566..a352ac6b 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -418,7 +418,7 @@ Native Windows
 You must install the following for Windows:
 
 * Python 3.8 (64-bit) or later
-* Tesseract 4.0 or later
+* Tesseract 4.1.1 or later
 * Ghostscript 9.50 or later
 
 Using the `Chocolatey <https://chocolatey.org/>`_ package manager, install the
@@ -481,7 +481,7 @@ Cygwin64
 
 First install the the following prerequisite Cygwin packages using ``setup-x86_64.exe``::
 
-    python37 (or later)
+    python38 (or later)
     python3?-devel
     python3?-pip
     python3?-lxml
@@ -618,7 +618,7 @@ The following versions are required:
 
 -  Python 3.8 or newer
 -  Ghostscript 9.50 or newer
--  Tesseract 4.0.0 or newer
+-  Tesseract 4.1.1 or newer
 -  jbig2enc 0.29 or newer
 -  pngquant 2.5 or newer
 -  unpaper 6.1
diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py
index 78ec318a..adb1ec17 100644
--- a/src/ocrmypdf/_exec/tesseract.py
+++ b/src/ocrmypdf/_exec/tesseract.py
@@ -33,7 +33,7 @@ HOCR_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
  <head>
   <title></title>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-  <meta name='ocr-system' content='tesseract 4.0.0' />
+  <meta name='ocr-system' content='tesseract 4.1.1' />
   <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_wconf'/>
 </head>
 <body>
@@ -114,15 +114,6 @@ def version() -> str:
     return get_version('tesseract', regex=r'tesseract\s(.+)')
 
 
-def has_user_words() -> bool:
-    """Does Tesseract have --user-words capability?
-
-    Not available in 4.0, but available in 4.1. Also available in 3.x, but
-    we no longer support 3.x.
-    """
-    return version() >= '4.1'
-
-
 def has_thresholding() -> bool:
     """Does Tesseract have -c thresholding method capability?"""
     return version() >= '5.0'
diff --git a/src/ocrmypdf/builtin_plugins/tesseract_ocr.py b/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
index d9e24e4e..8372577c 100644
--- a/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
+++ b/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
@@ -43,7 +43,7 @@ def add_options(parser):
         metavar='MODE',
         choices=range(0, 4),
         help=(
-            "Set Tesseract 4.0+ OCR engine mode: "
+            "Set Tesseract 4+ OCR engine mode: "
             "0 - original Tesseract only; "
             "1 - neural nets LSTM only; "
             "2 - Tesseract + LSTM; "
@@ -93,7 +93,7 @@ def check_options(options):
         program='tesseract',
         package={'linux': 'tesseract-ocr'},
         version_checker=tesseract.version,
-        need_version='4.0.0-beta.1',  # using backport for Travis CI
+        need_version='4.1.1',  # Ubuntu 20.04 version
         version_parser=tesseract.TesseractVersion,
     )
 
@@ -101,11 +101,6 @@ def check_options(options):
     if options.pdf_renderer == 'auto':
         options.pdf_renderer = 'sandwich'
 
-    if not tesseract.has_user_words() and (options.user_words or options.user_patterns):
-        log.warning(
-            "Tesseract 4.0 (which you have installed) ignores --user-words and "
-            "--user-patterns, so these arguments have no effect."
-        )
     if not tesseract.has_thresholding() and options.tesseract_thresholding != 0:
         log.warning(
             "The installed version of Tesseract does not support changes to its "
diff --git a/tests/plugins/tesseract_cache.py b/tests/plugins/tesseract_cache.py
index b32d2498..bc2750f9 100644
--- a/tests/plugins/tesseract_cache.py
+++ b/tests/plugins/tesseract_cache.py
@@ -21,7 +21,7 @@ were produced.
 
 Certain operations are not cached and routed to Tesseract OCR directly.
 
-Assumes Tesseract 4.0.0-alpha or higher.
+Assumes Tesseract 4+.
 
 """
 
diff --git a/tests/plugins/tesseract_debug_rotate.py b/tests/plugins/tesseract_debug_rotate.py
index f278cb3f..1e10cfb7 100644
--- a/tests/plugins/tesseract_debug_rotate.py
+++ b/tests/plugins/tesseract_debug_rotate.py
@@ -27,7 +27,7 @@ HOCR_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
  <head>
   <title></title>
   <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-  <meta name='ocr-system' content='tesseract 4.0.0' />
+  <meta name='ocr-system' content='tesseract 4.1.1' />
   <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word'/>
  </head>
  <body>
@@ -46,7 +46,7 @@ HOCR_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
 class FixedRotateNoopOcrEngine(OcrEngine):
     @staticmethod
     def version():
-        return '4.0.0'
+        return '4.1.1'
 
     @staticmethod
     def creator_tag(options):
diff --git a/tests/plugins/tesseract_noop.py b/tests/plugins/tesseract_noop.py
index 4dd18dfd..68a0bfe6 100644
--- a/tests/plugins/tesseract_noop.py
+++ b/tests/plugins/tesseract_noop.py
@@ -25,7 +25,7 @@ HOCR_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
  <head>
   <title></title>
   <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-  <meta name='ocr-system' content='tesseract 4.0.0' />
+  <meta name='ocr-system' content='tesseract 4.1.1' />
   <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word'/>
  </head>
  <body>
@@ -44,7 +44,7 @@ HOCR_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
 class NoopOcrEngine(OcrEngine):
     @staticmethod
     def version():
-        return '4.0.0'
+        return '4.1.1'
 
     @staticmethod
     def creator_tag(options):
diff --git a/tests/test_main.py b/tests/test_main.py
index c975665b..d4e3135e 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -539,7 +539,6 @@ def test_tesseract_config_invalid(renderer, resources, invalid_tess_config, outp
     assert p.returncode == ExitCode.invalid_config
 
 
-@pytest.mark.skipif(not tesseract.has_user_words(), reason='not functional until 4.1.0')
 def test_user_words_ocr(resources, outdir):
     # Does not actually test if --user-words causes output to differ
     word_list = outdir / 'wordlist.txt'
diff --git a/tests/test_validation.py b/tests/test_validation.py
index ee29f312..192925f8 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -87,22 +87,6 @@ def test_optimizing(caplog):
     assert 'will be ignored because' in caplog.text
 
 
-def test_user_words(caplog):
-    with patch('ocrmypdf._exec.tesseract.has_user_words', return_value=False):
-        vd.check_options(*make_opts_pm(user_words='foo'))
-        assert (
-            'Tesseract 4.0 (which you have installed) ignores --user-words'
-            in caplog.text
-        )
-    caplog.clear()
-    with patch('ocrmypdf._exec.tesseract.has_user_words', return_value=True):
-        vd.check_options(*make_opts_pm(user_patterns='foo'))
-        assert (
-            'Tesseract 4.0 (which you have installed) ignores --user-words'
-            not in caplog.text
-        )
-
-
 def test_pillow_options():
     vd.check_options_pillow(make_opts(max_image_mpixels=0))
 
@@ -213,37 +197,38 @@ def test_version_comparison():
             program="tesseract",
             package="tesseract",
             version_checker=lambda: '4.0.0-beta.1',
-            need_version='4.0.0',
+            need_version='4.1.1',
             version_parser=TesseractVersion,
         )
     vd.check_external_program(
         program="tesseract",
         package="tesseract",
         version_checker=lambda: 'v5.0.0-alpha.20200201',
-        need_version='4.0.0',
+        need_version='4.1.1',
         version_parser=TesseractVersion,
     )
     vd.check_external_program(
         program="tesseract",
         package="tesseract",
         version_checker=lambda: '5.0.0-rc1.20211030',
-        need_version='4.0.0',
+        need_version='4.1.1',
         version_parser=TesseractVersion,
     )
     vd.check_external_program(
         program="tesseract",
         package="tesseract",
-        version_checker=lambda: 'v4.0.0.20181030',  # Some Windows builds use this format
-        need_version='4.0.0',
-        version_parser=TesseractVersion,
-    )
-    vd.check_external_program(
-        program="tesseract",
-        package="tesseract",
-        version_checker=lambda: '4.1.1-rc2-25-g9707',
-        need_version='4.0.0',
+        version_checker=lambda: 'v4.1.1.20181030',  # Some Windows builds use this format
+        need_version='4.1.1',
         version_parser=TesseractVersion,
     )
+    with pytest.raises(MissingDependencyError):
+        vd.check_external_program(
+            program="tesseract",
+            package="tesseract",
+            version_checker=lambda: '4.1.1-rc2-25-g9707',
+            need_version='4.1.1',
+            version_parser=TesseractVersion,
+        )
     with pytest.raises(MissingDependencyError):
         vd.check_external_program(
             program="dummy_fails",

From d619fac0bd94915426930b7ba1e207f168da2349 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Tue, 2 Aug 2022 15:30:20 -0700
Subject: [PATCH 08/20] unpaper: tidy file

---
 src/ocrmypdf/_exec/unpaper.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
index d7f24265..dd3a5116 100644
--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@@ -1,12 +1,10 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0
 
-from __future__ import annotations
-
-# unpaper documentation:
-# https://github.com/Flameeyes/unpaper/blob/master/doc/basic-concepts.md
 """Interface to unpaper executable"""
 
+from __future__ import annotations
+
 import logging
 import os
 import shlex
@@ -22,6 +20,10 @@ from PIL import Image
 from ocrmypdf.exceptions import MissingDependencyError, SubprocessOutputError
 from ocrmypdf.subprocess import get_version, run
 
+# unpaper documentation:
+# https://github.com/Flameeyes/unpaper/blob/master/doc/basic-concepts.md
+
+
 if sys.version_info >= (3, 10):
     from tempfile import TemporaryDirectory
 else:

From 53db866ef96f98e442421f65e1662bb244047240 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 03:54:55 -0700
Subject: [PATCH 09/20] Remove deprecated exception PdfMergeFailedError

---
 src/ocrmypdf/__init__.py   |  1 -
 src/ocrmypdf/api.py        |  2 --
 src/ocrmypdf/exceptions.py | 20 --------------------
 3 files changed, 23 deletions(-)

diff --git a/src/ocrmypdf/__init__.py b/src/ocrmypdf/__init__.py
index 7f4a4cd2..25c3039d 100644
--- a/src/ocrmypdf/__init__.py
+++ b/src/ocrmypdf/__init__.py
@@ -21,7 +21,6 @@ from ocrmypdf.exceptions import (
     InputFileError,
     MissingDependencyError,
     OutputFileAccessError,
-    PdfMergeFailedError,
     PriorOcrFoundError,
     SubprocessOutputError,
     TesseractConfigError,
diff --git a/src/ocrmypdf/api.py b/src/ocrmypdf/api.py
index 8d92eb71..67febaeb 100644
--- a/src/ocrmypdf/api.py
+++ b/src/ocrmypdf/api.py
@@ -284,8 +284,6 @@ def ocr(  # pylint: disable=unused-argument
             ``"-"``, some final validation steps are not performed (we do not read
             back the stream after it is written).
     Raises:
-        ocrmypdf.PdfMergeFailedError: If the input PDF is malformed, preventing merging
-            with the OCR layer.
         ocrmypdf.MissingDependencyError: If a required dependency program is missing or
             was not found on PATH.
         ocrmypdf.UnsupportedImageFormatError: If the input file type was an image that
diff --git a/src/ocrmypdf/exceptions.py b/src/ocrmypdf/exceptions.py
index d5a9ae8e..b56e8a1a 100644
--- a/src/ocrmypdf/exceptions.py
+++ b/src/ocrmypdf/exceptions.py
@@ -47,26 +47,6 @@ class BadArgsError(ExitCodeException):
     exit_code = ExitCode.bad_args
 
 
-class PdfMergeFailedError(ExitCodeException):  # deprecated
-    """An intermediate PDF can't be merged.
-
-    No longer in use.
-    """
-
-    exit_code = ExitCode.input_file
-    message = dedent(
-        '''\
-        Failed to merge PDF image layer with OCR layer
-
-        Usually this happens because the input PDF file is malformed and
-        ocrmypdf cannot correct the problem on its own.
-
-        Try using
-            ocrmypdf --pdf-renderer sandwich  [..other args..]
-        '''
-    )
-
-
 class MissingDependencyError(ExitCodeException):
     """A third-party dependency is missing."""
 

From 7e97981114d15990651a50878ff9702b97473171 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 03:56:44 -0700
Subject: [PATCH 10/20] hocrtx: unused imports

---
 src/ocrmypdf/hocrtransform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ocrmypdf/hocrtransform.py b/src/ocrmypdf/hocrtransform.py
index 409aa689..b05eaa9e 100755
--- a/src/ocrmypdf/hocrtransform.py
+++ b/src/ocrmypdf/hocrtransform.py
@@ -14,7 +14,7 @@ import re
 import warnings
 from math import atan, cos, sin
 from pathlib import Path
-from typing import Any, NamedTuple, Optional, Tuple, Union
+from typing import Any, NamedTuple
 from xml.etree import ElementTree
 
 with warnings.catch_warnings():

From 670ce2b969b1d88e6e8f037738dd5582f61d03a5 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 03:58:10 -0700
Subject: [PATCH 11/20] Remove support for non-callable version checker

---
 src/ocrmypdf/subprocess/__init__.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/ocrmypdf/subprocess/__init__.py b/src/ocrmypdf/subprocess/__init__.py
index cc5b3550..1254727a 100644
--- a/src/ocrmypdf/subprocess/__init__.py
+++ b/src/ocrmypdf/subprocess/__init__.py
@@ -314,7 +314,7 @@ def check_external_program(
         program: The name of the program to test.
         package: The name of a software package that typically supplies this program.
             Usually the same as program.
-        version_check: A callable without arguments that retrieves the installed
+        version_checker: A callable without arguments that retrieves the installed
             version of program.
         need_version: The minimum required version.
         required_for: The name of an argument of feature that requires this program.
@@ -325,10 +325,7 @@ def check_external_program(
     """
 
     try:
-        if callable(version_checker):
-            found_version = version_checker()
-        else:  # deprecated
-            found_version = version_checker
+        found_version = version_checker()
     except (CalledProcessError, FileNotFoundError) as e:
         _error_missing_program(program, package, required_for, recommended)
         if not recommended:

From 1a0a797ca64ab78717d4019ccce50ac370cc606c Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 04:00:25 -0700
Subject: [PATCH 12/20] Remove our @deprecated decorator and use standard
 package

---
 setup.cfg               |  1 +
 src/ocrmypdf/helpers.py | 18 ------------------
 tests/test_helpers.py   |  9 ---------
 3 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 20207083..70309431 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -46,6 +46,7 @@ packages = find:
 install_requires =
     Pillow>=8.2.0
     coloredlogs>=14.0  # strictly optional
+    deprecation>=2.1.0
     img2pdf>=0.3.0  # pure Python
     packaging>=20
     pdfminer.six!=20200720,>=20191110
diff --git a/src/ocrmypdf/helpers.py b/src/ocrmypdf/helpers.py
index 481b2f1b..033f5553 100644
--- a/src/ocrmypdf/helpers.py
+++ b/src/ocrmypdf/helpers.py
@@ -13,7 +13,6 @@ import warnings
 from collections import namedtuple
 from collections.abc import Iterable
 from contextlib import suppress
-from functools import wraps
 from io import StringIO
 from math import isclose, isfinite
 from pathlib import Path
@@ -275,20 +274,3 @@ def pikepdf_enable_mmap():
     # Fix is not in pybind11 2.6.0
     # log.debug("pikepdf mmap disabled")
     return
-
-
-def deprecated(func):
-    """Warn that function is deprecated."""
-
-    @wraps(func)
-    def new_func(*args, **kwargs):
-        warnings.simplefilter('always', DeprecationWarning)  # turn off filter
-        warnings.warn(
-            f"Call to deprecated function {func.__name__}.",
-            category=DeprecationWarning,
-            stacklevel=2,
-        )
-        warnings.simplefilter('default', DeprecationWarning)  # reset filter
-        return func(*args, **kwargs)
-
-    return new_func
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index bd43b5b5..e1992b32 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -54,15 +54,6 @@ def test_no_cpu_count(monkeypatch):
     assert invoked, "Patched function called during test"
 
 
-def test_deprecated():
-    @helpers.deprecated
-    def old_function():
-        return 42
-
-    with pytest.deprecated_call():
-        assert old_function() == 42
-
-
 skipif_docker = pytest.mark.skipif(running_in_docker(), reason="fails on Docker")
 
 

From 4104904a1e7e0ac955d0e0f51d4adc3019a59a1d Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 04:13:02 -0700
Subject: [PATCH 13/20] Document reason for suppressing some third-party
 deprecation warnings

---
 src/ocrmypdf/hocrtransform.py |  1 +
 tests/test_metadata.py        | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/ocrmypdf/hocrtransform.py b/src/ocrmypdf/hocrtransform.py
index b05eaa9e..306dfa24 100755
--- a/src/ocrmypdf/hocrtransform.py
+++ b/src/ocrmypdf/hocrtransform.py
@@ -18,6 +18,7 @@ from typing import Any, NamedTuple
 from xml.etree import ElementTree
 
 with warnings.catch_warnings():
+    # reportlab uses deprecated load_module
     warnings.filterwarnings(
         'ignore', category=DeprecationWarning, message=r".*load_module.*"
     )
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index dbd9f7f9..80b97c78 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -28,9 +28,6 @@ except ImportError:
     fitz = None
 
 
-pytestmark = pytest.mark.filterwarnings('ignore:.*XMLParser.*:DeprecationWarning')
-
-
 @pytest.mark.parametrize("output_type", ['pdfa', 'pdf'])
 def test_preserve_docinfo(output_type, resources, outpdf):
     pdf_before = pikepdf.open(resources / 'graph.pdf')
@@ -174,7 +171,12 @@ def test_creation_date_preserved(output_type, resources, infile, outpdf):
 def libxmp_file_to_dict():
     try:
         with warnings.catch_warnings():
-            warnings.simplefilter("ignore", DeprecationWarning)
+            # libxmp imports distutils.Version, which is deprecated
+            warnings.filterwarnings(
+                "ignore",
+                category=DeprecationWarning,
+                message=r".*distutils Version classes are deprecated.*",
+            )
             from libxmp.utils import (
                 file_to_dict,  # pylint: disable=import-outside-toplevel
             )

From 4d2f499f97c123c9c21d46cba3ce3486fe913c46 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 04:15:56 -0700
Subject: [PATCH 14/20] Remove optional status of coloredlogs

Everything optional is a possible complication.
Better to remove the option.
---
 setup.cfg           |  2 +-
 src/ocrmypdf/api.py | 14 ++++----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 70309431..933c767f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -45,7 +45,7 @@ project_urls =
 packages = find:
 install_requires =
     Pillow>=8.2.0
-    coloredlogs>=14.0  # strictly optional
+    coloredlogs>=14.0
     deprecation>=2.1.0
     img2pdf>=0.3.0  # pure Python
     packaging>=20
diff --git a/src/ocrmypdf/api.py b/src/ocrmypdf/api.py
index 67febaeb..52821869 100644
--- a/src/ocrmypdf/api.py
+++ b/src/ocrmypdf/api.py
@@ -15,6 +15,9 @@ from pathlib import Path
 from typing import AnyStr, BinaryIO, Iterable, Union
 from warnings import warn
 
+import coloredlogs
+from humanfriendly.terminal import enable_ansi_support
+
 from ocrmypdf._logging import PageNumberFilter, TqdmConsole
 from ocrmypdf._plugin_manager import get_plugin_manager
 from ocrmypdf._sync import run_pipeline
@@ -22,15 +25,6 @@ from ocrmypdf._validation import check_options
 from ocrmypdf.cli import ArgumentParser, get_parser
 from ocrmypdf.helpers import is_iterable_notstr
 
-try:
-    import coloredlogs
-except ModuleNotFoundError:
-    coloredlogs = None  # pylint: disable=invalid-name
-
-if coloredlogs:
-    from humanfriendly.terminal import enable_ansi_support
-
-
 StrPath = Union[Path, AnyStr]
 PathOrIO = Union[BinaryIO, StrPath]
 
@@ -121,7 +115,7 @@ def configure_logging(
 
     use_colors = progress_bar_friendly
     formatter = None
-    if coloredlogs and use_colors:
+    if use_colors:
         use_colors = enable_ansi_support()
         if use_colors:
             use_colors = coloredlogs.terminal_supports_colors()

From c9389c77138808f02a4de3fb3b5e4fcf7cec7c81 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 04:21:15 -0700
Subject: [PATCH 15/20] Remove deprecated falsy handling of filter_page_image

---
 src/ocrmypdf/_sync.py      |  4 ++--
 src/ocrmypdf/pluginspec.py | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/ocrmypdf/_sync.py b/src/ocrmypdf/_sync.py
index 6a9a85a8..5843f3b4 100644
--- a/src/ocrmypdf/_sync.py
+++ b/src/ocrmypdf/_sync.py
@@ -206,8 +206,8 @@ def exec_page_sync(page_context: PageContext) -> PageResult:
         filtered_image = page_context.plugin_manager.hook.filter_page_image(
             page=page_context, image_filename=visible_image_out
         )
-        if filtered_image:
-            visible_image_out = filtered_image
+
+        visible_image_out = filtered_image
         pdf_page_from_image_out = create_pdf_page_from_image(
             visible_image_out, page_context, orientation_correction
         )
diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py
index c3298e4c..551d42e6 100644
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@@ -276,17 +276,19 @@ def filter_page_image(page: PageContext, image_filename: Path) -> Path:
 
     If the return value is a file that does not exist, ``FileNotFoundError``
     will occur. The return value should be a path to a file in the same folder
-    as ``image_filename``.
-
-    Implementation detail: If the value returned is falsy, OCRmyPDF will ignore
-    the return value and assume the input file was unmodified. This is deprecated.
-    To leave the image unmodified, ``image_filename`` should be returned.
+    as ``image_filename``. To leave the image unmodified, ``image_filename``
+    should be returned.
 
     Note:
         This hook will be called from child processes. Modifying global state
         will not affect the main process or other child processes.
     Note:
         This is a :ref:`firstresult hook<firstresult>`.
+
+    .. versionchanged:: 14.0
+        Previously, OCRmyPDF would treat as a falsy value as a request to leave
+        the image unmodified. This is no longer supported and will trigger an
+        exception.
     """
 
 

From 88d2949e6b94683963c3b58398be34e6b3172fef Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 4 Aug 2022 05:00:46 -0700
Subject: [PATCH 16/20] Abolish setup.cfg and migrate to pyproject.toml

---
 pyproject.toml          |  86 +++++++++++++++++++++++++++++-
 setup.cfg               | 115 ----------------------------------------
 src/ocrmypdf/RELEASE.md |   4 +-
 3 files changed, 87 insertions(+), 118 deletions(-)
 delete mode 100644 setup.cfg

diff --git a/pyproject.toml b/pyproject.toml
index f6e07f29..afaa60b3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,97 @@
 [build-system]
 requires = [
-  "setuptools >= 52",
+  "setuptools >= 61",
   "setuptools_scm[toml] >= 7.0.5",
   "wheel"
 ]
 build-backend = "setuptools.build_meta"
 
+[project]
+name = "ocrmypdf"
+dynamic = ["version"]
+description = "OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched"
+readme = "README.md"
+license = {text = "MPL-2.0"}
+requires-python = ">=3.8"
+dependencies = [
+  "Pillow>=8.2.0",
+  "coloredlogs>=14.0",
+  "deprecation>=2.1.0",
+  "img2pdf>=0.3.0", # pure Python
+  "packaging>=20",
+  "pdfminer.six!=20200720,>=20191110",
+  "pikepdf!=5.0.0,>=4.0.0",
+  "pluggy>=0.13.0",
+  "reportlab>=3.5.66",
+  "tqdm>=4",
+  "importlib-resources>=5;python_version<'3.9'",  # until Python 3.9
+  "typing-extensions>=4;python_version<'3.10'",
+]
+authors = [{name = "James R. Barlow", email="james@purplerock.ca"}]
+classifiers = [
+  "Development Status :: 5 - Production/Stable",
+  "Environment :: Console",
+  "Intended Audience :: End Users/Desktop",
+  "Intended Audience :: Science/Research",
+  "Intended Audience :: System Administrators",
+  "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+  "Operating System :: MacOS :: MacOS X",
+  "Operating System :: Microsoft :: Windows :: Windows 10",
+  "Operating System :: POSIX",
+  "Operating System :: POSIX :: BSD",
+  "Operating System :: POSIX :: Linux",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.8",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Topic :: Scientific/Engineering :: Image Recognition",
+  "Topic :: Text Processing :: Indexing",
+  "Topic :: Text Processing :: Linguistic",
+]
+keywords = [
+  "PDF",
+  "OCR",
+  "optical character recognition",
+  "PDF/A",
+  "scanning",
+]
+
+[project.urls]
+Documentation = "https://ocrmypdf.readthedocs.io/"
+Source = "https://github.com/ocrmypdf/OCRmyPDF"
+Tracker = "https://github.com/ocrmypdf/OCRmyPDF/issues"
+
+[project.optional-dependencies]
+docs = ["sphinx", "sphinx-issues", "sphinx-rtd-theme"]
+extended_test = ["PyMuPDF==1.19.1"]
+test = [
+  "coverage[toml]>=5",
+  "pytest>=6.0.0",
+  "pytest-cov>=2.11.1",
+  "pytest-xdist>=2.2.0",
+  "python-xmp-toolkit==2.0.1",  # also requires apt-get install libexempi3
+  "types-Pillow",
+  "types-humanfriendly",
+]
+watcher = ["watchdog>=1.0.2"]
+webservice = ["Flask>=1"]
+
+[project.scripts]
+ocrmypdf = "ocrmypdf.__main__:run"
+
+[tool.setuptools.package-data]
+ocrmypdf = ["data/sRGB.icc", "py.typed"]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+namespaces = false
+
 [tool.setuptools_scm]
 
+[tool.distutils.bdist_wheel]
+python-tag = "py38"
+
 [tool.black]
 line-length = 88
 target-version = ["py38", "py39", "py310", "py311"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 933c767f..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,115 +0,0 @@
-[metadata]
-name = ocrmypdf
-description = OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched
-long_description = file: README.md
-long_description_content_type = text/markdown
-url = https://github.com/ocrmypdf/OCRmyPDF
-author = James R. Barlow
-author_email = james@purplerock.ca
-license = MPL-2.0
-license_file = LICENSE
-license_files =
-    LICENSE
-classifiers =
-    Development Status :: 5 - Production/Stable
-    Environment :: Console
-    Intended Audience :: End Users/Desktop
-    Intended Audience :: Science/Research
-    Intended Audience :: System Administrators
-    License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
-    Operating System :: MacOS :: MacOS X
-    Operating System :: Microsoft :: Windows :: Windows 10
-    Operating System :: POSIX
-    Operating System :: POSIX :: BSD
-    Operating System :: POSIX :: Linux
-    Programming Language :: Python :: 3
-    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.8
-    Programming Language :: Python :: 3.9
-    Programming Language :: Python :: 3.10
-    Topic :: Scientific/Engineering :: Image Recognition
-    Topic :: Text Processing :: Indexing
-    Topic :: Text Processing :: Linguistic
-keywords =
-    PDF
-    OCR
-    optical character recognition
-    PDF/A
-    scanning
-project_urls =
-    Documentation = https://ocrmypdf.readthedocs.io/
-    Source = https://github.com/ocrmypdf/OCRmyPDF
-    Tracker = https://github.com/ocrmypdf/OCRmyPDF/issues
-
-[options]
-packages = find:
-install_requires =
-    Pillow>=8.2.0
-    coloredlogs>=14.0
-    deprecation>=2.1.0
-    img2pdf>=0.3.0  # pure Python
-    packaging>=20
-    pdfminer.six!=20200720,>=20191110
-    pikepdf!=5.0.0,>=4.0.0
-    pluggy>=0.13.0
-    reportlab>=3.5.66
-    tqdm>=4
-    importlib-resources>=5;python_version<'3.9'  # until Python 3.9
-    typing-extensions>=4;python_version<'3.10'
-python_requires = >=3.8
-include_package_data = True
-package_dir =
-    =src
-platforms = any
-setup_requires =
-    setuptools-scm
-    setuptools-scm-git-archive
-zip_safe = False
-
-[options.packages.find]
-where = src
-
-[options.entry_points]
-console_scripts =
-    ocrmypdf = ocrmypdf.__main__:run
-
-[options.extras_require]
-docs =
-    sphinx
-    sphinx-issues
-    sphinx-rtd-theme
-extended_test =
-    PyMuPDF==1.19.1
-test =
-    coverage[toml]>=5
-    pytest>=6.0.0
-    pytest-cov>=2.11.1
-    pytest-xdist>=2.2.0
-    python-xmp-toolkit==2.0.1  # also requires apt-get install libexempi3
-    types-Pillow
-    types-humanfriendly
-watcher =
-    watchdog>=1.0.2
-webservice =
-    Flask>=1
-
-[options.package_data]
-ocrmypdf =
-    data/sRGB.icc
-    py.typed
-
-[bdist_wheel]
-python-tag = py38
-
-[aliases]
-test = pytest
-
-[check-manifest]
-ignore =
-    .github
-
-[flake8]
-ignore = D203,F401,W503,E501,E203,F841
-exclude = .git,__pycache__,docs/conf.py,build,dist,.venv,.venvpp,.eggs,tmp,src/ocrmypdf/lib/
-max-complexity = 10
-max-line-length = 100
diff --git a/src/ocrmypdf/RELEASE.md b/src/ocrmypdf/RELEASE.md
index 2cb89b78..744cddd6 100644
--- a/src/ocrmypdf/RELEASE.md
+++ b/src/ocrmypdf/RELEASE.md
@@ -14,11 +14,11 @@
 
 - Check README.md
 
-- Check setup.py
+- Check pyproject.toml
 
     - Are classifiers up to date?
     - Is `python_requires` correct?
-    - Python 3.6 is EOL on December 2021-12. Could drop support then.
+    - Is it time to drop support for older Pythons?
     - Can we tighten any `install_requires` dependencies?
 
 - Search for old version shims we can remove

From 47dcb6fcd069168b8cb09c61e03e1fba36b81e98 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Fri, 5 Aug 2022 01:01:09 -0700
Subject: [PATCH 17/20] Revert "Remove deprecated falsy handling of
 filter_page_image"

This reverts commit c9389c77138808f02a4de3fb3b5e4fcf7cec7c81.
---
 src/ocrmypdf/_sync.py      |  4 ++--
 src/ocrmypdf/pluginspec.py | 12 +++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/ocrmypdf/_sync.py b/src/ocrmypdf/_sync.py
index 5843f3b4..6a9a85a8 100644
--- a/src/ocrmypdf/_sync.py
+++ b/src/ocrmypdf/_sync.py
@@ -206,8 +206,8 @@ def exec_page_sync(page_context: PageContext) -> PageResult:
         filtered_image = page_context.plugin_manager.hook.filter_page_image(
             page=page_context, image_filename=visible_image_out
         )
-
-        visible_image_out = filtered_image
+        if filtered_image:
+            visible_image_out = filtered_image
         pdf_page_from_image_out = create_pdf_page_from_image(
             visible_image_out, page_context, orientation_correction
         )
diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py
index 551d42e6..c3298e4c 100644
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@@ -276,19 +276,17 @@ def filter_page_image(page: PageContext, image_filename: Path) -> Path:
 
     If the return value is a file that does not exist, ``FileNotFoundError``
     will occur. The return value should be a path to a file in the same folder
-    as ``image_filename``. To leave the image unmodified, ``image_filename``
-    should be returned.
+    as ``image_filename``.
+
+    Implementation detail: If the value returned is falsy, OCRmyPDF will ignore
+    the return value and assume the input file was unmodified. This is deprecated.
+    To leave the image unmodified, ``image_filename`` should be returned.
 
     Note:
         This hook will be called from child processes. Modifying global state
         will not affect the main process or other child processes.
     Note:
         This is a :ref:`firstresult hook<firstresult>`.
-
-    .. versionchanged:: 14.0
-        Previously, OCRmyPDF would treat as a falsy value as a request to leave
-        the image unmodified. This is no longer supported and will trigger an
-        exception.
     """
 
 

From ef70c9499ea773d66d6a2e014f2db6164cee372e Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Fri, 5 Aug 2022 01:07:46 -0700
Subject: [PATCH 18/20] Don't deprecate falsy filter_page_image

---
 src/ocrmypdf/_sync.py      | 2 +-
 src/ocrmypdf/pluginspec.py | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/ocrmypdf/_sync.py b/src/ocrmypdf/_sync.py
index 6a9a85a8..1c971484 100644
--- a/src/ocrmypdf/_sync.py
+++ b/src/ocrmypdf/_sync.py
@@ -206,7 +206,7 @@ def exec_page_sync(page_context: PageContext) -> PageResult:
         filtered_image = page_context.plugin_manager.hook.filter_page_image(
             page=page_context, image_filename=visible_image_out
         )
-        if filtered_image:
+        if filtered_image is not None:  # None if no hook is present
             visible_image_out = filtered_image
         pdf_page_from_image_out = create_pdf_page_from_image(
             visible_image_out, page_context, orientation_correction
diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py
index c3298e4c..dfa2e3ea 100644
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@@ -278,10 +278,6 @@ def filter_page_image(page: PageContext, image_filename: Path) -> Path:
     will occur. The return value should be a path to a file in the same folder
     as ``image_filename``.
 
-    Implementation detail: If the value returned is falsy, OCRmyPDF will ignore
-    the return value and assume the input file was unmodified. This is deprecated.
-    To leave the image unmodified, ``image_filename`` should be returned.
-
     Note:
         This hook will be called from child processes. Modifying global state
         will not affect the main process or other child processes.

From 76bd8cab135af61a0754fbcdc9f4b6ee45e03052 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Sat, 6 Aug 2022 02:58:26 -0700
Subject: [PATCH 19/20] Drop Ubuntu 18.04 content

---
 .github/workflows/build.yml |  6 -----
 docs/installation.rst       | 49 +++----------------------------------
 2 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0615005e..57d1b6d6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -71,12 +71,6 @@ jobs:
             unpaper \
             zlib1g
 
-      - name: Install Ubuntu 18.04 packages
-        if: matrix.os == 'ubuntu-18.04'
-        run: |
-          sudo apt-get install -y --no-install-recommends \
-            libexempi3
-
       - name: Install Ubuntu 20.04 packages
         if: matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-latest'
         run: |
diff --git a/docs/installation.rst b/docs/installation.rst
index a352ac6b..07c25abd 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -44,7 +44,7 @@ install, or install a more recent version than your platform provides, read on.
 Installing on Linux
 ===================
 
-Debian and Ubuntu 18.04 or newer
+Debian and Ubuntu 20.04 or newer
 --------------------------------
 
 .. |deb-11| image:: https://repology.org/badge/version-for-repo/debian_11/ocrmypdf.svg
@@ -56,9 +56,6 @@ Debian and Ubuntu 18.04 or newer
 .. |deb-unstable| image:: https://repology.org/badge/version-for-repo/debian_unstable/ocrmypdf.svg
     :alt: Debian unstable
 
-.. |ubu-1804| image:: https://repology.org/badge/version-for-repo/ubuntu_18_04/ocrmypdf.svg
-    :alt: Ubuntu 18.04 LTS
-
 .. |ubu-2004| image:: https://repology.org/badge/version-for-repo/ubuntu_20_04/ocrmypdf.svg
     :alt: Ubuntu 20.04 LTS
 
@@ -72,7 +69,7 @@ Debian and Ubuntu 18.04 or newer
 +-----------------------------------------------+
 | |deb-11| |deb-12| |deb-unstable|              |
 +-----------------------------------------------+
-| |ubu-1804| |ubu-2004| |ubu-2204|              |
+| |ubu-2004| |ubu-2204|                         |
 +-----------------------------------------------+
 
 Users of Debian 9 ("stretch") or later, or Ubuntu 18.04 or later, including users
@@ -80,7 +77,7 @@ of Windows Subsystem for Linux, may simply
 
 .. code-block:: bash
 
-    apt-get install ocrmypdf
+    apt install ocrmypdf
 
 As indicated in the table above, Debian and Ubuntu releases may lag
 behind the latest version. If the version available for your platform is
@@ -198,46 +195,6 @@ To install for the current user only:
 
 To add JBIG2 encoding, see :ref:`jbig2`.
 
-Ubuntu 18.04 LTS
-----------------
-
-Ubuntu 18.04 includes ocrmypdf 6.1.2 - you can install that with ``apt``, but
-it is quite old now. To install a more recent version, uninstall the old version
-of ocrmypdf, and install the following dependencies:
-
-.. code-block:: bash
-
-    sudo apt-get -y remove ocrmypdf
-    sudo apt-get -y update
-    sudo apt-get -y install \
-        ghostscript \
-        icc-profiles-free \
-        libxml2 \
-        pngquant \
-        python3-distutils \
-        python3-pkg-resources \
-        python3-reportlab \
-        qpdf \
-        tesseract-ocr \
-        zlib1g \
-        unpaper
-
-We will need a newer version of ``pip`` then was available for Ubuntu 18.04:
-
-.. code-block:: bash
-
-    wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py
-
-Then install the most recent ocrmypdf for the local user and set the
-user's ``PATH`` to check for the user's Python packages.
-
-.. code-block:: bash
-
-    export PATH=$HOME/.local/bin:$PATH
-    python3 -m pip install --user ocrmypdf
-
-To add JBIG2 encoding, see :ref:`jbig2`.
-
 Arch Linux (AUR)
 ----------------
 

From 5156fe7662bd1ed316e59e29cc05cedcdaa594e0 Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <james@purplerock.ca>
Date: Thu, 15 Sep 2022 22:56:44 -0700
Subject: [PATCH 20/20] Test PyPy 3.8 and 3.9

---
 .github/workflows/build.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 76f31b35..f9a15f41 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -31,7 +31,9 @@ jobs:
           - os: ubuntu-latest
             python: "3.9"
           - os: ubuntu-latest
-            python: "pypy-3.8"
+            python: "pypy3.8"
+          - os: ubuntu-latest
+            python: "pypy3.9"
           - os: ubuntu-latest
             python: "3.9"
             tesseract5: true