diff --git a/docs/advanced.rst b/docs/advanced.rst
index 963ff797..6e59b3fc 100644
--- a/docs/advanced.rst
+++ b/docs/advanced.rst
@@ -114,6 +114,41 @@ exceed a certain number of megapixels with ``--skip-big``. (A 300 DPI,
     # Allow 300 seconds for OCR; skip any page larger than 50 megapixels
     ocrmypdf --tesseract-timeout 300 --skip-big 50 bigfile.pdf output.pdf
 
+OCR for huge images
+-------------------
+
+Separate from these settings, Tesseract has internal limits on the size
+of images it will process. If you issue
+``--tesseract-downsample-large-images``, OCRmyPDF will downsample images
+to fit Tesseract limits. (The limits are usually encountered only for scanned
+images of oversized media, such as large maps or blueprints exceeding
+110 cm or 43 inches in either dimension, and at high DPI.)
+
+``--tesseract-downsample-above`` adjusts the threshold at which images
+will be downsampled. By default, only images that exceed any of Tesseract's
+internal limits are downsampled.
+
+You will also need to set ``--tesseract-timeout`` high enough to allow
+for processing.
+
+Only the image sent for OCR is downsampled. The original image is
+preserved.
+
+.. code-block:: bash
+
+    # Allow 600 seconds for OCR on huge images
+    ocrmypdf --tesseract-timeout 600 \
+        --tesseract-downsample-large-images \
+        bigfile.pdf output.pdf
+
+    # Downsample images above 5000 pixels on the longest dimension to
+    # 5000 pixels
+    ocrmypdf --tesseract-timeout 120 \
+        --tesseract-downsample-large-images \
+        --tesseract-downsample-above 5000 \
+        bigfile.pdf output_downsampled_ocr.pdf
+
+
 Overriding default tesseract
 ----------------------------
 
diff --git a/misc/completion/ocrmypdf.bash b/misc/completion/ocrmypdf.bash
index bbd6a5a4..b21c4c1e 100644
--- a/misc/completion/ocrmypdf.bash
+++ b/misc/completion/ocrmypdf.bash
@@ -6,53 +6,55 @@ set -o errexit
 
 __ocrmypdf_arguments()
 {
-    local arguments="--help                   (show help message)
---language               (language(s) of the file to be OCRed)
---image-dpi              (assume this DPI if input image DPI is unknown)
---output-type            (select PDF output options)
---sidecar                (write OCR to text file)
---version                (print program version and exit)
---jobs                   (how many worker processes to use)
---quiet                  (suppress INFO messages)
---verbose                (set verbosity level)
---title                  (set metadata)
---author                 (set metadata)
---subject                (set metadata)
---keywords               (set metadata)
---rotate-pages           (rotate pages to correct orientation)
---remove-background      (attempt to remove background from pages)
---deskew                 (fix small horizontal alignment skew)
---clean                  (clean document images before OCR)
---clean-final            (clean document images and keep result)
---unpaper-args           (a quoted string of arguments to pass to unpaper)
---oversample             (oversample images to this DPI)
---remove-vectors         (don\'t send vector objects to OCR)
---threshold              (threshold images before OCR)
---force-ocr              (OCR documents that already have printable text)
---skip-text              (skip OCR on any pages that already contain text)
---redo-ocr               (redo OCR on any pages that seem to have OCR already)
+    local arguments="\
+--help                          (show help message)
+--language                      (language(s) of the file to be OCRed)
+--image-dpi                     (assume this DPI if input image DPI is unknown)
+--output-type                   (select PDF output options)
+--sidecar                       (write OCR to text file)
+--version                       (print program version and exit)
+--jobs                          (how many worker processes to use)
+--quiet                         (suppress INFO messages)
+--verbose                       (set verbosity level)
+--title                         (set metadata)
+--author                        (set metadata)
+--subject                       (set metadata)
+--keywords                      (set metadata)
+--rotate-pages                  (rotate pages to correct orientation)
+--remove-background             (attempt to remove background from pages)
+--deskew                        (fix small horizontal alignment skew)
+--clean                         (clean document images before OCR)
+--clean-final                   (clean document images and keep result)
+--unpaper-args                  (a quoted string of arguments to pass to unpaper)
+--oversample                    (oversample images to this DPI)
+--remove-vectors                (don\'t send vector objects to OCR)
+--threshold                     (threshold images before OCR)
+--force-ocr                     (OCR documents that already have printable text)
+--skip-text                     (skip OCR on any pages that already contain text)
+--redo-ocr                      (redo OCR on any pages that seem to have OCR already)
 --invalidate-digital-signatures (remove digital signatures from PDF)
---skip-big               (skip OCR on pages larger than this many MPixels)
---optimize               (select optimization level)
---jpeg-quality           (JPEG quality [0..100])
---png-quality            (PNG quality [0..100])
---jbig2-lossy            (enable lossy JBIG2 (see docs))
---pages                  (apply OCR to only the specified pages)
---max-image-mpixels      (image decompression bomb threshold)
---pdf-renderer           (select PDF renderer options)
---rotate-pages-threshold (page rotation confidence)
---pdfa-image-compression (set PDF/A image compression options)
---fast-web-view          (if file size if above this amount in MB linearize PDF)
---plugin                 (name of plugin to import)
---keep-temporary-files   (keep temporary files (debug)
---tesseract-config       (set custom tesseract config file)
---tesseract-pagesegmode  (set tesseract --psm)
---tesseract-oem          (set tesseract --oem)
---tesseract-thresholding (set tesseract image thresholding)
---tesseract-timeout      (maximum number of seconds to wait for OCR)
---user-words             (specify location of user words file)
---user-patterns          (specify location of user patterns file)
---no-progress-bar        (disable the progress bar)
+--skip-big                      (skip OCR on pages larger than this many MPixels)
+--optimize                      (select optimization level)
+--jpeg-quality                  (JPEG quality [0..100])
+--png-quality                   (PNG quality [0..100])
+--jbig2-lossy                   (enable lossy JBIG2 (see docs))
+--pages                         (apply OCR to only the specified pages)
+--max-image-mpixels             (image decompression bomb threshold)
+--pdf-renderer                  (select PDF renderer options)
+--rotate-pages-threshold        (page rotation confidence)
+--pdfa-image-compression        (set PDF/A image compression options)
+--fast-web-view                 (if file size if above this amount in MB linearize PDF)
+--plugin                        (name of plugin to import)
+--keep-temporary-files          (keep temporary files (debug)
+--tesseract-config              (set custom tesseract config file)
+--tesseract-pagesegmode         (set tesseract --psm)
+--tesseract-oem                 (set tesseract --oem)
+--tesseract-thresholding        (set tesseract image thresholding)
+--tesseract-timeout             (maximum number of seconds to wait for OCR)
+--user-words                    (specify location of user words file)
+--user-patterns                 (specify location of user patterns file)
+--no-progress-bar               (disable the progress bar)
+--color-conversion-strategy     (select color conversion strategy)
 "
 
     COMPREPLY=( $( compgen -W "$arguments" -- "$cur") )
@@ -192,6 +194,20 @@ sauvola       (use Sauvola thresholding)"
     fi
 }
 
+__ocrmypdf_color-conversion-strategy()
+{
+    local choices="LeaveColorUnchanged (default)
+CMYK (convert to CMYK)
+Gray (convert to grayscale)
+RGB (convert to RGB)
+UseDeviceIndependentColor (convert with device independent color)"
+
+    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )
+    # With a single candidate left, keep only the text before its first space
+    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
+        COMPREPLY=( ${COMPREPLY[0]%% *} )
+    fi
+}
 
 __ocrmypdf_check_previous()
 {
@@ -251,6 +267,10 @@ __ocrmypdf_check_previous()
             _filedir
             return 0
             ;;
+        --color-conversion-strategy)
+            __ocrmypdf_color-conversion-strategy
+            return 0
+            ;;
     esac
 
     return 1
diff --git a/misc/completion/ocrmypdf.fish b/misc/completion/ocrmypdf.fish
index 12b7ea9c..c279c6cb 100644
--- a/misc/completion/ocrmypdf.fish
+++ b/misc/completion/ocrmypdf.fish
@@ -129,4 +129,27 @@ complete -c ocrmypdf -r -l user-words -d "specify location of user words file"
 complete -c ocrmypdf -r -l user-patterns -d "specify location of user patterns file"
 complete -c ocrmypdf -x -l fast-web-view -d "if file size if above this amount in MB, linearize PDF"
 
-complete -c ocrmypdf -x -a "(__fish_complete_suffix .pdf; __fish_complete_suffix .PDF; __fish_complete_suffix .jpg; __fish_complete_suffix .png)"
+function __fish_ocrmypdf_color_conversion_strategy
+    printf '%s\t%s\n' LeaveColorUnchanged (_ "do not convert color spaces (default)")
+    printf '%s\t%s\n' CMYK (_ "convert all color spaces to CMYK")
+    printf '%s\t%s\n' Gray (_ "convert all color spaces to grayscale")
+    printf '%s\t%s\n' RGB (_ "convert all color spaces to RGB")
+    printf '%s\t%s\n' UseDeviceIndependentColor (_ "convert all color spaces to ICC-based color spaces")
+end
+
+complete -c ocrmypdf -x -l color-conversion-strategy -a '(__fish_ocrmypdf_color_conversion_strategy)' -d "set color conversion strategy"
+
+function __fish_ocrmypdf_input_file_given
+    # Succeeds (status 0) when a non-option token of the current command
+    # line names an existing regular file; fails (status 1) otherwise.
+    for word in (commandline -opc)
+        # Tokens with a leading dash are options, never the input file.
+        string match -q -r '^-' -- $word; and continue
+        if test -f "$word"
+            return 0
+        end
+    end
+    return 1
+end
+
+complete -c ocrmypdf -x -n 'not __fish_ocrmypdf_input_file_given' -a "(__fish_complete_suffix .pdf)" -d "input file"
diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
index 001cbfbf..e5210b33 100644
--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@@ -28,6 +28,17 @@ except AttributeError:
     # Pillow 9 shim
     Transpose = Image  # type: ignore
 
+# Strategy names that may be passed to Ghostscript's -sColorConversionStrategy.
+COLOR_CONVERSION_STRATEGIES = frozenset(
+    [
+        'CMYK',
+        'Gray',
+        'LeaveColorUnchanged',
+        'RGB',
+        'UseDeviceIndependentColor',
+    ]
+)
+
 log = logging.getLogger(__name__)
 
 
@@ -151,6 +162,7 @@ def generate_pdfa(
     output_file: os.PathLike,
     *,
     compression: str,
+    color_conversion_strategy: str,
     pdf_version: str = '1.5',
     pdfa_part: str = '2',
     progressbar_class=None,
@@ -200,16 +212,16 @@ def generate_pdfa(
             "-dBATCH",
             "-dNOPAUSE",
             "-dSAFER",
-            "-dCompatibilityLevel=" + str(pdf_version),
+            f"-dCompatibilityLevel={str(pdf_version)}",
             "-sDEVICE=pdfwrite",
             "-dAutoRotatePages=/None",
-            "-sColorConversionStrategy=" + strategy,
+            f"-sColorConversionStrategy={color_conversion_strategy}",
         ]
         + (['-dPDFSTOPONERROR'] if stop_on_error else [])
         + compression_args
         + [
             "-dJPEGQ=95",
-            "-dPDFA=" + pdfa_part,
+            f"-dPDFA={pdfa_part}",
             "-dPDFACompatibilityPolicy=1",
             "-o",
             "-",
diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
index 4d4eab66..01d23003 100644
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -359,8 +359,12 @@ def is_ocr_required(page_context: PageContext) -> bool:
 def rasterize_preview(input_file: Path, page_context: PageContext) -> Path:
     """Generate a lower quality preview image."""
     output_file = page_context.get_path('rasterize_preview.jpg')
-    canvas_dpi = get_canvas_square_dpi(page_context.pageinfo, page_context.options)
-    page_dpi = get_page_square_dpi(page_context.pageinfo, page_context.options)
+    canvas_dpi = Resolution(300.0, 300.0).take_min(
+        [get_canvas_square_dpi(page_context.pageinfo, page_context.options)]
+    )
+    page_dpi = Resolution(300.0, 300.0).take_min(
+        [get_page_square_dpi(page_context.pageinfo, page_context.options)]
+    )
     page_context.plugin_manager.hook.rasterize_pdf_page(
         input_file=input_file,
         output_file=output_file,
@@ -490,6 +494,21 @@ def rasterize(
     canvas_dpi = get_canvas_square_dpi(pageinfo, page_context.options)
     page_dpi = get_page_square_dpi(pageinfo, page_context.options)
 
+    dpi_profile = pageinfo.page_dpi_profile()
+    if dpi_profile and dpi_profile.average_to_max_dpi_ratio < 0.8:
+        log.warning(
+            "Weighted average DPI is %0.1f, max DPI is %0.1f. "
+            "The discrepancy may indicate a high detail region on this page, "
+            "but could also indicate a problem with the input PDF file. "
+            "An image will be rendered at %0.1f DPI.",
+            dpi_profile.weighted_dpi,
+            dpi_profile.max_dpi,
+            dpi_profile.weighted_dpi,
+        )
+        canvas_dpi = page_dpi = Resolution(
+            dpi_profile.weighted_dpi, dpi_profile.weighted_dpi
+        )
+
     page_context.plugin_manager.hook.rasterize_pdf_page(
         input_file=input_file,
         output_file=output_file,
@@ -792,7 +811,7 @@ def convert_to_pdfa(input_pdf: Path, input_ps_stub: Path, context: PdfContext) -
         pdf_pages=[fix_docinfo_file],
         pdfmark=input_ps_stub,
         output_file=output_file,
-        compression=options.pdfa_image_compression,
+        context=context,
         pdfa_part=options.output_type[-1],  # is pdfa-1, pdfa-2, or pdfa-3
         progressbar_class=(
             context.plugin_manager.hook.get_progressbar_class()
diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py
index 52006faf..ef378e1a 100644
--- a/src/ocrmypdf/_validation.py
+++ b/src/ocrmypdf/_validation.py
@@ -205,16 +205,6 @@ def check_options_ocr_behavior(options: Namespace) -> None:
         options.pages = _pages_from_ranges(options.pages)
 
 
-def check_options_advanced(options: Namespace) -> None:
-    if options.pdfa_image_compression != 'auto' and not options.output_type.startswith(
-        'pdfa'
-    ):
-        log.warning(
-            "--pdfa-image-compression argument only applies when "
-            "--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'"
-        )
-
-
 def check_options_metadata(options: Namespace) -> None:
     docinfo = [options.title, options.author, options.keywords, options.subject]
     for s in (m for m in docinfo if m):
@@ -241,7 +231,6 @@ def _check_plugin_invariant_options(options: Namespace) -> None:
     check_options_sidecar(options)
     check_options_preprocessing(options)
     check_options_ocr_behavior(options)
-    check_options_advanced(options)
     check_options_pillow(options)
 
 
diff --git a/src/ocrmypdf/builtin_plugins/ghostscript.py b/src/ocrmypdf/builtin_plugins/ghostscript.py
index 44570e2f..895236b1 100644
--- a/src/ocrmypdf/builtin_plugins/ghostscript.py
+++ b/src/ocrmypdf/builtin_plugins/ghostscript.py
@@ -18,6 +18,33 @@ log = logging.getLogger(__name__)
 BLACKLISTED_GS_VERSIONS = frozenset()
 
 
+@hookimpl
+def add_options(parser):
+    """Register the Ghostscript-related command line options on parser."""
+    gs = parser.add_argument_group("Ghostscript", "Advanced control of Ghostscript")
+    gs.add_argument(
+        '--color-conversion-strategy',
+        metavar='STRATEGY',
+        # Sorted: a frozenset would list the choices in arbitrary order on error.
+        choices=sorted(ghostscript.COLOR_CONVERSION_STRATEGIES),
+        default='LeaveColorUnchanged',
+        help="Set Ghostscript color conversion strategy",
+    )
+    gs.add_argument(
+        '--pdfa-image-compression',
+        choices=['auto', 'jpeg', 'lossless'],
+        default='auto',
+        help="Specify how to compress images in the output PDF/A. 'auto' lets "
+        "OCRmyPDF decide.  'jpeg' changes all grayscale and color images to "
+        "JPEG compression.  'lossless' uses PNG-style lossless compression "
+        "for all images.  Monochrome images are always compressed using a "
+        "lossless codec.  Compression settings "
+        "are applied to all pages, including those for which OCR was "
+        "skipped.  Not supported for --output-type=pdf ; that setting "
+        "preserves the original compression of all images.",
+    )
+
+
 @hookimpl
 def check_options(options):
     """Check that the options are valid for this plugin."""
@@ -37,6 +64,17 @@ def check_options(options):
 
     if options.output_type == 'pdfa':
         options.output_type = 'pdfa-2'
+    if options.color_conversion_strategy not in ghostscript.COLOR_CONVERSION_STRATEGIES:
+        raise ValueError(
+            f"Invalid color conversion strategy: {options.color_conversion_strategy}"
+        )
+    if options.pdfa_image_compression != 'auto' and not options.output_type.startswith(
+        'pdfa'
+    ):
+        log.warning(
+            "--pdfa-image-compression argument only applies when "
+            "--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'"
+        )
 
 
 @hookimpl
@@ -71,7 +109,7 @@ def generate_pdfa(
     pdf_pages,
     pdfmark,
     output_file,
-    compression,
+    context,
     pdf_version,
     pdfa_part,
     progressbar_class,
@@ -81,7 +119,8 @@ def generate_pdfa(
     ghostscript.generate_pdfa(
         pdf_pages=[*pdf_pages, pdfmark],
         output_file=output_file,
-        compression=compression,
+        compression=context.options.pdfa_image_compression,
+        color_conversion_strategy=context.options.color_conversion_strategy,
         pdf_version=pdf_version,
         pdfa_part=pdfa_part,
         progressbar_class=progressbar_class,
diff --git a/src/ocrmypdf/builtin_plugins/tesseract_ocr.py b/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
index ad6b6d85..81692831 100644
--- a/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
+++ b/src/ocrmypdf/builtin_plugins/tesseract_ocr.py
@@ -30,7 +30,7 @@ def add_options(parser):
         action='append',
         metavar='CFG',
         default=[],
-        help="Additional Tesseract configuration files -- see documentation",
+        help="Additional Tesseract configuration files -- see documentation.",
     )
     tess.add_argument(
         '--tesseract-pagesegmode',
@@ -38,7 +38,7 @@ def add_options(parser):
         type=int,
         metavar='PSM',
         choices=range(0, 14),
-        help="Set Tesseract page segmentation mode (see tesseract --help)",
+        help="Set Tesseract page segmentation mode (see tesseract --help).",
     )
     tess.add_argument(
         '--tesseract-oem',
@@ -75,7 +75,10 @@ def add_options(parser):
         metavar='SECONDS',
         help=(
             "Give up on OCR after the timeout, but copy the preprocessed page "
-            "into the final output."
+            "into the final output. This timeout is only used when using Tesseract "
+            "for OCR. When Tesseract is used for other operations such as "
+            "deskewing and orientation, the timeout is controlled by "
+            "--tesseract-non-ocr-timeout."
         ),
     )
     tess.add_argument(
@@ -175,6 +178,15 @@ def validate(pdfinfo, options):
         tess_threads = int(os.environ['OMP_THREAD_LIMIT'])
     log.debug("Using Tesseract OpenMP thread limit %d", tess_threads)
 
+    if (
+        options.tesseract_downsample_above != 32767
+        and not options.tesseract_downsample_large_images
+    ):
+        log.warning(
+            "The --tesseract-downsample-above argument will have no effect unless "
+            "--tesseract-downsample-large-images is also given."
+        )
+
 
 @hookimpl
 def filter_ocr_image(page: PageContext, image: Image.Image) -> Image.Image:
diff --git a/src/ocrmypdf/cli.py b/src/ocrmypdf/cli.py
index 34c8973e..3d658d0e 100644
--- a/src/ocrmypdf/cli.py
+++ b/src/ocrmypdf/cli.py
@@ -177,7 +177,9 @@ Online documentation is located at:
         '--image-dpi',
         metavar='DPI',
         type=int,
-        help="For input image instead of PDF, use this DPI instead of file's.",
+        help="When the input file is an image, not a PDF, use this DPI instead "
+        "of the DPI claimed by the input file. If the input does not claim a "
+        "sensible DPI, this option will be required.",
     )
     parser.add_argument(
         '--output-type',
@@ -402,19 +404,6 @@ Online documentation is located at:
         help="Only rotate pages when confidence is above this value (arbitrary "
         "units reported by tesseract)",
     )
-    advanced.add_argument(
-        '--pdfa-image-compression',
-        choices=['auto', 'jpeg', 'lossless'],
-        default='auto',
-        help="Specify how to compress images in the output PDF/A. 'auto' lets "
-        "OCRmyPDF decide.  'jpeg' changes all grayscale and color images to "
-        "JPEG compression.  'lossless' uses PNG-style lossless compression "
-        "for all images.  Monochrome images are always compressed using a "
-        "lossless codec.  Compression settings "
-        "are applied to all pages, including those for which OCR was "
-        "skipped.  Not supported for --output-type=pdf ; that setting "
-        "preserves the original compression of all images.",
-    )
     advanced.add_argument(
         '--fast-web-view',
         type=numeric(float, 0),
diff --git a/src/ocrmypdf/helpers.py b/src/ocrmypdf/helpers.py
index 2ab3392c..29532235 100644
--- a/src/ocrmypdf/helpers.py
+++ b/src/ocrmypdf/helpers.py
@@ -15,7 +15,16 @@ from contextlib import suppress
 from io import StringIO
 from math import isclose, isfinite
 from pathlib import Path
-from typing import Any, Generic, Sequence, SupportsFloat, SupportsRound, TypeVar
+from statistics import harmonic_mean
+from typing import (
+    Any,
+    Callable,
+    Generic,
+    Sequence,
+    SupportsFloat,
+    SupportsRound,
+    TypeVar,
+)
 
 import img2pdf
 import pikepdf
@@ -73,17 +82,38 @@ class Resolution(Generic[T]):
             return isfinite(self.x) and isfinite(self.y)
         return True
 
+    def to_scalar(self) -> float:
+        """Return the harmonic mean of x and y as a 1D approximation.
+
+        A Resolution is 2D, but it is typically "square" (x == y), in which case
+        the harmonic mean is simply that shared value. When it is not square, the
+        harmonic mean approximates the 2D resolution as a single number.
+        """
+        return harmonic_mean([self.x, self.y])
+
+    def _take_minmax(
+        self, vals: Iterable[Any], yvals: Iterable[Any] | None, cmp: Callable
+    ) -> Resolution:
+        """Return a new Resolution reducing self and the inputs with cmp."""
+        # With yvals, vals holds x values; otherwise vals holds (x, y) pairs.
+        if yvals is not None:
+            return Resolution(cmp(self.x, *vals), cmp(self.y, *yvals))
+        cmp_x, cmp_y = self.x, self.y
+        for x, y in vals:
+            cmp_x, cmp_y = cmp(x, cmp_x), cmp(y, cmp_y)
+        return Resolution(cmp_x, cmp_y)
+
     def take_max(
         self, vals: Iterable[Any], yvals: Iterable[Any] | None = None
     ) -> Resolution:
         """Return a new Resolution object with the maximum resolution of inputs."""
-        if yvals is not None:
-            return Resolution(max(self.x, *vals), max(self.y, *yvals))
-        max_x, max_y = self.x, self.y
-        for x, y in vals:
-            max_x = max(x, max_x)
-            max_y = max(y, max_y)
-        return Resolution(max_x, max_y)
+        return self._take_minmax(vals, yvals, max)
+
+    def take_min(
+        self, vals: Iterable[Any], yvals: Iterable[Any] | None = None
+    ) -> Resolution:
+        """Return a new Resolution with the per-axis minimum of self and inputs."""
+        return self._take_minmax(vals, yvals, min)
 
     def flip_axis(self) -> Resolution[T]:
         """Return a new Resolution object with x and y swapped."""
@@ -95,11 +125,11 @@ class Resolution(Generic[T]):
 
     def __str__(self):
         """Return a string representation of the resolution."""
-        return f"{self.x:f}x{self.y:f}"
+        return f"{self.x:f}×{self.y:f}"
 
     def __repr__(self):  # pragma: no cover
         """Return a repr() of the resolution."""
-        return f"Resolution({self.x}x{self.y} dpi)"
+        return f"Resolution({self.x!r}, {self.y!r})"
 
     def __eq__(self, other):
         """Return True if the resolution is equal to another resolution."""
diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py
index 4122317b..56030e60 100644
--- a/src/ocrmypdf/pdfinfo/info.py
+++ b/src/ocrmypdf/pdfinfo/info.py
@@ -420,12 +420,12 @@ class ImageInfo:
         return self._type
 
     @property
-    def width(self):
+    def width(self) -> int:
         """Width of the image in pixels."""
         return self._width
 
     @property
-    def height(self):
+    def height(self) -> int:
         """Height of the image in pixels."""
         return self._height
 
@@ -458,17 +458,24 @@ class ImageInfo:
         return self.dpi.is_finite and self.width >= 0 and self.height >= 0
 
     @property
-    def dpi(self):
+    def dpi(self) -> Resolution:
         """Dots per inch of the image.
 
         Calculated based on where and how the image is drawn in the PDF.
         """
         return _get_dpi(self._shorthand, (self._width, self._height))
 
+    @property
+    def printed_area(self) -> float:
+        """Printed area in square inches (0.0 if unrenderable or DPI is zero)."""
+        if not (self.renderable and self.dpi.x and self.dpi.y):
+            return 0.0
+        return float((self.width / self.dpi.x) * (self.height / self.dpi.y))
+
     def __repr__(self):
         """Return a string representation of the image."""
         return (
-            f"<ImageInfo '{self.name}' {self.type_} {self.width}x{self.height} "
+            f"<ImageInfo '{self.name}' {self.type_} {self.width}×{self.height} "
             f"{self.color} {self.comp} {self.bpc} {self.enc} {self.dpi}>"
         )
 
@@ -747,12 +754,38 @@ def _pdf_pageinfo_concurrent(
     return pages
 
 
+class PageResolutionProfile(NamedTuple):
+    """Information about the resolutions of a page."""
+
+    weighted_dpi: float
+    """The weighted average DPI of the page, weighted by the area of each image."""
+
+    max_dpi: float
+    """The maximum DPI of an image on the page."""
+
+    average_to_max_dpi_ratio: float
+    """The average DPI of the page divided by the maximum DPI of the page.
+
+    This indicates the intensity of the resolution variation on the page.
+
+    If the ratio is 1.0 or close to 1.0, the page has all of its content at a
+    uniform resolution. If the ratio is much lower than 1.0, some content is at
+    a higher resolution than the rest of the page.
+    """
+
+    area_ratio: float
+    """The maximum-DPI area of the page divided by the total drawn area.
+
+    This indicates the prevalence of high-resolution content on the page.
+    """
+
+
 class PageInfo:
     """Information about type of contents on each page in a PDF."""
 
     _has_text: bool | None
     _has_vector: bool | None
-    _images: list[ImageInfo]
+    _images: list[ImageInfo] = []
 
     def __init__(
         self,
@@ -939,6 +972,44 @@ class PageInfo:
         else:
             return '1.5'
 
+    def page_dpi_profile(self) -> PageResolutionProfile | None:
+        """Return information about the DPIs of the page.
+
+        This is useful to detect pages with a small proportion of high-resolution
+        content that is forcing us to use a high DPI for the whole page. The ratio
+        is weighted by the area of each image. If images overlap, the overlapped
+        area counts.
+
+        Vector graphics and text are ignored, as are images that cannot be
+        rendered or that cover no area.
+
+        Returns None if there is no meaningful DPI for the page.
+        """
+        # Zero-area images add no weight and can report a DPI of zero, which would
+        # make the weighted harmonic mean divide by zero, so they are left out.
+        measured = [
+            (image.dpi.to_scalar(), image.printed_area)
+            for image in self._images
+            if image.renderable
+        ]
+        measured = [(dpi, area) for dpi, area in measured if dpi > 0 and area > 0]
+        if not measured:
+            return None
+        total_area = sum(area for _, area in measured)
+
+        # Harmonic mean of DPIs weighted by area, computed manually. When the
+        # minimum version is Python 3.10, statistics.harmonic_mean with the
+        # weights parameter can replace this.
+        weights = [area / total_area for _, area in measured]
+        weighted_dpi = sum(weights) / sum(
+            weight / dpi for weight, (dpi, _) in zip(weights, measured)
+        )
+        # max() keeps the first image when several share the highest DPI.
+        max_dpi, max_area = max(measured, key=lambda pair: pair[0])
+        return PageResolutionProfile(
+            weighted_dpi, max_dpi, weighted_dpi / max_dpi, max_area / total_area
+        )
+
     def __repr__(self):
         """Return string representation."""
         return (
diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py
index 46f2944e..00c8a4df 100644
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@@ -351,7 +351,6 @@ def filter_pdf_page(page: PageContext, image_filename: Path, output_pdf: Path) -
         This hook will be called from child processes. Modifying global state
         will not affect the main process or other child processes.
 
-
     Note:
         This is a :ref:`firstresult hook<firstresult>`.
     """
@@ -466,7 +465,7 @@ def generate_pdfa(
     pdf_pages: list[Path],
     pdfmark: Path,
     output_file: Path,
-    compression: str,
+    context: PdfContext,
     pdf_version: str,
     pdfa_part: str,
     progressbar_class,
@@ -484,11 +483,7 @@ def generate_pdfa(
         pdfmark: A PostScript file intended for Ghostscript with details on
             how to perform the PDF/A conversion.
         output_file: The name of the desired output file.
-        compression: One of ``'jpeg'``, ``'lossless'``, ``''``. For ``'jpeg'``,
-            the PDF/A generator should convert all images to JPEG encoding where
-            possible. For lossless, all images should be converted to FlateEncode
-            (lossless PNG). If an empty string, the PDF generator should make its
-            own decisions about how to encode images.
+        context: The current context.
         pdf_version: The minimum PDF version that the output file should be.
             At its own discretion, the PDF/A generator may raise the version,
             but should not lower it.
@@ -514,6 +509,11 @@ def generate_pdfa(
     Note:
         This is a :ref:`firstresult hook<firstresult>`.
 
+    Note:
+        Before version 15.0.0, the ``context`` was not provided and ``compression``
+        was provided instead. Plugins should now read the context object to determine
+        if compression is requested.
+
     See Also:
         https://github.com/tqdm/tqdm
     """
diff --git a/tests/plugins/gs_feature_elision.py b/tests/plugins/gs_feature_elision.py
index dcd4c849..5f914dde 100644
--- a/tests/plugins/gs_feature_elision.py
+++ b/tests/plugins/gs_feature_elision.py
@@ -20,14 +20,14 @@ def run_append_stderr(*args, **kwargs):
 
 
 @hookimpl
-def generate_pdfa(pdf_pages, pdfmark, output_file, compression, pdf_version, pdfa_part):
+def generate_pdfa(pdf_pages, pdfmark, output_file, context, pdf_version, pdfa_part):
     with patch('ocrmypdf._exec.ghostscript.run_polling_stderr') as mock:
         mock.side_effect = run_append_stderr
         ghostscript.generate_pdfa(
             pdf_pages=pdf_pages,
             pdfmark=pdfmark,
             output_file=output_file,
-            compression=compression,
+            context=context,
             pdf_version=pdf_version,
             pdfa_part=pdfa_part,
             progressbar_class=None,
diff --git a/tests/plugins/gs_pdfa_failure.py b/tests/plugins/gs_pdfa_failure.py
index e9ec9697..9dc742e5 100644
--- a/tests/plugins/gs_pdfa_failure.py
+++ b/tests/plugins/gs_pdfa_failure.py
@@ -22,14 +22,14 @@ def run_rig_args(args, **kwargs):
 
 
 @hookimpl
-def generate_pdfa(pdf_pages, pdfmark, output_file, compression, pdf_version, pdfa_part):
+def generate_pdfa(pdf_pages, pdfmark, output_file, context, pdf_version, pdfa_part):
     with patch('ocrmypdf._exec.ghostscript.run_polling_stderr') as mock:
         mock.side_effect = run_rig_args
         ghostscript.generate_pdfa(
             pdf_pages=pdf_pages,
             pdfmark=pdfmark,
             output_file=output_file,
-            compression=compression,
+            context=context,
             pdf_version=pdf_version,
             pdfa_part=pdfa_part,
             progressbar_class=None,
diff --git a/tests/plugins/gs_render_failure.py b/tests/plugins/gs_render_failure.py
index 56ec3905..4d955354 100644
--- a/tests/plugins/gs_render_failure.py
+++ b/tests/plugins/gs_render_failure.py
@@ -17,14 +17,14 @@ def raise_gs_fail(*args, **kwargs):
 
 
 @hookimpl
-def generate_pdfa(pdf_pages, pdfmark, output_file, compression, pdf_version, pdfa_part):
+def generate_pdfa(pdf_pages, pdfmark, output_file, context, pdf_version, pdfa_part):
     with patch('ocrmypdf._exec.ghostscript.run_polling_stderr') as mock:
         mock.side_effect = raise_gs_fail
         ghostscript.generate_pdfa(
             pdf_pages=pdf_pages,
             pdfmark=pdfmark,
             output_file=output_file,
-            compression=compression,
+            context=context,
             pdf_version=pdf_version,
             pdfa_part=pdfa_part,
             progressbar_class=None,
diff --git a/tests/plugins/gs_render_soft_error.py b/tests/plugins/gs_render_soft_error.py
index f80e0401..e276194a 100644
--- a/tests/plugins/gs_render_soft_error.py
+++ b/tests/plugins/gs_render_soft_error.py
@@ -23,7 +23,7 @@ def generate_pdfa(
     pdf_pages,
     pdfmark,
     output_file,
-    compression,
+    context,
     pdf_version,
     pdfa_part,
     stop_on_soft_error,
@@ -34,7 +34,7 @@ def generate_pdfa(
             pdf_pages=pdf_pages,
             pdfmark=pdfmark,
             output_file=output_file,
-            compression=compression,
+            context=context,
             pdf_version=pdf_version,
             pdfa_part=pdfa_part,
             progressbar_class=None,
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 3ef661bf..48a24854 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -86,7 +86,8 @@ def test_unset_metadata(output_type, field, resources, outpdf):
         'title': b'NFY5f7Ft2DWMkxLhXwxvFf7eWR2KeK3vEDcd',
         'author': b'yXaryipxyRk9dVjWjSSaVaNCKeLRgEVzPRMp',
         'subject': b't49vimctvnuH7ZeAjAkv52ACvWFjcnm5MPJr',
-        'keywords': b's9EeALwUg7urA7fnnhm5EtUyC54sW2WPUzqh'}
+        'keywords': b's9EeALwUg7urA7fnnhm5EtUyC54sW2WPUzqh',
+    }
 
     p = run_ocrmypdf(
         input_file,
@@ -352,17 +353,24 @@ XMP_MAGIC = b'W5M0MpCehiHzreSzNTczkc9d'
 
 def test_prevent_gs_invalid_xml(resources, outdir):
     generate_pdfa_ps(outdir / 'pdfa.ps')
-    copyfile(resources / 'trivial.pdf', outdir / 'layers.rendered.pdf')
 
     # Inject a string with a trailing nul character into the DocumentInfo
     # dictionary of this PDF, as often occurs in practice.
-    with pikepdf.open(outdir / 'layers.rendered.pdf') as pike:
+    with pikepdf.open(resources / 'trivial.pdf') as pike:
         pike.Root.DocumentInfo = pikepdf.Dictionary(
             Title=b'String with trailing nul\x00'
         )
+        pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False)
 
-    options = get_parser().parse_args(
-        args=['-j', '1', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf']
+    _, options, _ = get_parser_options_plugins(
+        args=[
+            '-j',
+            '1',
+            '--output-type',
+            'pdfa-2',
+            'a.pdf',
+            'b.pdf',
+        ]
     )
     pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf')
     context = PdfContext(
@@ -387,17 +395,15 @@ def test_prevent_gs_invalid_xml(resources, outdir):
 
 def test_malformed_docinfo(caplog, resources, outdir):
     generate_pdfa_ps(outdir / 'pdfa.ps')
-    # copyfile(resources / 'trivial.pdf', outdir / 'layers.rendered.pdf')
 
     with pikepdf.open(resources / 'trivial.pdf') as pike:
         pike.trailer.Info = pikepdf.Stream(pike, b"<xml></xml>")
         pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False)
 
-    options = get_parser().parse_args(
+    _, options, _ = get_parser_options_plugins(
         args=[
             '-j',
             '1',
-            '--continue-on-soft-render-error',
             '--output-type',
             'pdfa-2',
             'a.pdf',