From b2cbbf009996ced42342f74e3e2127ca8fcd3b6b Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Wed, 20 Sep 2023 15:20:42 -0700 Subject: [PATCH] Migrate --pdfa-image-compression argument to being managed by Ghostscript plugin --- src/ocrmypdf/_validation.py | 11 ----------- src/ocrmypdf/builtin_plugins/ghostscript.py | 20 ++++++++++++++++++++ src/ocrmypdf/cli.py | 13 ------------- tests/test_metadata.py | 16 ++++++++++------ 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py index d75c0ff2..ceba2778 100644 --- a/src/ocrmypdf/_validation.py +++ b/src/ocrmypdf/_validation.py @@ -204,16 +204,6 @@ def check_options_ocr_behavior(options: Namespace) -> None: options.pages = _pages_from_ranges(options.pages) -def check_options_advanced(options: Namespace) -> None: - if options.pdfa_image_compression != 'auto' and not options.output_type.startswith( - 'pdfa' - ): - log.warning( - "--pdfa-image-compression argument only applies when " - "--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'" - ) - - def check_options_metadata(options: Namespace) -> None: docinfo = [options.title, options.author, options.keywords, options.subject] for s in (m for m in docinfo if m): @@ -240,7 +230,6 @@ def _check_plugin_invariant_options(options: Namespace) -> None: check_options_sidecar(options) check_options_preprocessing(options) check_options_ocr_behavior(options) - check_options_advanced(options) check_options_pillow(options) diff --git a/src/ocrmypdf/builtin_plugins/ghostscript.py b/src/ocrmypdf/builtin_plugins/ghostscript.py index 04f0e9d0..b5c645cd 100644 --- a/src/ocrmypdf/builtin_plugins/ghostscript.py +++ b/src/ocrmypdf/builtin_plugins/ghostscript.py @@ -26,6 +26,19 @@ def add_options(parser): default='LeaveColorUnchanged', help="Set Ghostscript color conversion strategy", ) + gs.add_argument( + '--pdfa-image-compression', + choices=['auto', 'jpeg', 'lossless'], + default='auto', + help="Specify how to compress images in the output PDF/A. 'auto' lets " + "OCRmyPDF decide. 'jpeg' changes all grayscale and color images to " + "JPEG compression. 'lossless' uses PNG-style lossless compression " + "for all images. Monochrome images are always compressed using a " + "lossless codec. Compression settings " + "are applied to all pages, including those for which OCR was " + "skipped. Not supported for --output-type=pdf ; that setting " + "preserves the original compression of all images.", + ) @hookimpl @@ -51,6 +64,13 @@ def check_options(options): raise ValueError( f"Invalid color conversion strategy: {options.color_conversion_strategy}" ) + if options.pdfa_image_compression != 'auto' and not options.output_type.startswith( + 'pdfa' + ): + log.warning( + "--pdfa-image-compression argument only applies when " + "--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'" + ) @hookimpl diff --git a/src/ocrmypdf/cli.py b/src/ocrmypdf/cli.py index 34c8973e..1473b846 100644 --- a/src/ocrmypdf/cli.py +++ b/src/ocrmypdf/cli.py @@ -402,19 +402,6 @@ Online documentation is located at: help="Only rotate pages when confidence is above this value (arbitrary " "units reported by tesseract)", ) - advanced.add_argument( - '--pdfa-image-compression', - choices=['auto', 'jpeg', 'lossless'], - default='auto', - help="Specify how to compress images in the output PDF/A. 'auto' lets " - "OCRmyPDF decide. 'jpeg' changes all grayscale and color images to " - "JPEG compression. 'lossless' uses PNG-style lossless compression " - "for all images. Monochrome images are always compressed using a " - "lossless codec. Compression settings " - "are applied to all pages, including those for which OCR was " - "skipped. Not supported for --output-type=pdf ; that setting " - "preserves the original compression of all images.", - ) advanced.add_argument( '--fast-web-view', type=numeric(float, 0), diff --git a/tests/test_metadata.py b/tests/test_metadata.py index acf55444..48a24854 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -362,10 +362,16 @@ def test_prevent_gs_invalid_xml(resources, outdir): ) pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False) - options = get_parser().parse_args( - args=['-j', '1', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf'] + _, options, _ = get_parser_options_plugins( + args=[ + '-j', + '1', + '--output-type', + 'pdfa-2', + 'a.pdf', + 'b.pdf', + ] ) - options.color_conversion_strategy = 'LeaveColorUnchanged' pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf') context = PdfContext( options, outdir, outdir / 'layers.rendered.pdf', pdfinfo, get_plugin_manager([]) @@ -394,18 +400,16 @@ def test_malformed_docinfo(caplog, resources, outdir): pike.trailer.Info = pikepdf.Stream(pike, b"") pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False) - options = get_parser().parse_args( + _, options, _ = get_parser_options_plugins( args=[ '-j', '1', - '--continue-on-soft-render-error', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf', ] ) - options.color_conversion_strategy = 'LeaveColorUnchanged' pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf') context = PdfContext( options, outdir, outdir / 'layers.rendered.pdf', pdfinfo, get_plugin_manager([])