Migrate the --pdfa-image-compression argument so that it is managed by the Ghostscript plugin

This commit is contained in:
James R. Barlow
2023-09-20 15:20:42 -07:00
parent 6b6c34af01
commit b2cbbf0099
4 changed files with 30 additions and 30 deletions

View File

@@ -204,16 +204,6 @@ def check_options_ocr_behavior(options: Namespace) -> None:
options.pages = _pages_from_ranges(options.pages)
def check_options_advanced(options: Namespace) -> None:
    """Warn if --pdfa-image-compression was given for a non-PDF/A output type.

    Nothing happens when ``options.pdfa_image_compression`` is ``'auto'`` or
    when ``options.output_type`` starts with ``'pdfa'``. In every other case a
    warning is logged; the options themselves are never modified.

    Args:
        options: Parsed command line options; ``pdfa_image_compression`` and
            ``output_type`` are the only attributes read.
    """
    # 'auto' means no explicit compression choice was requested, so there is
    # nothing to warn about (and output_type is deliberately not consulted).
    if options.pdfa_image_compression == 'auto':
        return

    # Any output type beginning with 'pdfa' accepts the compression setting.
    if options.output_type.startswith('pdfa'):
        return

    log.warning(
        "--pdfa-image-compression argument only applies when "
        "--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'"
    )
def check_options_metadata(options: Namespace) -> None:
docinfo = [options.title, options.author, options.keywords, options.subject]
for s in (m for m in docinfo if m):
@@ -240,7 +230,6 @@ def _check_plugin_invariant_options(options: Namespace) -> None:
check_options_sidecar(options)
check_options_preprocessing(options)
check_options_ocr_behavior(options)
check_options_advanced(options)
check_options_pillow(options)

View File

@@ -26,6 +26,19 @@ def add_options(parser):
default='LeaveColorUnchanged',
help="Set Ghostscript color conversion strategy",
)
gs.add_argument(
'--pdfa-image-compression',
choices=['auto', 'jpeg', 'lossless'],
default='auto',
help="Specify how to compress images in the output PDF/A. 'auto' lets "
"OCRmyPDF decide. 'jpeg' changes all grayscale and color images to "
"JPEG compression. 'lossless' uses PNG-style lossless compression "
"for all images. Monochrome images are always compressed using a "
"lossless codec. Compression settings "
"are applied to all pages, including those for which OCR was "
"skipped. Not supported for --output-type=pdf ; that setting "
"preserves the original compression of all images.",
)
@hookimpl
@@ -51,6 +64,13 @@ def check_options(options):
raise ValueError(
f"Invalid color conversion strategy: {options.color_conversion_strategy}"
)
if options.pdfa_image_compression != 'auto' and not options.output_type.startswith(
'pdfa'
):
log.warning(
"--pdfa-image-compression argument only applies when "
"--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'"
)
@hookimpl

View File

@@ -402,19 +402,6 @@ Online documentation is located at:
help="Only rotate pages when confidence is above this value (arbitrary "
"units reported by tesseract)",
)
advanced.add_argument(
'--pdfa-image-compression',
choices=['auto', 'jpeg', 'lossless'],
default='auto',
help="Specify how to compress images in the output PDF/A. 'auto' lets "
"OCRmyPDF decide. 'jpeg' changes all grayscale and color images to "
"JPEG compression. 'lossless' uses PNG-style lossless compression "
"for all images. Monochrome images are always compressed using a "
"lossless codec. Compression settings "
"are applied to all pages, including those for which OCR was "
"skipped. Not supported for --output-type=pdf ; that setting "
"preserves the original compression of all images.",
)
advanced.add_argument(
'--fast-web-view',
type=numeric(float, 0),

View File

@@ -362,10 +362,16 @@ def test_prevent_gs_invalid_xml(resources, outdir):
)
pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False)
options = get_parser().parse_args(
args=['-j', '1', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf']
_, options, _ = get_parser_options_plugins(
args=[
'-j',
'1',
'--output-type',
'pdfa-2',
'a.pdf',
'b.pdf',
]
)
options.color_conversion_strategy = 'LeaveColorUnchanged'
pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf')
context = PdfContext(
options, outdir, outdir / 'layers.rendered.pdf', pdfinfo, get_plugin_manager([])
@@ -394,18 +400,16 @@ def test_malformed_docinfo(caplog, resources, outdir):
pike.trailer.Info = pikepdf.Stream(pike, b"<xml></xml>")
pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False)
options = get_parser().parse_args(
_, options, _ = get_parser_options_plugins(
args=[
'-j',
'1',
'--continue-on-soft-render-error',
'--output-type',
'pdfa-2',
'a.pdf',
'b.pdf',
]
)
options.color_conversion_strategy = 'LeaveColorUnchanged'
pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf')
context = PdfContext(
options, outdir, outdir / 'layers.rendered.pdf', pdfinfo, get_plugin_manager([])