mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-06 13:47:41 -04:00
Migrate the --pdfa-image-compression argument so that it is managed by the Ghostscript plugin
This commit is contained in:
@@ -204,16 +204,6 @@ def check_options_ocr_behavior(options: Namespace) -> None:
|
||||
options.pages = _pages_from_ranges(options.pages)
|
||||
|
||||
|
||||
def check_options_advanced(options: Namespace) -> None:
    """Warn when --pdfa-image-compression is set for a non-PDF/A output type.

    Args:
        options: Parsed command-line options. Reads ``pdfa_image_compression``
            and, only when that is not ``'auto'``, ``output_type``.

    Returns:
        None. The only effect is an optional warning emitted through ``log``.
    """
    # The default setting never needs a warning, so stop before looking at
    # the output type at all.
    if options.pdfa_image_compression == 'auto':
        return

    # Any output type whose name begins with 'pdfa' honors the setting.
    if options.output_type.startswith('pdfa'):
        return

    log.warning(
        "--pdfa-image-compression argument only applies when "
        "--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'"
    )
|
||||
|
||||
|
||||
def check_options_metadata(options: Namespace) -> None:
|
||||
docinfo = [options.title, options.author, options.keywords, options.subject]
|
||||
for s in (m for m in docinfo if m):
|
||||
@@ -240,7 +230,6 @@ def _check_plugin_invariant_options(options: Namespace) -> None:
|
||||
check_options_sidecar(options)
|
||||
check_options_preprocessing(options)
|
||||
check_options_ocr_behavior(options)
|
||||
check_options_advanced(options)
|
||||
check_options_pillow(options)
|
||||
|
||||
|
||||
|
||||
@@ -26,6 +26,19 @@ def add_options(parser):
|
||||
default='LeaveColorUnchanged',
|
||||
help="Set Ghostscript color conversion strategy",
|
||||
)
|
||||
gs.add_argument(
|
||||
'--pdfa-image-compression',
|
||||
choices=['auto', 'jpeg', 'lossless'],
|
||||
default='auto',
|
||||
help="Specify how to compress images in the output PDF/A. 'auto' lets "
|
||||
"OCRmyPDF decide. 'jpeg' changes all grayscale and color images to "
|
||||
"JPEG compression. 'lossless' uses PNG-style lossless compression "
|
||||
"for all images. Monochrome images are always compressed using a "
|
||||
"lossless codec. Compression settings "
|
||||
"are applied to all pages, including those for which OCR was "
|
||||
"skipped. Not supported for --output-type=pdf ; that setting "
|
||||
"preserves the original compression of all images.",
|
||||
)
|
||||
|
||||
|
||||
@hookimpl
|
||||
@@ -51,6 +64,13 @@ def check_options(options):
|
||||
raise ValueError(
|
||||
f"Invalid color conversion strategy: {options.color_conversion_strategy}"
|
||||
)
|
||||
if options.pdfa_image_compression != 'auto' and not options.output_type.startswith(
|
||||
'pdfa'
|
||||
):
|
||||
log.warning(
|
||||
"--pdfa-image-compression argument only applies when "
|
||||
"--output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2'"
|
||||
)
|
||||
|
||||
|
||||
@hookimpl
|
||||
|
||||
@@ -402,19 +402,6 @@ Online documentation is located at:
|
||||
help="Only rotate pages when confidence is above this value (arbitrary "
|
||||
"units reported by tesseract)",
|
||||
)
|
||||
advanced.add_argument(
|
||||
'--pdfa-image-compression',
|
||||
choices=['auto', 'jpeg', 'lossless'],
|
||||
default='auto',
|
||||
help="Specify how to compress images in the output PDF/A. 'auto' lets "
|
||||
"OCRmyPDF decide. 'jpeg' changes all grayscale and color images to "
|
||||
"JPEG compression. 'lossless' uses PNG-style lossless compression "
|
||||
"for all images. Monochrome images are always compressed using a "
|
||||
"lossless codec. Compression settings "
|
||||
"are applied to all pages, including those for which OCR was "
|
||||
"skipped. Not supported for --output-type=pdf ; that setting "
|
||||
"preserves the original compression of all images.",
|
||||
)
|
||||
advanced.add_argument(
|
||||
'--fast-web-view',
|
||||
type=numeric(float, 0),
|
||||
|
||||
@@ -362,10 +362,16 @@ def test_prevent_gs_invalid_xml(resources, outdir):
|
||||
)
|
||||
pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False)
|
||||
|
||||
options = get_parser().parse_args(
|
||||
args=['-j', '1', '--output-type', 'pdfa-2', 'a.pdf', 'b.pdf']
|
||||
_, options, _ = get_parser_options_plugins(
|
||||
args=[
|
||||
'-j',
|
||||
'1',
|
||||
'--output-type',
|
||||
'pdfa-2',
|
||||
'a.pdf',
|
||||
'b.pdf',
|
||||
]
|
||||
)
|
||||
options.color_conversion_strategy = 'LeaveColorUnchanged'
|
||||
pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf')
|
||||
context = PdfContext(
|
||||
options, outdir, outdir / 'layers.rendered.pdf', pdfinfo, get_plugin_manager([])
|
||||
@@ -394,18 +400,16 @@ def test_malformed_docinfo(caplog, resources, outdir):
|
||||
pike.trailer.Info = pikepdf.Stream(pike, b"<xml></xml>")
|
||||
pike.save(outdir / 'layers.rendered.pdf', fix_metadata_version=False)
|
||||
|
||||
options = get_parser().parse_args(
|
||||
_, options, _ = get_parser_options_plugins(
|
||||
args=[
|
||||
'-j',
|
||||
'1',
|
||||
'--continue-on-soft-render-error',
|
||||
'--output-type',
|
||||
'pdfa-2',
|
||||
'a.pdf',
|
||||
'b.pdf',
|
||||
]
|
||||
)
|
||||
options.color_conversion_strategy = 'LeaveColorUnchanged'
|
||||
pdfinfo = PdfInfo(outdir / 'layers.rendered.pdf')
|
||||
context = PdfContext(
|
||||
options, outdir, outdir / 'layers.rendered.pdf', pdfinfo, get_plugin_manager([])
|
||||
|
||||
Reference in New Issue
Block a user