diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py index 13cfdc06..15efde3a 100644 --- a/src/ocrmypdf/_pipeline.py +++ b/src/ocrmypdf/_pipeline.py @@ -817,13 +817,14 @@ def metadata_fixup(working_file: Path, context: PdfContext): missing = set(meta_original.keys()) - set(meta.keys()) report_on_metadata(missing) + optimizing = context.plugin_manager.hook.is_optimization_enabled( + context=context + ) pdf.save( output_file, **get_pdf_save_settings(options.output_type), linearize=( # Don't linearize if optimize() will be linearizing too - should_linearize(working_file, context) - if hasattr(options, 'optimize') and options.optimize == 0 - else False + not optimizing and should_linearize(working_file, context) ), ) @@ -833,7 +834,11 @@ def metadata_fixup(working_file: Path, context: PdfContext): def optimize_pdf(input_file: Path, context: PdfContext, executor: Executor): output_file = context.get_path('optimize.pdf') output_pdf = context.plugin_manager.hook.optimize_pdf( - input_pdf=input_file, output_pdf=output_file, context=context, executor=executor + input_pdf=input_file, + output_pdf=output_file, + context=context, + executor=executor, + linearize=should_linearize(input_file, context), ) input_size = input_file.stat().st_size diff --git a/src/ocrmypdf/_plugin_manager.py b/src/ocrmypdf/_plugin_manager.py index 516adbaf..5ca792f0 100644 --- a/src/ocrmypdf/_plugin_manager.py +++ b/src/ocrmypdf/_plugin_manager.py @@ -120,3 +120,6 @@ def get_parser_options_plugins( options = parser.parse_args(args=args) return parser, options, plugin_manager + + +__all__ = ['OcrmypdfPluginManager', 'get_plugin_manager', 'get_parser_options_plugins'] diff --git a/src/ocrmypdf/builtin_plugins/optimize.py b/src/ocrmypdf/builtin_plugins/optimize.py index 30386ccd..fed0a997 100644 --- a/src/ocrmypdf/builtin_plugins/optimize.py +++ b/src/ocrmypdf/builtin_plugins/optimize.py @@ -14,7 +14,7 @@ from pathlib import Path from ocrmypdf import PdfContext, hookimpl from 
ocrmypdf._concurrent import Executor from ocrmypdf._exec import jbig2enc, pngquant -from ocrmypdf._pipeline import get_pdf_save_settings, should_linearize +from ocrmypdf._pipeline import get_pdf_save_settings from ocrmypdf.cli import numeric from ocrmypdf.optimize import optimize from ocrmypdf.subprocess import check_external_program @@ -125,11 +125,20 @@ def check_options(options): @hookimpl def optimize_pdf( - input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor + input_pdf: Path, + output_pdf: Path, + context: PdfContext, + executor: Executor, + linearize: bool, ) -> Path: save_settings = dict( - linearize=should_linearize(input_pdf, context), + linearize=linearize, **get_pdf_save_settings(context.options.output_type), ) optimize(input_pdf, output_pdf, context, save_settings, executor) return output_pdf + + +@hookimpl +def is_optimization_enabled(context: PdfContext) -> bool: + return context.options.optimize != 0 diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py index 248bd339..528d033a 100644 --- a/src/ocrmypdf/pluginspec.py +++ b/src/ocrmypdf/pluginspec.py @@ -462,12 +462,16 @@ def generate_pdfa( @hookspec(firstresult=True) def optimize_pdf( - input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor + input_pdf: Path, + output_pdf: Path, + context: PdfContext, + executor: Executor, + linearize: bool, ) -> Path: """Optimize a PDF after image, OCR and metadata processing. - If the input_pdf is a PDF/A, the plugin must only modify input_pdf in a way - that preserves the PDF/A status. + If the input_pdf is a PDF/A, the plugin should modify input_pdf in a way + that preserves the PDF/A status, or report to the user when this is not possible. If the implementation fails to produce a smaller file than the input file, it should return input_pdf instead. @@ -479,6 +483,8 @@ def optimize_pdf( context: The current context. 
executor: An initialized executor which may be used during optimization, to distribute optimization tasks. + linearize: If True, OCRmyPDF requires ``optimize_pdf`` to return a linearized + (also known as "fast web view") PDF. Returns: Path: If optimization is successful, the hook should return ``output_file``. @@ -488,3 +494,20 @@ def optimize_pdf( Note: This is a :ref:`firstresult hook`. """ + + +@hookspec(firstresult=True) +def is_optimization_enabled(context: PdfContext) -> bool: + """For a given PdfContext, OCRmyPDF asks the plugin if optimization is enabled. + + It is assumed that an optimization plugin might be installed but could be + disabled by user settings. + + If this returns False, OCRmyPDF finalizes the PDF itself (e.g. linearization). + + Returns: + True if the plugin's optimization is enabled. + + Note: + This is a :ref:`firstresult hook`. + """ diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 65a1d255..d5e2aa7e 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -16,7 +16,7 @@ from pikepdf.models.metadata import decode_pdf_date from ocrmypdf._jobcontext import PdfContext from ocrmypdf._pipeline import convert_to_pdfa, metadata_fixup -from ocrmypdf._plugin_manager import get_plugin_manager +from ocrmypdf._plugin_manager import get_parser_options_plugins, get_plugin_manager from ocrmypdf.cli import get_parser from ocrmypdf.exceptions import ExitCode from ocrmypdf.pdfa import file_claims_pdfa, generate_pdfa_ps @@ -299,8 +299,8 @@ def test_kodak_toc(resources, outpdf): def test_metadata_fixup_warning(resources, outdir, caplog): - options = get_parser().parse_args( - args=['--output-type', 'pdfa-2', 'graph.pdf', 'out.pdf'] + _parser, options, _pm = get_parser_options_plugins( + ['--output-type', 'pdfa-2', 'graph.pdf', 'out.pdf'] ) copyfile(resources / 'graph.pdf', outdir / 'graph.pdf')