optimize plugin: solve linearization and "is optimization enabled?" issues

This commit is contained in:
James R. Barlow
2022-06-13 00:59:41 -07:00
parent 61069660a2
commit 13d11e76e5
5 changed files with 53 additions and 13 deletions

View File

@@ -817,13 +817,14 @@ def metadata_fixup(working_file: Path, context: PdfContext):
missing = set(meta_original.keys()) - set(meta.keys())
report_on_metadata(missing)
optimizing = context.plugin_manager.hook.is_optimization_enabled(
context=context
)
pdf.save(
output_file,
**get_pdf_save_settings(options.output_type),
linearize=( # Don't linearize if optimize() will be linearizing too
should_linearize(working_file, context)
if hasattr(options, 'optimize') and options.optimize == 0
else False
not optimizing and should_linearize(working_file, context)
),
)
@@ -833,7 +834,11 @@ def metadata_fixup(working_file: Path, context: PdfContext):
def optimize_pdf(input_file: Path, context: PdfContext, executor: Executor):
output_file = context.get_path('optimize.pdf')
output_pdf = context.plugin_manager.hook.optimize_pdf(
input_pdf=input_file, output_pdf=output_file, context=context, executor=executor
input_pdf=input_file,
output_pdf=output_file,
context=context,
executor=executor,
linearize=should_linearize(input_file, context),
)
input_size = input_file.stat().st_size

View File

@@ -120,3 +120,6 @@ def get_parser_options_plugins(
options = parser.parse_args(args=args)
return parser, options, plugin_manager
__all__ = ['OcrmypdfPluginManager', 'get_plugin_manager', 'get_parser_options_plugins']

View File

@@ -14,7 +14,7 @@ from pathlib import Path
from ocrmypdf import PdfContext, hookimpl
from ocrmypdf._concurrent import Executor
from ocrmypdf._exec import jbig2enc, pngquant
from ocrmypdf._pipeline import get_pdf_save_settings, should_linearize
from ocrmypdf._pipeline import get_pdf_save_settings
from ocrmypdf.cli import numeric
from ocrmypdf.optimize import optimize
from ocrmypdf.subprocess import check_external_program
@@ -125,11 +125,20 @@ def check_options(options):
@hookimpl
def optimize_pdf(
input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
input_pdf: Path,
output_pdf: Path,
context: PdfContext,
executor: Executor,
linearize: bool,
) -> Path:
save_settings = dict(
linearize=should_linearize(input_pdf, context),
linearize=linearize,
**get_pdf_save_settings(context.options.output_type),
)
optimize(input_pdf, output_pdf, context, save_settings, executor)
return output_pdf
@hookimpl
def is_optimization_enabled(context: PdfContext) -> bool:
    """Tell OCRmyPDF whether this plugin's optimization is turned on.

    Optimization counts as disabled only when the ``optimize`` option
    equals 0; any other value is reported as enabled.

    Args:
        context: The current context, whose ``options.optimize`` is read.

    Returns:
        True if ``context.options.optimize`` is not 0.
    """
    optimize_level = context.options.optimize
    return optimize_level != 0

View File

@@ -462,12 +462,16 @@ def generate_pdfa(
@hookspec(firstresult=True)
def optimize_pdf(
input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
input_pdf: Path,
output_pdf: Path,
context: PdfContext,
executor: Executor,
linearize: bool,
) -> Path:
"""Optimize a PDF after image, OCR and metadata processing.
If the input_pdf is a PDF/A, the plugin must only modify input_pdf in a way
that preserves the PDF/A status.
If the input_pdf is a PDF/A, the plugin should modify input_pdf in a way
that preserves the PDF/A status, or report to the user when this is not possible.
If the implementation fails to produce a smaller file than the input file, it
should return input_pdf instead.
@@ -479,6 +483,8 @@ def optimize_pdf(
context: The current context.
executor: An initialized executor which may be used during optimization,
to distribute optimization tasks.
linearize: If True, OCRmyPDF requires ``optimize_pdf`` to return a linearized
(also known as "fast web view") PDF.
Returns:
Path: If optimization is successful, the hook should return ``output_file``.
@@ -488,3 +494,20 @@ def optimize_pdf(
Note:
This is a :ref:`firstresult hook<firstresult>`.
"""
@hookspec(firstresult=True)
def is_optimization_enabled(context: PdfContext) -> bool:
    """For a given PdfContext, OCRmyPDF asks the plugin if optimization is enabled.

    An optimization plugin might be installed but could be disabled by user
    settings, so OCRmyPDF asks the plugin instead of assuming.

    If this returns False, OCRmyPDF will take certain actions to finalize the PDF.

    Args:
        context: The current context.

    Returns:
        True if the plugin's optimization is enabled.

    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """

View File

@@ -16,7 +16,7 @@ from pikepdf.models.metadata import decode_pdf_date
from ocrmypdf._jobcontext import PdfContext
from ocrmypdf._pipeline import convert_to_pdfa, metadata_fixup
from ocrmypdf._plugin_manager import get_plugin_manager
from ocrmypdf._plugin_manager import get_parser_options_plugins, get_plugin_manager
from ocrmypdf.cli import get_parser
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.pdfa import file_claims_pdfa, generate_pdfa_ps
@@ -299,8 +299,8 @@ def test_kodak_toc(resources, outpdf):
def test_metadata_fixup_warning(resources, outdir, caplog):
options = get_parser().parse_args(
args=['--output-type', 'pdfa-2', 'graph.pdf', 'out.pdf']
_parser, options, _pm = get_parser_options_plugins(
['--output-type', 'pdfa-2', 'graph.pdf', 'out.pdf']
)
copyfile(resources / 'graph.pdf', outdir / 'graph.pdf')