mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-05 13:16:55 -04:00
optimize plugin: solve linearization and "is optimization enabled?" issues
This commit is contained in:
@@ -817,13 +817,14 @@ def metadata_fixup(working_file: Path, context: PdfContext):
|
||||
missing = set(meta_original.keys()) - set(meta.keys())
|
||||
report_on_metadata(missing)
|
||||
|
||||
optimizing = context.plugin_manager.hook.is_optimization_enabled(
|
||||
context=context
|
||||
)
|
||||
pdf.save(
|
||||
output_file,
|
||||
**get_pdf_save_settings(options.output_type),
|
||||
linearize=( # Don't linearize if optimize() will be linearizing too
|
||||
should_linearize(working_file, context)
|
||||
if hasattr(options, 'optimize') and options.optimize == 0
|
||||
else False
|
||||
not optimizing and should_linearize(working_file, context)
|
||||
),
|
||||
)
|
||||
|
||||
@@ -833,7 +834,11 @@ def metadata_fixup(working_file: Path, context: PdfContext):
|
||||
def optimize_pdf(input_file: Path, context: PdfContext, executor: Executor):
|
||||
output_file = context.get_path('optimize.pdf')
|
||||
output_pdf = context.plugin_manager.hook.optimize_pdf(
|
||||
input_pdf=input_file, output_pdf=output_file, context=context, executor=executor
|
||||
input_pdf=input_file,
|
||||
output_pdf=output_file,
|
||||
context=context,
|
||||
executor=executor,
|
||||
linearize=should_linearize(input_file, context),
|
||||
)
|
||||
|
||||
input_size = input_file.stat().st_size
|
||||
|
||||
@@ -120,3 +120,6 @@ def get_parser_options_plugins(
|
||||
|
||||
options = parser.parse_args(args=args)
|
||||
return parser, options, plugin_manager
|
||||
|
||||
|
||||
__all__ = ['OcrmypdfPluginManager', 'get_plugin_manager', 'get_parser_options_plugins']
|
||||
|
||||
@@ -14,7 +14,7 @@ from pathlib import Path
|
||||
from ocrmypdf import PdfContext, hookimpl
|
||||
from ocrmypdf._concurrent import Executor
|
||||
from ocrmypdf._exec import jbig2enc, pngquant
|
||||
from ocrmypdf._pipeline import get_pdf_save_settings, should_linearize
|
||||
from ocrmypdf._pipeline import get_pdf_save_settings
|
||||
from ocrmypdf.cli import numeric
|
||||
from ocrmypdf.optimize import optimize
|
||||
from ocrmypdf.subprocess import check_external_program
|
||||
@@ -125,11 +125,20 @@ def check_options(options):
|
||||
|
||||
@hookimpl
|
||||
def optimize_pdf(
|
||||
input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
|
||||
input_pdf: Path,
|
||||
output_pdf: Path,
|
||||
context: PdfContext,
|
||||
executor: Executor,
|
||||
linearize: bool,
|
||||
) -> Path:
|
||||
save_settings = dict(
|
||||
linearize=should_linearize(input_pdf, context),
|
||||
linearize=linearize,
|
||||
**get_pdf_save_settings(context.options.output_type),
|
||||
)
|
||||
optimize(input_pdf, output_pdf, context, save_settings, executor)
|
||||
return output_pdf
|
||||
|
||||
|
||||
@hookimpl
|
||||
def is_optimization_enabled(context: PdfContext) -> bool:
|
||||
return context.options.optimize != 0
|
||||
|
||||
@@ -462,12 +462,16 @@ def generate_pdfa(
|
||||
|
||||
@hookspec(firstresult=True)
|
||||
def optimize_pdf(
|
||||
input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
|
||||
input_pdf: Path,
|
||||
output_pdf: Path,
|
||||
context: PdfContext,
|
||||
executor: Executor,
|
||||
linearize: bool,
|
||||
) -> Path:
|
||||
"""Optimize a PDF after image, OCR and metadata processing.
|
||||
|
||||
If the input_pdf is a PDF/A, the plugin must only modify input_pdf in a way
|
||||
that preserves the PDF/A status.
|
||||
If the input_pdf is a PDF/A, the plugin should modify input_pdf in a way
|
||||
that preserves the PDF/A status, or report to the user when this is not possible.
|
||||
|
||||
If the implementation fails to produce a smaller file than the input file, it
|
||||
should return input_pdf instead.
|
||||
@@ -479,6 +483,8 @@ def optimize_pdf(
|
||||
context: The current context.
|
||||
executor: An initialized executor which may be used during optimization,
|
||||
to distribute optimization tasks.
|
||||
linearize: If True, OCRmyPDF requires ``optimize_pdf`` to return a linearized
|
||||
(also known as "fast web view") PDF.
|
||||
|
||||
Returns:
|
||||
Path: If optimization is successful, the hook should return ``output_pdf``.
|
||||
@@ -488,3 +494,20 @@ def optimize_pdf(
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
|
||||
|
||||
@hookspec(firstresult=True)
|
||||
def is_optimization_enabled(context: PdfContext) -> bool:
|
||||
"""For a given PdfContext, OCRmyPDF asks the plugin if optimization is enabled.
|
||||
|
||||
It is assumed that an optimization plugin might be installed but could be
|
||||
disabled by user settings.
|
||||
|
||||
If this returns False, OCRmyPDF finalizes the PDF itself, e.g. linearizing it when needed.
|
||||
|
||||
Returns:
|
||||
True if the plugin's optimization is enabled.
|
||||
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
|
||||
@@ -16,7 +16,7 @@ from pikepdf.models.metadata import decode_pdf_date
|
||||
|
||||
from ocrmypdf._jobcontext import PdfContext
|
||||
from ocrmypdf._pipeline import convert_to_pdfa, metadata_fixup
|
||||
from ocrmypdf._plugin_manager import get_plugin_manager
|
||||
from ocrmypdf._plugin_manager import get_parser_options_plugins, get_plugin_manager
|
||||
from ocrmypdf.cli import get_parser
|
||||
from ocrmypdf.exceptions import ExitCode
|
||||
from ocrmypdf.pdfa import file_claims_pdfa, generate_pdfa_ps
|
||||
@@ -299,8 +299,8 @@ def test_kodak_toc(resources, outpdf):
|
||||
|
||||
|
||||
def test_metadata_fixup_warning(resources, outdir, caplog):
|
||||
options = get_parser().parse_args(
|
||||
args=['--output-type', 'pdfa-2', 'graph.pdf', 'out.pdf']
|
||||
_parser, options, _pm = get_parser_options_plugins(
|
||||
['--output-type', 'pdfa-2', 'graph.pdf', 'out.pdf']
|
||||
)
|
||||
|
||||
copyfile(resources / 'graph.pdf', outdir / 'graph.pdf')
|
||||
|
||||
Reference in New Issue
Block a user