Add progressbar for metadata_fixup

The metadata fixup step might take a while for big files. Pdf.open() is potentially expensive as well, but QPDF doesn't give us progress feedback for that.

Closes #1313 ("Show progress during postprocessing")
This commit is contained in:
James R. Barlow
2024-05-19 00:46:50 -07:00
parent 950c700274
commit 9a3c5a3f7c
2 changed files with 42 additions and 9 deletions

View File

@@ -153,18 +153,45 @@ def _set_language(pdf: Pdf, languages: list[str]):
pdf.Root.Lang = iso639_2
class MetadataProgress:
    """Wrap a progress bar so it can be used as a percent-complete callback.

    The instance is both a context manager (entering/exiting the wrapped
    progress bar) and a callable that forwards a percentage to the bar's
    ``update(completed=...)`` method.

    If ``progressbar_class`` is falsy, no bar is created and every operation
    is a no-op, so callers can use the object unconditionally.
    """

    def __init__(self, progressbar_class):
        """Create the wrapped progress bar.

        Args:
            progressbar_class: Callable accepting ``total``, ``desc`` and
                ``unit`` keyword arguments and returning a progress bar, or a
                falsy value to disable progress reporting.
        """
        self.progressbar_class = progressbar_class
        # Only instantiate when a class was supplied; calling a falsy value
        # here would raise before the guard in __call__ could ever apply.
        self.progressbar = (
            progressbar_class(total=100, desc="Linearizing", unit='%')
            if progressbar_class
            else None
        )

    def __enter__(self):
        """Enter the wrapped progress bar (if any) and return this wrapper."""
        if self.progressbar is not None:
            self.progressbar.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the wrapped progress bar and return its result.

        Returns ``False`` (do not suppress exceptions) when no bar exists.
        """
        if self.progressbar is None:
            return False
        return self.progressbar.__exit__(exc_type, exc_value, traceback)

    def __call__(self, percent: int):
        """Report progress by setting the bar's completed amount.

        Args:
            percent: Completed amount to report; the bar is created with
                ``total=100``.
        """
        if self.progressbar is None:
            return
        self.progressbar.update(completed=percent)
def metadata_fixup(
working_file: Path, context: PdfContext, pdf_save_settings: dict[str, Any]
) -> Path:
"""Fix certain metadata fields after Ghostscript PDF/A conversion.
"""Fix certain metadata fields whether PDF or PDF/A.
Override some of Ghostscript's metadata choices.
Also report on metadata in the input file that was not retained during
PDF/A conversion.
conversion.
"""
output_file = context.get_path('metafix.pdf')
options = context.options
with Pdf.open(context.origin) as original, Pdf.open(working_file) as pdf:
pbar_class = context.plugin_manager.hook.get_progressbar_class()
with (
Pdf.open(context.origin) as original,
Pdf.open(working_file) as pdf,
MetadataProgress(pbar_class) as pbar,
):
docinfo = get_docinfo(original, context)
with (
original.open_metadata(
@@ -182,6 +209,6 @@ def metadata_fixup(
report_on_metadata(options, meta_missing)
_set_language(pdf, options.languages)
pdf.save(output_file, **pdf_save_settings)
pdf.save(output_file, progress=pbar, **pdf_save_settings)
return output_file

View File

@@ -66,7 +66,7 @@ class ProgressBar(Protocol):
def __exit__(self, *args):
"""Exit a progress bar context."""
def update(self, n=1):
def update(self, n=1, *, completed=None):
"""Update the progress bar by an increment.
For use within a progress bar context.
@@ -85,7 +85,7 @@ class NullProgressBar:
def __exit__(self, exc_type, exc_value, traceback):
return False
def update(self, _arg=None):
def update(self, _arg=None, *, completed=None):
return
@@ -103,6 +103,7 @@ class RichProgressBar:
disable: bool = False,
**kwargs,
):
self._entered = False
self.progress = Progress(
TextColumn(
"[progress.description]{task.description}",
@@ -130,6 +131,7 @@ class RichProgressBar:
def __enter__(self):
self.progress.start()
self._entered = True
return self
def __exit__(self, exc_type, exc_value, traceback):
@@ -137,6 +139,10 @@ class RichProgressBar:
self.progress.stop()
return False
def update(self, value=None):
advance = self.unit_scale if value is None else value
self.progress.update(self.progress_bar, advance=advance)
def update(self, n=1, *, completed=None):
    """Advance the progress bar or set its absolute completed amount.

    Args:
        n: Amount to advance by when ``completed`` is not given. Passing
            ``None`` advances by ``self.unit_scale`` instead.
        completed: If not ``None``, set the task's completed amount to this
            value; ``n`` is ignored in that case.

    Raises:
        AssertionError: If ``self._entered`` is false, i.e. the progress bar
            is updated before being entered.
    """
    # Raise explicitly instead of using an ``assert`` statement so the check
    # still runs when Python is started with -O (which strips asserts).
    if not self._entered:
        raise AssertionError("Progress bar must be entered before updating")

    if completed is not None:
        self.progress.update(self.progress_bar, completed=completed)
        return

    # NOTE(review): with the default n=1, unit_scale is only applied when a
    # caller passes n=None explicitly -- confirm that is intended.
    advance = self.unit_scale if n is None else n
    self.progress.update(self.progress_bar, advance=advance)