From 4e4f0bfa1ffff829a2aa1a0c4c29cab97d5b8d86 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Tue, 31 Aug 2021 02:16:25 -0700 Subject: [PATCH] graft: use faster unparse_content_stream if available --- src/ocrmypdf/_graft.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/ocrmypdf/_graft.py b/src/ocrmypdf/_graft.py index 7b9a9a55..f1b69903 100644 --- a/src/ocrmypdf/_graft.py +++ b/src/ocrmypdf/_graft.py @@ -73,17 +73,24 @@ def strip_invisible_text(pdf, page): except AttributeError: return str(op).encode('ascii') - lines = [] + if hasattr(pikepdf, 'unparse_content_stream'): + content_stream = pikepdf.unparse_content_stream(stream) + else: + lines = [] - for operands, operator in stream: - if operator == pikepdf.Operator('INLINE IMAGE'): - iim = operands[0] - line = iim.unparse() - else: - line = b' '.join(convert(op) for op in operands) + b' ' + operator.unparse() - lines.append(line) + for operands, operator in stream: + if operator == pikepdf.Operator('INLINE IMAGE'): + iim = operands[0] + line = iim.unparse() + else: + line = ( + b' '.join(convert(op) for op in operands) + + b' ' + + operator.unparse() + ) + lines.append(line) - content_stream = b'\n'.join(lines) + content_stream = b'\n'.join(lines) page.Contents = pikepdf.Stream(pdf, content_stream)