graft: use faster unparse_content_stream if available

2026-05-04 12:48:02 -04:00 · 2021-08-31 02:16:25 -07:00
parent b91096c615
commit 4e4f0bfa1f
1 changed file with 16 additions and 9 deletions
--- a/src/ocrmypdf/_graft.py
+++ b/src/ocrmypdf/_graft.py
@@ -73,17 +73,24 @@ def strip_invisible_text(pdf, page):
        except AttributeError:
            return str(op).encode('ascii')

-    lines = []
+    if hasattr(pikepdf, 'unparse_content_stream'):
+        content_stream = pikepdf.unparse_content_stream(stream)
+    else:
+        lines = []

-    for operands, operator in stream:
-        if operator == pikepdf.Operator('INLINE IMAGE'):
-            iim = operands[0]
-            line = iim.unparse()
-        else:
-            line = b' '.join(convert(op) for op in operands) + b' ' + operator.unparse()
-        lines.append(line)
+        for operands, operator in stream:
+            if operator == pikepdf.Operator('INLINE IMAGE'):
+                iim = operands[0]
+                line = iim.unparse()
+            else:
+                line = (
+                    b' '.join(convert(op) for op in operands)
+                    + b' '
+                    + operator.unparse()
+                )
+            lines.append(line)

-    content_stream = b'\n'.join(lines)
+        content_stream = b'\n'.join(lines)
    page.Contents = pikepdf.Stream(pdf, content_stream)