From 276f421c446d8a202c0a461896551d3110eaf081 Mon Sep 17 00:00:00 2001
From: "James R. Barlow"
Date: Thu, 17 Dec 2015 08:49:08 -0800
Subject: [PATCH] Did a quick test of Ghostscript vs QPDF at PDF page splitting

qpdf won so hard it wasn't funny, even though it must be called once per page to do the job. Perhaps Ghostscript interprets it as a call to render the page?

time bash qpdf-test.fish ../tests/resources/multipage.pdf
0.07 real 0.02 user 0.03 sys

time gs -sDEVICE=pdfwrite -dSAFER -o '%06d.pdf' ../tests/resources/multipage.pdf
5.12 real 5.06 user 0.04 sys
---
 ocrmypdf/qpdf.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ocrmypdf/qpdf.py b/ocrmypdf/qpdf.py
index d3fba360..d958a6d2 100644
--- a/ocrmypdf/qpdf.py
+++ b/ocrmypdf/qpdf.py
@@ -66,6 +66,11 @@ def get_npages(input_file):
 
 
 def split_pages(input_file, work_folder, npages):
+    """Split multipage PDF into individual pages.
+
+    Incredibly enough, this multiple process approach is about 70 times
+    faster than using Ghostscript.
+    """
     for n in range(int(npages)):
         args_qpdf = [
             'qpdf', input_file,