From cd49e70154f82f54bf74fc5bb2586fe7e0358971 Mon Sep 17 00:00:00 2001 From: Tristan Porteries Date: Mon, 15 Nov 2021 09:32:58 +0100 Subject: [PATCH] ghostscript: force interpolation when rendering (#855) Specifying the --oversample option tends to introduce upsampling during rendering by rasterizing the page at a higher DPI. This upsampling improves OCR results, but a correct choice of interpolation method can increase OCR quality even further. Ghostscript seems to use nearest-neighbor interpolation by default for PDF. This method doesn't average newly introduced pixels with the original pixels, resulting in a nearly identical image that merely has more pixels. Providing -dInterpolateControl=-1 forces interpolation on. In this commit, the above option is passed to all Ghostscript rendering calls. After testing, rendering a page at the same DPI with interpolation enabled does not introduce significant time overhead. time (repeat 40 gs -dQUIET -dSAFER -dBATCH -dNOPAUSE -sDEVICE=png16m \ -dFirstPage=1 -dLastPage=1 -r100.000000x100.000000 \ -dInterpolateControl=-1 -o /dev/null -dAutoRotatePages=/None -f pzII.pdf) 7,66s user 0,33s system 99% cpu 8,012 total time (repeat 40 gs -dQUIET -dSAFER -dBATCH -dNOPAUSE -sDEVICE=png16m \ -dFirstPage=1 -dLastPage=1 -r100.000000x100.000000 \ -o /dev/null -dAutoRotatePages=/None -f pzII.pdf) 7,42s user 0,39s system 99% cpu 7,808 total Ghostscript interpolation control reference: https://www.ghostscript.com/doc/current/Use.htm --- src/ocrmypdf/_exec/ghostscript.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py index 5c357f1b..46ecaf7c 100644 --- a/src/ocrmypdf/_exec/ghostscript.py +++ b/src/ocrmypdf/_exec/ghostscript.py @@ -95,6 +95,7 @@ def rasterize_pdf( '-dSAFER', '-dBATCH', '-dNOPAUSE', + '-dInterpolateControl=-1', f'-sDEVICE={raster_device}', f'-dFirstPage={pageno}', f'-dLastPage={pageno}',