From 83f9dfbac42550101bbca69fe7bb12e11f2b1aac Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Fri, 28 Aug 2015 04:47:57 -0700 Subject: [PATCH] Use png256 raster device when possible Someone reported a bug where the .png input to unpaper ended up being type 'P' (palette) for some reason, which was not supported in unpaper. Not sure how it happened, but it seemed easier to fix by explicitly supporting palette images. Here we use png256 if it would capture all colors in the input file. It's up to tesseract/reportlab to make use of the palette PNG when rendering. --- ocrmypdf/main.py | 9 +++++++-- ocrmypdf/pageinfo.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ocrmypdf/main.py b/ocrmypdf/main.py index b99617a8..2660ba28 100755 --- a/ocrmypdf/main.py +++ b/ocrmypdf/main.py @@ -475,10 +475,15 @@ def rasterize_with_ghostscript( if all(image['comp'] == 1 for image in pageinfo['images']): if all(image['bpc'] == 1 for image in pageinfo['images']): device = 'pngmono' - elif not any(image['color'] == 'color' - for image in pageinfo['images']): + elif all(image['bpc'] > 1 and image['color'] == 'index' + for image in pageinfo['images']): + device = 'png256' + elif all(image['bpc'] > 1 and image['color'] == 'gray' + for image in pageinfo['images']): device = 'pnggray' + log.debug("Rendering {0} with {1}".format( + os.path.basename(input_file), device)) xres = max(pageinfo['xres'], options.oversample or 0) yres = max(pageinfo['yres'], options.oversample or 0) diff --git a/ocrmypdf/pageinfo.py b/ocrmypdf/pageinfo.py index 499ad7e7..ff9b1452 100644 --- a/ocrmypdf/pageinfo.py +++ b/ocrmypdf/pageinfo.py @@ -34,6 +34,7 @@ FRIENDLY_COMP = { 'rgb': 3, 'cmyk': 4, 'lab': 3, + 'index': 1 }