diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py index cc7142ba..56c17e35 100644 --- a/src/ocrmypdf/_pipeline.py +++ b/src/ocrmypdf/_pipeline.py @@ -54,9 +54,9 @@ def triage_image_file(input_file, output_file, options, log): # Recover the original filename log.error(str(e).replace(input_file, options.input_file)) raise UnsupportedImageFormatError() from e - else: - log.info("Input file is an image") + with im: + log.info("Input file is an image") if 'dpi' in im.info: if im.info['dpi'] <= (96, 96) and not options.image_dpi: log.info("Image size: (%d, %d)" % im.size) @@ -89,7 +89,6 @@ def triage_image_file(input_file, output_file, options, log): elif im.mode == 'CMYK': log.info('Input CMYK image has no ICC profile, not usable') raise UnsupportedImageFormatError() - im.close() try: log.info("Image seems valid. Try converting to PDF...") diff --git a/src/ocrmypdf/exec/pngquant.py b/src/ocrmypdf/exec/pngquant.py index f22bb68e..5dbe5b64 100644 --- a/src/ocrmypdf/exec/pngquant.py +++ b/src/ocrmypdf/exec/pngquant.py @@ -40,8 +40,7 @@ def available(): def quantize(input_file, output_file, quality_min, quality_max): if input_file.endswith('.jpg'): - im = Image.open(input_file) - with NamedTemporaryFile(suffix='.png') as tmp: + with Image.open(input_file) as im, NamedTemporaryFile(suffix='.png') as tmp: im.save(tmp) args = [ 'pngquant', diff --git a/src/ocrmypdf/exec/tesseract.py b/src/ocrmypdf/exec/tesseract.py index baefba89..03bf037d 100644 --- a/src/ocrmypdf/exec/tesseract.py +++ b/src/ocrmypdf/exec/tesseract.py @@ -233,8 +233,8 @@ def _generate_null_hocr(output_hocr, output_sidecar, image): the same size as the input image.""" from PIL import Image - im = Image.open(image) - w, h = im.size + with Image.open(image) as im: + w, h = im.size with open(output_hocr, 'w', encoding="utf-8") as f: f.write(HOCR_TEMPLATE.format(w, h)) diff --git a/src/ocrmypdf/exec/unpaper.py b/src/ocrmypdf/exec/unpaper.py index d3ee1ea4..e186dea8 100644 --- a/src/ocrmypdf/exec/unpaper.py +++ b/src/ocrmypdf/exec/unpaper.py @@ -42,33 +42,30 @@ def run(input_file, output_file, dpi, log, mode_args): SUFFIXES = {'1': '.pbm', 'L': '.pgm', 'RGB': '.ppm'} - im = Image.open(input_file) - if im.mode not in SUFFIXES.keys(): - log.info("Converting image to other colorspace") + with TemporaryDirectory() as tmpdir, Image.open(input_file) as im: + if im.mode not in SUFFIXES.keys(): + log.info("Converting image to other colorspace") + try: + if im.mode == 'P' and len(im.getcolors()) == 2: + im = im.convert(mode='1') + else: + im = im.convert(mode='RGB') + except IOError as e: + im.close() + raise MissingDependencyError( + "Could not convert image with type " + im.mode + ) from e + try: - if im.mode == 'P' and len(im.getcolors()) == 2: - im = im.convert(mode='1') - else: - im = im.convert(mode='RGB') - except IOError as e: - im.close() + suffix = SUFFIXES[im.mode] + except KeyError: raise MissingDependencyError( - "Could not convert image with type " + im.mode + "Failed to convert image to a supported format." ) from e - try: - suffix = SUFFIXES[im.mode] - except KeyError: - im.close() - raise MissingDependencyError( - "Failed to convert image to a supported format." - ) from e - - with TemporaryDirectory() as tmpdir: input_pnm = os.path.join(tmpdir, f'input{suffix}') output_pnm = os.path.join(tmpdir, f'output{suffix}') im.save(input_pnm, format='PPM') - im.close() # To prevent any shenanigans from accepting arbitrary parameters in # --unpaper-args, we: @@ -95,10 +92,12 @@ def run(input_file, output_file, dpi, log, mode_args): log.debug(proc.stdout) # unpaper sets dpi to 72; fix this try: - Image.open(output_pnm).save(output_file, dpi=(dpi, dpi)) + with Image.open(output_pnm) as imout: + imout.save(output_file, dpi=(dpi, dpi)) except (FileNotFoundError, OSError): raise SubprocessOutputError( - "unpaper: failed to produce the expected output file. Called with: " + "unpaper: failed to produce the expected output file. " + + " Called with: " + str(args_unpaper) ) from None diff --git a/tests/test_lept.py b/tests/test_lept.py index 804ca215..2504c600 100644 --- a/tests/test_lept.py +++ b/tests/test_lept.py @@ -38,7 +38,8 @@ def test_colormap_backgroundnorm(resources): def crom_pix(resources): pix = lept.Pix.open(resources / 'crom.png') im = Image.open(resources / 'crom.png') - return pix, im + yield pix, im + im.close() def test_pix_basic(crom_pix): diff --git a/tests/test_main.py b/tests/test_main.py index caf4a0e8..11e02993 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -118,8 +118,8 @@ def test_deskew(spoof_tesseract_noop, resources, outdir): def test_remove_background(spoof_tesseract_noop, resources, outdir): # Ensure the input image does not contain pure white/black - im = Image.open(resources / 'congress.jpg') - assert im.getextrema() != ((0, 255), (0, 255), (0, 255)) + with Image.open(resources / 'congress.jpg') as im: + assert im.getextrema() != ((0, 255), (0, 255), (0, 255)) output_pdf = check_ocrmypdf( resources / 'congress.jpg', @@ -145,8 +145,8 @@ def test_remove_background(spoof_tesseract_noop, resources, outdir): ) # The output image should contain pure white and black - im = Image.open(output_png) - assert im.getextrema() == ((0, 255), (0, 255), (0, 255)) + with Image.open(output_png) as im: + assert im.getextrema() == ((0, 255), (0, 255), (0, 255)) # This will run 5 * 2 * 2 = 20 test cases @@ -792,6 +792,7 @@ def test_compression_preserved( assert pdfimage.color == Colorspace.rgb, "Colorspace changed" elif im.mode.startswith('L'): assert pdfimage.color == Colorspace.gray, "Colorspace changed" + im.close() @pytest.mark.parametrize( @@ -853,6 +854,7 @@ def test_compression_changed( assert pdfimage.color == Colorspace.rgb, "Colorspace changed" elif im.mode.startswith('L'): assert pdfimage.color == Colorspace.gray, "Colorspace changed" + im.close() def test_sidecar_pagecount(spoof_tesseract_cache, resources, outpdf): diff --git a/tests/test_optimize.py b/tests/test_optimize.py index 03f5d03a..85bcc1a5 100644 --- a/tests/test_optimize.py +++ b/tests/test_optimize.py @@ -51,8 +51,8 @@ def test_mono_not_inverted(resources, outdir): log=logging.getLogger(name='test_mono_not_inverted'), ) - im = Image.open(fspath(outdir / 'im.png')) - assert im.getpixel((0, 0)) == 255, "Expected white background" + with Image.open(fspath(outdir / 'im.png')) as im: + assert im.getpixel((0, 0)) == 255, "Expected white background" @pytest.mark.skipif(not pngquant.available(), reason='need pngquant') @@ -110,10 +110,10 @@ def test_flate_to_jbig2(resources, outdir, spoof_tesseract_noop): # This test requires an image that pngquant is capable of converting to # to 1bpp - so use an existing 1bpp image, convert up, confirm it can # convert down - im = Image.open(fspath(resources / 'typewriter.png')) - assert im.mode in ('1', 'P') - im = im.convert('L') - im.save(fspath(outdir / 'type8.png')) + with Image.open(fspath(resources / 'typewriter.png')) as im: + assert im.mode in ('1', 'P') + im = im.convert('L') + im.save(fspath(outdir / 'type8.png')) check_ocrmypdf( outdir / 'type8.png', diff --git a/tests/test_rotation.py b/tests/test_rotation.py index b8bc07ca..7aaead96 100644 --- a/tests/test_rotation.py +++ b/tests/test_rotation.py @@ -224,12 +224,12 @@ def test_rotate_deskew_timeout(resources, outdir): @pytest.mark.parametrize('image_angle', (0, 90, 180, 270)) def test_rotate_page_level(image_angle, page_angle, resources, outdir): def make_rotate_test(prefix, image_angle, page_angle): - im = Image.open(fspath(resources / 'typewriter.png')) - if image_angle != 0: - ccw_angle = -image_angle % 360 - im = im.transpose(getattr(Image, f'ROTATE_{ccw_angle}')) memimg = BytesIO() - im.save(memimg, format='PNG') + with Image.open(fspath(resources / 'typewriter.png')) as im: + if image_angle != 0: + ccw_angle = -image_angle % 360 + im = im.transpose(getattr(Image, f'ROTATE_{ccw_angle}')) + im.save(memimg, format='PNG') memimg.seek(0) mempdf = BytesIO() img2pdf.convert(