From 1aa34f5d2e187c6adcd511cdbb3a4d49dda4200e Mon Sep 17 00:00:00 2001
From: "James R. Barlow"
Date: Fri, 21 Jul 2017 13:28:30 -0700
Subject: [PATCH] Make some interfaces accepting of both str-paths and Path objects

---
 ocrmypdf/exec/ghostscript.py | 12 ++++++----
 ocrmypdf/helpers.py          | 46 ++++++++++++++++++++++++++++++------
 ocrmypdf/leptonica.py        | 12 ++++++----
 tests/test_main.py           | 12 ++++------
 4 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/ocrmypdf/exec/ghostscript.py b/ocrmypdf/exec/ghostscript.py
index d3399710..0873b55d 100644
--- a/ocrmypdf/exec/ghostscript.py
+++ b/ocrmypdf/exec/ghostscript.py
@@ -10,6 +10,7 @@ import sys
 from . import get_program
 from ..exceptions import SubprocessOutputError
 from PIL import Image
+from ..helpers import fspath
 
 
 @lru_cache(maxsize=1)
@@ -43,8 +44,8 @@ def rasterize_pdf(input_file, output_file, xres, yres, raster_device, log,
     (xres, yres) even if those numbers are noninteger. The image's DPI will
     be overridden with the values in page_dpi.
 
-    :param input_file:
-    :param output_file:
+    :param input_file: pathlike
+    :param output_file: pathlike
     :param xres: resolution at which to rasterize page
     :param yres:
     :param raster_device:
@@ -69,7 +70,7 @@ def rasterize_pdf(input_file, output_file, xres, yres, raster_device, log,
             '-dLastPage=%i' % pageno,
             '-o', tmp.name,
             '-r{0}x{1}'.format(str(int_res[0]), str(int_res[1])),
-            input_file
+            fspath(input_file)
         ]
 
         p = run(args_gs, stdout=PIPE, stderr=STDOUT,
@@ -95,9 +96,10 @@ def rasterize_pdf(input_file, output_file, xres, yres, raster_device, log,
                 log.debug(
                     "Ghostscript: resize output image {} -> {}".format(
                         im.size, expected_size))
-                im.resize(expected_size).save(output_file, dpi=page_dpi)
+                im.resize(expected_size).save(
+                    fspath(output_file), dpi=page_dpi)
             else:
-                copy(tmp.name, output_file)
+                copy(tmp.name, fspath(output_file))
 
 
 def generate_pdfa(pdf_pages, output_file, compression, log,
diff --git a/ocrmypdf/helpers.py b/ocrmypdf/helpers.py
index ea39f67d..d6ea1a6b 100644
--- a/ocrmypdf/helpers.py
+++ b/ocrmypdf/helpers.py
@@ -79,10 +79,42 @@ def is_file_writable(test_file):
     return True
 
 
-@contextmanager
-def universal_open(p, *args, **kwargs):
-    "Work around Python 3.5's inability to open(pathlib.Path())"
-    try:
-        yield p.open(*args, **kwargs)
-    except AttributeError:
-        yield open(p, *args, **kwargs)
\ No newline at end of file
+if sys.version_info < (3, 6):  # any 3.5.x; note (3, 5, x) > (3, 5)
+    def universal_open(p, *args, **kwargs):
+        "Work around Python 3.5's inability to open(pathlib.Path())"
+        try:
+            return p.open(*args, **kwargs)
+        except AttributeError:
+            return open(p, *args, **kwargs)
+
+
+    def fspath(path):
+        '''https://www.python.org/dev/peps/pep-0519/#os'''
+        if isinstance(path, (str, bytes)):
+            return path
+
+        # Work from the object's type to match method resolution of other magic
+        # methods.
+        path_type = type(path)
+        try:
+            path = path_type.__fspath__(path)
+        except AttributeError:
+            # Added for Python 3.5 support.
+            if isinstance(path, pathlib.Path):
+                return str(path)
+            elif hasattr(path_type, '__fspath__'):
+                raise
+        else:
+            if isinstance(path, (str, bytes)):
+                return path
+            else:
+                raise TypeError("expected __fspath__() to return str or bytes, "
+                                "not " + type(path).__name__)
+
+        raise TypeError(
+            "expected str, bytes, pathlib.Path or os.PathLike object, not "
+            + path_type.__name__)
+
+else:
+    universal_open = open
+    fspath = os.fspath
\ No newline at end of file
diff --git a/ocrmypdf/leptonica.py b/ocrmypdf/leptonica.py
index 4f892b93..c32c6c1f 100644
--- a/ocrmypdf/leptonica.py
+++ b/ocrmypdf/leptonica.py
@@ -14,6 +14,7 @@ from ctypes.util import find_library
 from .lib._leptonica import ffi
 from functools import lru_cache
 from enum import Enum
+from .helpers import fspath
 
 lept = ffi.dlopen(find_library('lept'))
 
@@ -208,26 +209,27 @@ class Pix:
         return 'P'
 
     @classmethod
-    def read(cls, filename):
+    def read(cls, path):
         """Load an image file into a PIX object.
 
         Leptonica can load TIFF, PNM (PBM, PGM, PPM), PNG, and JPEG. If
         loading fails then the object will wrap a C null pointer.
         """
+        filename = fspath(path)
         with LeptonicaErrorTrap():
-            return cls(lept.pixRead(
-                filename.encode(sys.getfilesystemencoding())))
+            return cls(lept.pixRead(os.fsencode(filename)))
 
     def write_implied_format(
-            self, filename, jpeg_quality=0, jpeg_progressive=0):
+            self, path, jpeg_quality=0, jpeg_progressive=0):
         """Write pix to the filename, with the extension indicating format.
 
         jpeg_quality -- quality (iff JPEG; 1 - 100, 0 for default)
         jpeg_progressive -- (iff JPEG; 0 for baseline seq., 1 for progressive)
         """
+        filename = fspath(path)
         with LeptonicaErrorTrap():
             lept.pixWriteImpliedFormat(
-                filename.encode(sys.getfilesystemencoding()),
+                os.fsencode(filename),
                 self._pix, jpeg_quality, jpeg_progressive)
 
     def topil(self):
diff --git a/tests/test_main.py b/tests/test_main.py
index d853973a..ae723fe9 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -77,8 +77,8 @@ def test_deskew(spoof_tesseract_noop, resources, outdir):
     deskewed_png = outdir / 'deskewed.png'
 
     ghostscript.rasterize_pdf(
-        str(deskewed_pdf),
-        str(deskewed_png),
+        deskewed_pdf,
+        deskewed_png,
         xres=150,
         yres=150,
         raster_device='pngmono',
@@ -116,8 +116,8 @@ def test_remove_background(spoof_tesseract_noop, resources, outdir):
     output_png = outdir / 'remove_bg.png'
 
     ghostscript.rasterize_pdf(
-        str(output_pdf),
-        str(output_png),
+        output_pdf,
+        output_png,
         xres=100,
         yres=100,
         raster_device='png16m',
@@ -270,9 +270,7 @@ def check_monochrome_correlation(
         print(png)
 
         return ghostscript.rasterize_pdf(
-            str(pdf),
-            str(png),
-            xres=100, yres=100,
+            pdf, png, xres=100, yres=100,
             raster_device='pngmono', log=gslog, pageno=pageno)
 
     rasterize(reference_pdf, reference_pageno, reference_png)