From 8ed4e229f39fb203e045358f3df9a93b8baf79cb Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Thu, 30 May 2019 13:57:38 -0700 Subject: [PATCH] ghostscript: avoid log=None construct --- src/ocrmypdf/exec/ghostscript.py | 11 ++++++++++- src/ocrmypdf/pdfinfo/__init__.py | 9 ++++----- src/ocrmypdf/pdfinfo/ghosttext.py | 5 ++++- tests/test_rotation.py | 2 +- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/ocrmypdf/exec/ghostscript.py b/src/ocrmypdf/exec/ghostscript.py index 85e82465..b744cdbf 100644 --- a/src/ocrmypdf/exec/ghostscript.py +++ b/src/ocrmypdf/exec/ghostscript.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU General Public License # along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>. +import logging import re from functools import lru_cache from os import fspath @@ -24,8 +25,11 @@ from tempfile import NamedTemporaryFile from PIL import Image -from . import get_version from ..exceptions import SubprocessOutputError +from . import get_version + + +gslog = logging.getLogger() @lru_cache(maxsize=1) @@ -132,6 +136,8 @@ def rasterize_pdf( res = round(xres, 6), round(yres, 6) if not page_dpi: page_dpi = res + if not log: + log = gslog with NamedTemporaryFile(delete=True) as tmp: args_gs = ( @@ -209,6 +215,9 @@ def generate_pdfa( images entirely. (The feature was added in 9.23 but broken, and the 9.24 release of Ghostscript had regressions, so we don't support it until 9.25.)
""" + if not log: + log = gslog + compression_args = [] if compression == 'jpeg': compression_args = [ diff --git a/src/ocrmypdf/pdfinfo/__init__.py b/src/ocrmypdf/pdfinfo/__init__.py index aaad8ebe..c6c4f7e9 100644 --- a/src/ocrmypdf/pdfinfo/__init__.py +++ b/src/ocrmypdf/pdfinfo/__init__.py @@ -19,10 +19,10 @@ from collections import namedtuple from decimal import Decimal from enum import Enum +import logging from math import hypot, isclose from os import fspath from pathlib import Path -from unittest.mock import Mock from warnings import warn import re @@ -34,6 +34,8 @@ from . import ghosttext from ..exceptions import EncryptedPdfError, MissingDependencyError +logger = logging.getLogger() + Colorspace = Enum('Colorspace', 'gray rgb cmyk lab icc index sep devn pattern jpeg2000') Encoding = Enum( @@ -615,9 +617,6 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile, xmltext): def _pdf_get_all_pageinfo(infile, detailed_analysis=False, log=None): - if not log: - log = Mock() - pdf = pikepdf.open(infile) # Do not close in this function if pdf.is_encrypted: pdf.close() @@ -750,7 +749,7 @@ class PageInfo: class PdfInfo: """Get summary information about a PDF""" - def __init__(self, infile, detailed_page_analysis=False, log=None): + def __init__(self, infile, detailed_page_analysis=False, log=logger): self._infile = infile self._pages, pdf = _pdf_get_all_pageinfo( infile, detailed_page_analysis, log=log diff --git a/src/ocrmypdf/pdfinfo/ghosttext.py b/src/ocrmypdf/pdfinfo/ghosttext.py index c1a612a5..43156154 100644 --- a/src/ocrmypdf/pdfinfo/ghosttext.py +++ b/src/ocrmypdf/pdfinfo/ghosttext.py @@ -15,11 +15,14 @@ # You should have received a copy of the GNU General Public License # along with OCRmyPDF. If not, see . +import logging import re import xml.etree.ElementTree as ET from ..exec import ghostscript +gslog = logging.getLogger() + # Forgive me for I have sinned # I am using regular expressions to parse XML. 
However the XML in this case, # generated by Ghostscript, is self-consistent enough to be parseable. @@ -74,7 +77,7 @@ def page_get_textblocks(infile, pageno, xmltext, height): return [block for block in joined_blocks()] -def extract_text_xml(infile, pdf, pageno=None, log=None): +def extract_text_xml(infile, pdf, pageno=None, log=gslog): existing_text = ghostscript.extract_text(infile, pageno=None) existing_text = regex_remove_char_tags.sub(b' ', existing_text) diff --git a/tests/test_rotation.py b/tests/test_rotation.py index 3f09d43d..a33e66cd 100644 --- a/tests/test_rotation.py +++ b/tests/test_rotation.py @@ -268,7 +268,7 @@ def test_tesseract_orientation(resources, tmpdir): pix_rotated = pix.rotate_orth(2) # 180 degrees clockwise pix_rotated.write_implied_format(tmpdir / '000001.png') - log = Mock() + log = logging.getLogger() tesseract.get_orientation( # Test results of this are unreliable tmpdir / '000001.png', engine_mode='3', timeout=10, log=log )