ghostscript: avoid log=None construct

2026-05-06 05:36:29 -04:00 · 2019-05-30 13:57:38 -07:00
parent db29cae177
commit 8ed4e229f3
4 changed files with 19 additions and 8 deletions
--- a/src/ocrmypdf/exec/ghostscript.py
+++ b/src/ocrmypdf/exec/ghostscript.py
@@ -15,6 +15,7 @@
 # You should have received a copy of the GNU General Public License
 # along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.

+import logging
 import re
 from functools import lru_cache
 from os import fspath
@@ -24,8 +25,11 @@ from tempfile import NamedTemporaryFile

 from PIL import Image

-from . import get_version
 from ..exceptions import SubprocessOutputError
+from . import get_version
+
+
+gslog = logging.getLogger()


@lru_cache(maxsize=1)
@@ -132,6 +136,8 @@ def rasterize_pdf(
    res = round(xres, 6), round(yres, 6)
    if not page_dpi:
        page_dpi = res
+    if not log:
+        log = gslog

    with NamedTemporaryFile(delete=True) as tmp:
        args_gs = (
@@ -209,6 +215,9 @@ def generate_pdfa(
    images entirely. (The feature was added in 9.23 but broken, and the 9.24
    release of Ghostscript had regressions, so we don't support it until 9.25.)
    """
+    if not log:
+        log = gslog
+
    compression_args = []
    if compression == 'jpeg':
        compression_args = [
--- a/src/ocrmypdf/pdfinfo/__init__.py
+++ b/src/ocrmypdf/pdfinfo/__init__.py
@@ -19,10 +19,10 @@
 from collections import namedtuple
 from decimal import Decimal
 from enum import Enum
+import logging
 from math import hypot, isclose
 from os import fspath
 from pathlib import Path
-from unittest.mock import Mock
 from warnings import warn
 import re

@@ -34,6 +34,8 @@ from . import ghosttext
 from ..exceptions import EncryptedPdfError, MissingDependencyError


+logger = logging.getLogger()
+
 Colorspace = Enum('Colorspace', 'gray rgb cmyk lab icc index sep devn pattern jpeg2000')

 Encoding = Enum(
@@ -615,9 +617,6 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile, xmltext):


 def _pdf_get_all_pageinfo(infile, detailed_analysis=False, log=None):
-    if not log:
-        log = Mock()
-
    pdf = pikepdf.open(infile)  # Do not close in this function
    if pdf.is_encrypted:
        pdf.close()
@@ -750,7 +749,7 @@ class PageInfo:
 class PdfInfo:
    """Get summary information about a PDF"""

-    def __init__(self, infile, detailed_page_analysis=False, log=None):
+    def __init__(self, infile, detailed_page_analysis=False, log=logger):
        self._infile = infile
        self._pages, pdf = _pdf_get_all_pageinfo(
            infile, detailed_page_analysis, log=log
--- a/src/ocrmypdf/pdfinfo/ghosttext.py
+++ b/src/ocrmypdf/pdfinfo/ghosttext.py
@@ -15,11 +15,14 @@
 # You should have received a copy of the GNU General Public License
 # along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.

+import logging
 import re
 import xml.etree.ElementTree as ET

 from ..exec import ghostscript

+gslog = logging.getLogger()
+
 # Forgive me for I have sinned
 # I am using regular expressions to parse XML. However the XML in this case,
 # generated by Ghostscript, is self-consistent enough to be parseable.
@@ -74,7 +77,7 @@ def page_get_textblocks(infile, pageno, xmltext, height):
    return [block for block in joined_blocks()]


-def extract_text_xml(infile, pdf, pageno=None, log=None):
+def extract_text_xml(infile, pdf, pageno=None, log=gslog):
    existing_text = ghostscript.extract_text(infile, pageno=None)
    existing_text = regex_remove_char_tags.sub(b' ', existing_text)

--- a/tests/test_rotation.py
+++ b/tests/test_rotation.py
@@ -268,7 +268,7 @@ def test_tesseract_orientation(resources, tmpdir):
    pix_rotated = pix.rotate_orth(2)  # 180 degrees clockwise
    pix_rotated.write_implied_format(tmpdir / '000001.png')

-    log = Mock()
+    log = logging.getLogger()
    tesseract.get_orientation(  # Test results of this are unreliable
        tmpdir / '000001.png', engine_mode='3', timeout=10, log=log
    )