ghostscript: avoid log=None construct

This commit is contained in:
James R. Barlow
2019-05-30 13:57:38 -07:00
parent db29cae177
commit 8ed4e229f3
4 changed files with 19 additions and 8 deletions

View File

@@ -15,6 +15,7 @@
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
import logging
import re
from functools import lru_cache
from os import fspath
@@ -24,8 +25,11 @@ from tempfile import NamedTemporaryFile
from PIL import Image
from . import get_version
from ..exceptions import SubprocessOutputError
from . import get_version
gslog = logging.getLogger()
@lru_cache(maxsize=1)
@@ -132,6 +136,8 @@ def rasterize_pdf(
res = round(xres, 6), round(yres, 6)
if not page_dpi:
page_dpi = res
if not log:
log = gslog
with NamedTemporaryFile(delete=True) as tmp:
args_gs = (
@@ -209,6 +215,9 @@ def generate_pdfa(
images entirely. (The feature was added in 9.23 but broken, and the 9.24
release of Ghostscript had regressions, so we don't support it until 9.25.)
"""
if not log:
log = gslog
compression_args = []
if compression == 'jpeg':
compression_args = [

View File

@@ -19,10 +19,10 @@
from collections import namedtuple
from decimal import Decimal
from enum import Enum
import logging
from math import hypot, isclose
from os import fspath
from pathlib import Path
from unittest.mock import Mock
from warnings import warn
import re
@@ -34,6 +34,8 @@ from . import ghosttext
from ..exceptions import EncryptedPdfError, MissingDependencyError
logger = logging.getLogger()
Colorspace = Enum('Colorspace', 'gray rgb cmyk lab icc index sep devn pattern jpeg2000')
Encoding = Enum(
@@ -615,9 +617,6 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile, xmltext):
def _pdf_get_all_pageinfo(infile, detailed_analysis=False, log=None):
if not log:
log = Mock()
pdf = pikepdf.open(infile) # Do not close in this function
if pdf.is_encrypted:
pdf.close()
@@ -750,7 +749,7 @@ class PageInfo:
class PdfInfo:
"""Get summary information about a PDF"""
def __init__(self, infile, detailed_page_analysis=False, log=None):
def __init__(self, infile, detailed_page_analysis=False, log=logger):
self._infile = infile
self._pages, pdf = _pdf_get_all_pageinfo(
infile, detailed_page_analysis, log=log

View File

@@ -15,11 +15,14 @@
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
import logging
import re
import xml.etree.ElementTree as ET
from ..exec import ghostscript
gslog = logging.getLogger()
# Forgive me for I have sinned
# I am using regular expressions to parse XML. However the XML in this case,
# generated by Ghostscript, is self-consistent enough to be parseable.
@@ -74,7 +77,7 @@ def page_get_textblocks(infile, pageno, xmltext, height):
return [block for block in joined_blocks()]
def extract_text_xml(infile, pdf, pageno=None, log=None):
def extract_text_xml(infile, pdf, pageno=None, log=gslog):
existing_text = ghostscript.extract_text(infile, pageno=None)
existing_text = regex_remove_char_tags.sub(b' ', existing_text)

View File

@@ -268,7 +268,7 @@ def test_tesseract_orientation(resources, tmpdir):
pix_rotated = pix.rotate_orth(2) # 180 degrees clockwise
pix_rotated.write_implied_format(tmpdir / '000001.png')
log = Mock()
log = logging.getLogger()
tesseract.get_orientation( # Test results of this are unreliable
tmpdir / '000001.png', engine_mode='3', timeout=10, log=log
)