mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-06 05:36:29 -04:00
ghostscript: avoid log=None construct
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import logging
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from os import fspath
|
||||
@@ -24,8 +25,11 @@ from tempfile import NamedTemporaryFile
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from . import get_version
|
||||
from ..exceptions import SubprocessOutputError
|
||||
from . import get_version
|
||||
|
||||
|
||||
gslog = logging.getLogger()
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
@@ -132,6 +136,8 @@ def rasterize_pdf(
|
||||
res = round(xres, 6), round(yres, 6)
|
||||
if not page_dpi:
|
||||
page_dpi = res
|
||||
if not log:
|
||||
log = gslog
|
||||
|
||||
with NamedTemporaryFile(delete=True) as tmp:
|
||||
args_gs = (
|
||||
@@ -209,6 +215,9 @@ def generate_pdfa(
|
||||
images entirely. (The feature was added in 9.23 but broken, and the 9.24
|
||||
release of Ghostscript had regressions, so we don't support it until 9.25.)
|
||||
"""
|
||||
if not log:
|
||||
log = gslog
|
||||
|
||||
compression_args = []
|
||||
if compression == 'jpeg':
|
||||
compression_args = [
|
||||
|
||||
@@ -19,10 +19,10 @@
|
||||
from collections import namedtuple
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
import logging
|
||||
from math import hypot, isclose
|
||||
from os import fspath
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock
|
||||
from warnings import warn
|
||||
import re
|
||||
|
||||
@@ -34,6 +34,8 @@ from . import ghosttext
|
||||
from ..exceptions import EncryptedPdfError, MissingDependencyError
|
||||
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
Colorspace = Enum('Colorspace', 'gray rgb cmyk lab icc index sep devn pattern jpeg2000')
|
||||
|
||||
Encoding = Enum(
|
||||
@@ -615,9 +617,6 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile, xmltext):
|
||||
|
||||
|
||||
def _pdf_get_all_pageinfo(infile, detailed_analysis=False, log=None):
|
||||
if not log:
|
||||
log = Mock()
|
||||
|
||||
pdf = pikepdf.open(infile) # Do not close in this function
|
||||
if pdf.is_encrypted:
|
||||
pdf.close()
|
||||
@@ -750,7 +749,7 @@ class PageInfo:
|
||||
class PdfInfo:
|
||||
"""Get summary information about a PDF"""
|
||||
|
||||
def __init__(self, infile, detailed_page_analysis=False, log=None):
|
||||
def __init__(self, infile, detailed_page_analysis=False, log=logger):
|
||||
self._infile = infile
|
||||
self._pages, pdf = _pdf_get_all_pageinfo(
|
||||
infile, detailed_page_analysis, log=log
|
||||
|
||||
@@ -15,11 +15,14 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from ..exec import ghostscript
|
||||
|
||||
gslog = logging.getLogger()
|
||||
|
||||
# Forgive me for I have sinned
|
||||
# I am using regular expressions to parse XML. However the XML in this case,
|
||||
# generated by Ghostscript, is self-consistent enough to be parseable.
|
||||
@@ -74,7 +77,7 @@ def page_get_textblocks(infile, pageno, xmltext, height):
|
||||
return [block for block in joined_blocks()]
|
||||
|
||||
|
||||
def extract_text_xml(infile, pdf, pageno=None, log=None):
|
||||
def extract_text_xml(infile, pdf, pageno=None, log=gslog):
|
||||
existing_text = ghostscript.extract_text(infile, pageno=None)
|
||||
existing_text = regex_remove_char_tags.sub(b' ', existing_text)
|
||||
|
||||
|
||||
@@ -268,7 +268,7 @@ def test_tesseract_orientation(resources, tmpdir):
|
||||
pix_rotated = pix.rotate_orth(2) # 180 degrees clockwise
|
||||
pix_rotated.write_implied_format(tmpdir / '000001.png')
|
||||
|
||||
log = Mock()
|
||||
log = logging.getLogger()
|
||||
tesseract.get_orientation( # Test results of this are unreliable
|
||||
tmpdir / '000001.png', engine_mode='3', timeout=10, log=log
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user