mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-04 04:35:49 -04:00
The previous incarnation was only suitable for generating a local cache where the suite was executed repeatedly. Now the cache ignores differences, so it can be checked into GitHub and shared.
168 lines
4.7 KiB
Python
168 lines
4.7 KiB
Python
# © 2017 James R. Barlow: github.com/jbarlow83
|
|
#
|
|
# This file is part of OCRmyPDF.
|
|
#
|
|
# OCRmyPDF is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# OCRmyPDF is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import sys
|
|
import os
|
|
import platform
|
|
|
|
# Plugin names for pytest to load. NOTE(review): 'helpers_namespace' is
# presumably what provides the pytest.helpers.register decorator used in this
# file -- confirm against the installed plugin.
pytest_plugins = ['helpers_namespace']
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
from subprocess import Popen, PIPE
|
|
|
|
|
|
# Abort early when the interpreter is older than the minimum version named in
# the message. Comparing against (3, 4) keeps the check and the message in
# agreement; testing only the major version would let 3.0-3.3 through.
if sys.version_info < (3, 4):
    print("Requires Python 3.4+")
    sys.exit(1)
|
|
|
|
|
|
@pytest.helpers.register
def is_linux():
    """Return True when platform.system() reports 'Linux'."""
    system_name = platform.system()
    return system_name == 'Linux'
|
|
|
|
|
|
@pytest.helpers.register
def is_macos():
    """Return True when platform.system() reports 'Darwin'."""
    system_name = platform.system()
    return system_name == 'Darwin'
|
|
|
|
|
|
@pytest.helpers.register
def running_in_docker():
    """Return True if either Docker marker file exists at the filesystem root.

    Docker creates a file named /.dockerenv (newer versions) or /.dockerinit
    (older versions). This is undocumented behavior, not an official test.
    """
    marker_files = ('/.dockerenv', '/.dockerinit')
    return any(os.path.exists(marker) for marker in marker_files)
|
|
|
|
|
|
@pytest.helpers.register
def running_in_travis():
    """Return True when the TRAVIS environment variable equals 'true'."""
    travis_flag = os.environ.get('TRAVIS')
    return travis_flag == 'true'
|
|
|
|
|
|
# Absolute path of the directory that contains this file.
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))

# Directory of replacement executables that spoof() links to.
SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')

# Parent directory of TESTS_ROOT.
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)

# Command prefix that runs the ocrmypdf module with the current interpreter.
OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']
|
|
|
|
|
|
@pytest.helpers.register
def spoof(tmpdir_factory, **kwargs):
    """Modify PATH to override subprocess executables

    spoof(program1='replacement', ...)

    Creates temporary directory with symlinks to targets.

    Args:
        tmpdir_factory: object whose ``mktemp(name)`` returns a new temporary
            directory (the fixtures in this file pass pytest's
            ``tmpdir_factory``).
        **kwargs: maps the program name to shadow on PATH to the filename of
            its replacement inside SPOOF_PATH.

    Returns:
        A copy of ``os.environ`` in which the symlink directory is prepended
        to ``PATH`` and the original ``PATH`` is saved under
        ``_OCRMYPDF_SAVE_PATH``.
    """
    env = os.environ.copy()
    slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))
    # Convert through str() like the other fixtures in this file, so that
    # Path() does not depend on accepting the factory's own path type.
    spoofer_base = Path(str(tmpdir_factory.mktemp('spoofers')))
    tmpdir = spoofer_base / slug
    tmpdir.mkdir(parents=True)

    for replace_program, with_spoof in kwargs.items():
        spoofer = Path(SPOOF_PATH) / with_spoof
        # The replacement is run in place of the real program, so it must be
        # executable.
        spoofer.chmod(0o755)
        (tmpdir / replace_program).symlink_to(spoofer)

    env['_OCRMYPDF_SAVE_PATH'] = env['PATH']
    # Use the platform's PATH separator rather than a hard-coded ':'.
    env['PATH'] = str(tmpdir) + os.pathsep + env['PATH']

    return env
|
|
|
|
|
|
@pytest.fixture(scope='session')
def spoof_tesseract_noop(tmpdir_factory):
    """Session fixture: environment from spoof() in which ``tesseract`` is
    replaced by ``tesseract_noop.py``."""
    spoofed_env = spoof(tmpdir_factory, tesseract='tesseract_noop.py')
    return spoofed_env
|
|
|
|
|
|
@pytest.fixture(scope='session')
def spoof_tesseract_cache(tmpdir_factory):
    """Session fixture: environment from spoof() in which ``tesseract`` is
    replaced by ``tesseract_cache.py``.

    When running_in_docker() is true, no spoofing is done and an unmodified
    copy of the current environment is returned instead.
    """
    if not running_in_docker():
        return spoof(tmpdir_factory, tesseract="tesseract_cache.py")
    return os.environ.copy()
|
|
|
|
|
|
@pytest.fixture
def resources():
    """Fixture: Path of the ``resources`` directory under TESTS_ROOT."""
    return Path(TESTS_ROOT).joinpath('resources')
|
|
|
|
|
|
@pytest.fixture
def ocrmypdf_exec():
    """Fixture: the module-level OCRMYPDF command list (not a copy)."""
    command = OCRMYPDF
    return command
|
|
|
|
|
|
@pytest.fixture(scope="function")
def outdir(tmpdir):
    """Fixture: the per-test ``tmpdir`` converted to a pathlib.Path."""
    tmpdir_name = str(tmpdir)
    return Path(tmpdir_name)
|
|
|
|
|
|
@pytest.fixture(scope="function")
def outpdf(tmpdir):
    """Fixture: string path of ``out.pdf`` inside the per-test ``tmpdir``."""
    target = Path(str(tmpdir)).joinpath('out.pdf')
    return str(target)
|
|
|
|
|
|
@pytest.fixture(scope="function")
def no_outpdf(tmpdir):
    """Fixture: string path of ``no_output.pdf`` inside the per-test tmpdir.

    This only documents that a test is not expected to produce output. An
    assertion failure inside a test fixture produces an error rather than a
    test failure, so no checking is done here; it is up to the test to
    confirm that no output file was created.
    """
    target = Path(str(tmpdir)).joinpath('no_output.pdf')
    return str(target)
|
|
|
|
|
|
@pytest.helpers.register
def check_ocrmypdf(input_file, output_file, *args, env=None):
    """Run ocrmypdf and confirm that a valid file was created.

    Forwards all arguments to run_ocrmypdf(), then asserts that the process
    exited with code 0, that the output file exists and is larger than 100
    bytes, and that nothing was written to stdout.

    Returns:
        ``output_file``, unchanged.
    """
    proc, stdout_text, stderr_text = run_ocrmypdf(
        input_file, output_file, *args, env=env)
    # ensure py.test collects the output, use -s to view
    print(stderr_text)

    assert proc.returncode == 0
    output_path = str(output_file)
    assert os.path.exists(output_path), "Output file not created"
    assert os.path.getsize(output_path) > 100, "PDF too small or empty"
    assert stdout_text == "", (
        "The following was written to stdout and should not have been: \n"
        + "<stdout>\n" + stdout_text + "\n</stdout>")
    return output_file
|
|
|
|
|
|
@pytest.helpers.register
def run_ocrmypdf(input_file, output_file, *args, env=None):
    """Run ocrmypdf and let the caller deal with the results.

    The command line is OCRMYPDF, then each extra argument converted to str,
    then the input and output file paths. When ``env`` is None the child
    process gets ``os.environ``.

    Returns:
        A tuple ``(process, stdout_text, stderr_text)``. The process has
        already finished and both streams are captured as text.
    """
    child_env = os.environ if env is None else env

    command = list(OCRMYPDF)
    command.extend(str(arg) for arg in args)
    command.append(str(input_file))
    command.append(str(output_file))

    proc = Popen(
        command,
        close_fds=True,
        stdout=PIPE,
        stderr=PIPE,
        universal_newlines=True,
        env=child_env)
    stdout_text, stderr_text = proc.communicate()

    return proc, stdout_text, stderr_text
|
|
|
|
|
|
@pytest.helpers.register
def first_page_dimensions(pdf):
    """Return ``(width_inches, height_inches)`` for entry 0 of the PdfInfo
    built from ``pdf``."""
    # Imported inside the function, as in the original, so ocrmypdf is only
    # loaded when this helper is called.
    from ocrmypdf import pdfinfo

    first_page = pdfinfo.PdfInfo(pdf)[0]
    return first_page.width_inches, first_page.height_inches
|