unpaper-args: add test case and harden feature

This commit is contained in:
James R. Barlow
2019-02-07 16:21:02 -08:00
parent 9a4493f211
commit f095e91cb4
3 changed files with 101 additions and 25 deletions

View File

@@ -20,13 +20,14 @@
import os
import shlex
import subprocess
import sys
from functools import lru_cache
from subprocess import STDOUT, CalledProcessError, check_output
from tempfile import NamedTemporaryFile
from subprocess import PIPE, STDOUT, CalledProcessError
from tempfile import TemporaryDirectory
from . import get_version
from ..exceptions import MissingDependencyError
from ..exceptions import MissingDependencyError, SubprocessOutputError
try:
from PIL import Image
@@ -65,26 +66,43 @@ def run(input_file, output_file, dpi, log, mode_args):
im.close()
raise MissingDependencyError() from e
with NamedTemporaryFile(suffix=suffix) as input_pnm, NamedTemporaryFile(
suffix=suffix, mode="r+b"
) as output_pnm:
with TemporaryDirectory() as tmpdir:
input_pnm = os.path.join(tmpdir, f'input{suffix}')
output_pnm = os.path.join(tmpdir, f'output{suffix}')
im.save(input_pnm, format='PPM')
im.close()
os.unlink(output_pnm.name)
args_unpaper.extend([input_pnm.name, output_pnm.name])
# To prevent any shenanigans from accepting arbitrary parameters in
# --unpaper-args, we:
# 1) run with cwd set to a tmpdir with only unpaper's files
# 2) forbid the use of '/' in arguments, to prevent changing paths
# 3) append absolute paths for the input and output file
# This should ensure that a user cannot clobber some other file with
# their unpaper arguments (whether intentionally or otherwise)
args_unpaper.extend([input_pnm, output_pnm])
try:
stdout = check_output(
args_unpaper, close_fds=True, universal_newlines=True, stderr=STDOUT
proc = subprocess.run(
args_unpaper,
check=True,
close_fds=True,
universal_newlines=True,
stderr=STDOUT,
cwd=tmpdir,
stdout=PIPE,
)
except CalledProcessError as e:
log.debug(e.output)
raise e from e
else:
log.debug(stdout)
# unpaper sets dpi to 72
Image.open(output_pnm.name).save(output_file, dpi=(dpi, dpi))
log.debug(proc.stdout)
# unpaper sets dpi to 72; fix this
try:
Image.open(output_pnm).save(output_file, dpi=(dpi, dpi))
except (FileNotFoundError, OSError):
raise SubprocessOutputError(
"unpaper: failed to produce the expected output file. Called with: "
+ str(args_unpaper)
) from None
def validate_custom_args(args: str):

View File

@@ -147,7 +147,7 @@ def no_outpdf(tmpdir):
@pytest.helpers.register
def check_ocrmypdf(input_file, output_file, *args, env=None):
"Run ocrmypdf and confirmed that a valid file was created"
"""Run ocrmypdf and confirmed that a valid file was created"""
p, out, err = run_ocrmypdf(input_file, output_file, *args, env=env)
# ensure py.test collects the output, use -s to view

View File

@@ -15,11 +15,16 @@
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
import argparse
from os import fspath
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from ocrmypdf import __main__ as main
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.exec import unpaper
# pytest.helpers is dynamic
# pylint: disable=no-member
@@ -30,26 +35,79 @@ run_ocrmypdf = pytest.helpers.run_ocrmypdf
spoof = pytest.helpers.spoof
@pytest.fixture(scope='session')
def have_unpaper():
try:
unpaper.version()
except Exception:
return False
else:
return True
@pytest.fixture(scope="session")
def spoof_unpaper_oldversion(tmpdir_factory):
return spoof(tmpdir_factory, unpaper='unpaper_oldversion.py')
return spoof(tmpdir_factory, unpaper="unpaper_oldversion.py")
@pytest.mark.skipif(True, reason="needs new fixture implementation")
def test_no_unpaper(resources, no_outpdf):
# <disable unpaper here>
p, out, err = run_ocrmypdf(
resources / 'c02-22.pdf', no_outpdf, '--clean', env=os.environ
)
assert p.returncode == ExitCode.missing_dependency
input_ = fspath(resources / "c02-22.pdf")
output = fspath(no_outpdf)
options = main.parser.parse_args(args=["--clean", input_, output])
with patch("ocrmypdf.exec.unpaper.version") as mock_unpaper_version:
mock_unpaper_version.side_effect = FileNotFoundError("unpaper")
with pytest.raises(SystemExit):
main.check_options(options, log=MagicMock())
def test_old_unpaper(spoof_unpaper_oldversion, resources, no_outpdf):
p, out, err = run_ocrmypdf(
resources / 'c02-22.pdf', no_outpdf, '--clean', env=spoof_unpaper_oldversion
resources / "c02-22.pdf", no_outpdf, "--clean", env=spoof_unpaper_oldversion
)
assert p.returncode == ExitCode.missing_dependency
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_clean(spoof_tesseract_noop, resources, outpdf):
check_ocrmypdf(resources / 'skew.pdf', outpdf, '-c', env=spoof_tesseract_noop)
check_ocrmypdf(resources / "skew.pdf", outpdf, "-c", env=spoof_tesseract_noop)
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_valid(spoof_tesseract_noop, resources, outpdf):
check_ocrmypdf(
resources / "skew.pdf",
outpdf,
"-c",
"--unpaper-args",
"--layout double", # Spaces required here
env=spoof_tesseract_noop,
)
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_invalid_filename(spoof_tesseract_noop, resources, outpdf):
p, out, err = run_ocrmypdf(
resources / "skew.pdf",
outpdf,
"-c",
"--unpaper-args",
"/etc/passwd",
env=spoof_tesseract_noop,
)
assert "No filenames allowed" in err
assert p.returncode == ExitCode.bad_args
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_invalid(spoof_tesseract_noop, resources, outpdf):
p, out, err = run_ocrmypdf(
resources / "skew.pdf",
outpdf,
"-c",
"--unpaper-args",
"unpaper is not going to like these arguments",
env=spoof_tesseract_noop,
)
# Can't tell difference between unpaper choking on bad arguments or some
# other unpaper failure
assert p.returncode == ExitCode.child_process_error