Fix shim_paths to account for unexpected files in Program Files\gs

Fixes #565
This commit is contained in:
James R. Barlow
2020-05-28 14:58:41 -07:00
parent 0cefe886ec
commit df9f5157bd
2 changed files with 39 additions and 18 deletions

View File

@@ -25,6 +25,7 @@ import sys
from collections.abc import Mapping
from distutils.version import LooseVersion
from functools import lru_cache
from pathlib import Path
from subprocess import PIPE, STDOUT, CalledProcessError
from subprocess import run as subprocess_run
@@ -138,24 +139,25 @@ def shim_paths_with_program_files(env=None):
program_files = env.get('PROGRAMFILES', '')
if not program_files:
return env.get('PATH', '')
paths = []
try:
for dirname in os.listdir(program_files):
if dirname.lower() == 'tesseract-ocr':
paths.append(os.path.join(program_files, dirname))
elif dirname.lower() == 'gs':
try:
latest_gs = max(
os.listdir(os.path.join(program_files, dirname)),
key=lambda d: float(d[2:]),
)
except (FileNotFoundError, NotADirectoryError):
continue
paths.append(os.path.join(program_files, dirname, latest_gs, 'bin'))
except EnvironmentError:
pass
paths.extend(path for path in os.get_exec_path(env) if path not in set(paths))
return os.pathsep.join(paths)
def path_walker():
for path in Path(program_files).iterdir():
if not path.is_dir():
continue
if path.name.lower() == 'tesseract-ocr':
yield path
elif path.name.lower() == 'gs':
yield from (p for p in path.glob('**/bin') if p.is_dir())
paths = sorted(
(p for p in path_walker()), key=lambda p: (p.name, p.parent.name), reverse=True
)
paths.extend(
Path(str_path)
for str_path in os.get_exec_path(env)
if Path(str_path) not in set(paths)
)
return os.pathsep.join(str(p) for p in paths)
missing_program = '''

View File

@@ -17,6 +17,7 @@
import logging
import multiprocessing
import os
from pathlib import Path
from unittest.mock import MagicMock
@@ -95,3 +96,21 @@ class TestFileIsWritable:
pathmock.exists.return_value = True
pathmock.is_file.side_effect = PermissionError
assert not helpers.is_file_writable(pathmock)
def test_shim_paths(tmp_path):
    """Check the search path built from a fake ``Program Files`` tree.

    The fake tree contains a ``tesseract-ocr`` folder, two Ghostscript
    version folders that each have a ``bin`` subfolder, and a stray file
    inside ``gs``.  The expected result lists the Tesseract folder first,
    then the Ghostscript ``bin`` folders in descending version order, and
    finally the entry taken from ``PATH``.
    """
    progfiles = tmp_path / 'Program Files'
    progfiles.mkdir()
    (progfiles / 'tesseract-ocr').mkdir()
    (progfiles / 'gs' / '9.51' / 'bin').mkdir(parents=True)
    (progfiles / 'gs' / '9.52' / 'bin').mkdir(parents=True)
    # A non-directory entry inside "gs" must be ignored rather than
    # treated as a Ghostscript version folder.
    (progfiles / 'gs' / 'unexpected.txt').touch()
    syspath = tmp_path / 'bin'
    env = {'PROGRAMFILES': str(progfiles), 'PATH': str(syspath)}

    from ocrmypdf.exec import shim_paths_with_program_files

    result_str = shim_paths_with_program_files(env=env)
    results = result_str.split(os.pathsep)
    assert results[0].endswith('tesseract-ocr')
    # Build the expected suffixes with os.path.join so the comparison uses
    # the platform's own separator instead of a hard-coded "/".
    assert results[1].endswith(os.path.join('gs', '9.52', 'bin'))
    assert results[2].endswith(os.path.join('gs', '9.51', 'bin'))
    assert results[3] == str(syspath)