mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-18 11:36:26 -04:00
Fix tests that failed on other platforms from previous fix
This commit is contained in:
@@ -67,20 +67,20 @@ def check_platform():
|
||||
)
|
||||
|
||||
|
||||
def check_options_languages(options, plugin_manager):
|
||||
def check_options_languages(options, ocr_engine_languages):
|
||||
if not options.languages:
|
||||
options.languages = {DEFAULT_LANGUAGE}
|
||||
system_lang = locale.getlocale()[0]
|
||||
if system_lang and not system_lang.startswith('en'):
|
||||
log.debug("No language specified; assuming --language %s", DEFAULT_LANGUAGE)
|
||||
|
||||
ocr_engine = plugin_manager.hook.get_ocr_engine()
|
||||
if not options.languages.issubset(ocr_engine.languages(options)):
|
||||
if not ocr_engine_languages:
|
||||
return
|
||||
if not options.languages.issubset(ocr_engine_languages):
|
||||
msg = (
|
||||
f"{ocr_engine} does not have language data for the following "
|
||||
f"OCR engine does not have language data for the following "
|
||||
"requested languages: \n"
|
||||
)
|
||||
for lang in options.languages - ocr_engine.languages(options):
|
||||
for lang in options.languages - ocr_engine_languages:
|
||||
msg += lang + '\n'
|
||||
raise MissingDependencyError(msg)
|
||||
|
||||
@@ -251,9 +251,9 @@ def check_options_pillow(options):
|
||||
PIL.Image.MAX_IMAGE_PIXELS = None
|
||||
|
||||
|
||||
def check_options(options, plugin_manager):
|
||||
def _check_options(options, plugin_manager, ocr_engine_languages):
|
||||
check_platform()
|
||||
check_options_languages(options, plugin_manager)
|
||||
check_options_languages(options, ocr_engine_languages)
|
||||
check_options_metadata(options)
|
||||
check_options_output(options)
|
||||
check_options_sidecar(options)
|
||||
@@ -265,6 +265,11 @@ def check_options(options, plugin_manager):
|
||||
plugin_manager.hook.check_options(options=options)
|
||||
|
||||
|
||||
def check_options(options, plugin_manager):
|
||||
ocr_engine_languages = plugin_manager.hook.get_ocr_engine().languages(options)
|
||||
_check_options(options, plugin_manager, ocr_engine_languages)
|
||||
|
||||
|
||||
def check_closed_streams(options): # pragma: no cover
|
||||
"""Work around Python issue with multiprocessing forking on closed streams
|
||||
|
||||
|
||||
@@ -51,31 +51,33 @@ def make_opts(*args, **kwargs):
|
||||
def test_hocr_notlatin_warning(caplog):
|
||||
# Bypass the test to see if the language is installed; we just want to pretend
|
||||
# that a non-Latin language is installed
|
||||
with patch('ocrmypdf._validation.check_options_languages', return_value=None):
|
||||
vd.check_options(
|
||||
*make_opts_pm(language='chi_sim', pdf_renderer='hocr', output_type='pdfa')
|
||||
)
|
||||
vd._check_options(
|
||||
*make_opts_pm(language='chi_sim', pdf_renderer='hocr', output_type='pdfa'),
|
||||
{'chi_sim'},
|
||||
)
|
||||
assert 'PDF renderer is known to cause' in caplog.text
|
||||
|
||||
|
||||
def test_old_ghostscript(caplog):
|
||||
with patch('ocrmypdf._exec.ghostscript.version', return_value='9.19'), patch(
|
||||
'ocrmypdf._exec.tesseract.has_textonly_pdf', return_value=True
|
||||
), patch('ocrmypdf._validation.check_options_languages', return_value=None):
|
||||
vd.check_options(*make_opts_pm(language='chi_sim', output_type='pdfa'))
|
||||
):
|
||||
vd._check_options(
|
||||
*make_opts_pm(language='chi_sim', output_type='pdfa'), {'chi_sim'}
|
||||
)
|
||||
assert 'Ghostscript does not work correctly' in caplog.text
|
||||
|
||||
with patch('ocrmypdf._exec.ghostscript.version', return_value='9.18'), patch(
|
||||
'ocrmypdf._exec.tesseract.has_textonly_pdf', return_value=True
|
||||
):
|
||||
with pytest.raises(MissingDependencyError):
|
||||
vd.check_options(*make_opts_pm(output_type='pdfa-3'))
|
||||
vd._check_options(*make_opts_pm(output_type='pdfa-3'), set())
|
||||
|
||||
with patch('ocrmypdf._exec.ghostscript.version', return_value='9.24'), patch(
|
||||
'ocrmypdf._exec.tesseract.has_textonly_pdf', return_value=True
|
||||
):
|
||||
with pytest.raises(MissingDependencyError):
|
||||
vd.check_options(*make_opts_pm())
|
||||
vd._check_options(*make_opts_pm(), set())
|
||||
|
||||
|
||||
def test_old_tesseract_error():
|
||||
@@ -83,7 +85,7 @@ def test_old_tesseract_error():
|
||||
with pytest.raises(MissingDependencyError):
|
||||
opts = make_opts(pdf_renderer='sandwich', language='eng')
|
||||
plugin_manager = get_plugin_manager(opts.plugins)
|
||||
vd.check_options(opts, plugin_manager)
|
||||
vd._check_options(opts, plugin_manager, {'eng'})
|
||||
|
||||
|
||||
def test_lossless_redo():
|
||||
@@ -113,13 +115,13 @@ def test_user_words(caplog):
|
||||
with patch('ocrmypdf._exec.tesseract.has_user_words', return_value=False):
|
||||
opts = make_opts(user_words='foo')
|
||||
plugin_manager = get_plugin_manager(opts.plugins)
|
||||
vd.check_options(opts, plugin_manager)
|
||||
vd._check_options(opts, plugin_manager, set())
|
||||
assert '4.0 ignores --user-words' in caplog.text
|
||||
caplog.clear()
|
||||
with patch('ocrmypdf._exec.tesseract.has_user_words', return_value=True):
|
||||
opts = make_opts(user_patterns='foo')
|
||||
plugin_manager = get_plugin_manager(opts.plugins)
|
||||
vd.check_options(opts, plugin_manager)
|
||||
vd._check_options(opts, plugin_manager, set())
|
||||
assert '4.0 ignores --user-words' not in caplog.text
|
||||
|
||||
|
||||
@@ -182,7 +184,7 @@ def test_no_progress_bar(progress_bar, resources):
|
||||
opts = make_opts(progress_bar=progress_bar, input_file=(resources / 'trivial.pdf'))
|
||||
plugin_manager = get_plugin_manager(opts.plugins)
|
||||
with patch('ocrmypdf._concurrent.tqdm', autospec=True) as tqdmpatch:
|
||||
vd.check_options(opts, plugin_manager)
|
||||
vd._check_options(opts, plugin_manager, set())
|
||||
pdfinfo = PdfInfo(opts.input_file, progbar=opts.progress_bar)
|
||||
assert pdfinfo is not None
|
||||
assert tqdmpatch.called
|
||||
@@ -197,7 +199,7 @@ def test_language_warning(caplog):
|
||||
with patch(
|
||||
'ocrmypdf._validation.locale.getlocale', return_value=('en_US', 'UTF-8')
|
||||
):
|
||||
vd.check_options_languages(opts, plugin_manager)
|
||||
vd.check_options_languages(opts, {'eng'})
|
||||
assert opts.languages == {'eng'}
|
||||
assert '' in caplog.text
|
||||
|
||||
@@ -205,7 +207,7 @@ def test_language_warning(caplog):
|
||||
with patch(
|
||||
'ocrmypdf._validation.locale.getlocale', return_value=('fr_FR', 'UTF-8')
|
||||
):
|
||||
vd.check_options_languages(opts, plugin_manager)
|
||||
vd.check_options_languages(opts, {'eng'})
|
||||
assert opts.languages == {'eng'}
|
||||
assert 'assuming --language' in caplog.text
|
||||
|
||||
@@ -268,5 +270,5 @@ def test_optional_program_recommended(caplog):
|
||||
def test_pagesegmode_warning(caplog):
|
||||
opts = make_opts(tesseract_pagesegmode='0')
|
||||
plugin_manager = get_plugin_manager(opts.plugins)
|
||||
vd.check_options(opts, plugin_manager)
|
||||
vd._check_options(opts, plugin_manager, set())
|
||||
assert 'disable OCR' in caplog.text
|
||||
|
||||
Reference in New Issue
Block a user