diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py
index c5dcb6a4..5a533fa9 100644
--- a/src/ocrmypdf/_validation.py
+++ b/src/ocrmypdf/_validation.py
@@ -17,6 +17,7 @@
# along with OCRmyPDF. If not, see .
+import locale
import logging
import os
import sys
@@ -47,6 +48,7 @@ from .helpers import is_file_writable, is_iterable_notstr, monotonic, re_symlink
# External dependencies
HOCR_OK_LANGS = frozenset(['eng', 'deu', 'spa', 'ita', 'por'])
+DEFAULT_LANGUAGE = 'eng'  # Language assumed when options.language is not set
log = logging.getLogger(__name__)
@@ -58,7 +60,12 @@ verify_python3_env()
def check_options_languages(options):
if not options.language:
- options.language = ['eng'] # Enforce English hegemony
+ options.language = [DEFAULT_LANGUAGE]
+ system_lang = locale.getlocale()[0]
+ if system_lang and not system_lang.startswith('en'):
+ log.debug(
+ "No language specified; assuming --language %s" % DEFAULT_LANGUAGE
+ )
# Support v2.x "eng+deu" language syntax
if '+' in options.language[0]:
diff --git a/tests/test_validation.py b/tests/test_validation.py
index 5413e375..5465a580 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -15,6 +15,8 @@
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see .
+import locale
+import logging
import os
from unittest.mock import patch
@@ -27,9 +29,9 @@ from ocrmypdf.pdfinfo import PdfInfo
def make_opts(input_file='a.pdf', output_file='b.pdf', language='eng', **kwargs):
- return create_options(
- input_file=input_file, output_file=output_file, language=language, **kwargs
- )
+    # Pass `language` through only when it is set; None means "omit the option".
+    extra = dict(kwargs) if language is None else dict(kwargs, language=language)
+    return create_options(input_file=input_file, output_file=output_file, **extra)
def test_hocr_notlatin_warning(caplog):
@@ -139,3 +141,21 @@ def test_no_progress_bar(progress_bar, resources):
assert tqdmpatch.called
_args, kwargs = tqdmpatch.call_args
assert kwargs['disable'] != progress_bar
+
+
+def test_language_warning(caplog):
+    """The 'eng' default is logged only when the system locale is not English."""
+    caplog.set_level(logging.DEBUG)
+    getlocale = 'ocrmypdf._validation.locale.getlocale'
+    opts = make_opts(language=None)
+    with patch(getlocale, return_value=('en_US', 'UTF-8')):
+        vd.check_options_languages(opts)
+    assert opts.language == ['eng']
+    assert 'assuming --language' not in caplog.text
+
+    # Fresh options: the call above set opts.language, which skips the default path.
+    opts = make_opts(language=None)
+    with patch(getlocale, return_value=('fr_FR', 'UTF-8')):
+        vd.check_options_languages(opts)
+    assert opts.language == ['eng']
+    assert 'assuming --language' in caplog.text