Overhaul version checkers to prefer Version to str

This commit is contained in:
James R. Barlow
2023-09-25 00:59:16 -07:00
parent bd4d44e182
commit ea36aedb5f
12 changed files with 57 additions and 39 deletions

View File

@@ -70,8 +70,8 @@ log.addFilter(DuplicateFilter(log))
GS = 'gswin64c' if os.name == 'nt' else 'gs'
def version():
return get_version(GS)
def version() -> Version:
return Version(get_version(GS))
def _gs_error_reported(stream) -> bool:
@@ -216,8 +216,7 @@ def generate_pdfa(
"-dAutoFilterGrayImages=true",
]
strategy = 'LeaveColorUnchanged'
gs_version = Version(version())
gs_version = version()
if gs_version == Version('9.56.0'):
# 9.56.0 breaks our OCR, should be fixed in 9.56.1
# https://bugs.ghostscript.com/show_bug.cgi?id=705187

View File

@@ -7,12 +7,14 @@ from __future__ import annotations
from subprocess import PIPE
from packaging.version import Version
from ocrmypdf.exceptions import MissingDependencyError
from ocrmypdf.subprocess import get_version, run
def version():
return get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*')
def version() -> Version:
return Version(get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*'))
def available():

View File

@@ -10,14 +10,15 @@ from io import BytesIO
from pathlib import Path
from subprocess import PIPE
from packaging.version import Version
from PIL import Image
from ocrmypdf.exceptions import MissingDependencyError
from ocrmypdf.subprocess import get_version, run
def version():
return get_version('pngquant', regex=r'(\d+(\.\d+)*).*')
def version() -> Version:
return Version(get_version('pngquant', regex=r'(\d+(\.\d+)*).*'))
def available():

View File

@@ -113,13 +113,13 @@ class TesseractVersion(Version):
)
def version() -> str:
return get_version('tesseract', regex=r'tesseract\s(.+)')
def version() -> Version:
return TesseractVersion(get_version('tesseract', regex=r'tesseract\s(.+)'))
def has_thresholding() -> bool:
"""Does Tesseract have -c thresholding method capability?"""
return version() >= '5.0'
return version() >= Version('5.0')
def get_languages() -> set[str]:

View File

@@ -15,6 +15,7 @@ from pathlib import Path
from subprocess import PIPE, STDOUT
from typing import Iterator, Union
from packaging.version import Version
from PIL import Image
from ocrmypdf.exceptions import MissingDependencyError, SubprocessOutputError
@@ -67,8 +68,8 @@ class UnpaperImageTooLargeError(Exception):
super().__init__(self.message)
def version() -> str:
return get_version('unpaper')
def version() -> Version:
return Version(get_version('unpaper'))
SUPPORTED_MODES = {'1', 'L', 'RGB'}

View File

@@ -212,7 +212,7 @@ class TesseractOcrEngine(OcrEngine):
@staticmethod
def version():
return tesseract.version()
return str(tesseract.version())
@staticmethod
def creator_tag(options):

View File

@@ -292,16 +292,12 @@ def _error_old_version(
_error_trailer(**locals())
def _remove_leading_v(s: str) -> str:
return s.removeprefix('v')
def check_external_program(
*,
program: str,
package: str,
version_checker: Callable[[], str],
need_version: str,
version_checker: Callable[[], Version],
need_version: str | Version,
required_for: str | None = None,
recommended: bool = False,
version_parser: type[Version] = Version,
@@ -321,6 +317,8 @@ def check_external_program(
version_parser: A class that should be used to parse and compare version
numbers. Used when version numbers do not follow standard conventions.
"""
if not isinstance(need_version, Version):
need_version = version_parser(need_version)
try:
found_version = version_checker()
except (CalledProcessError, FileNotFoundError) as e:
@@ -334,11 +332,10 @@ def check_external_program(
raise
return
found_version = _remove_leading_v(found_version)
need_version = _remove_leading_v(need_version)
if found_version and version_parser(found_version) < version_parser(need_version):
_error_old_version(program, package, need_version, found_version, required_for)
if found_version and found_version < need_version:
_error_old_version(
program, package, str(need_version), str(found_version), required_for
)
if not recommended:
raise MissingDependencyError(program)

View File

@@ -178,7 +178,7 @@ def test_maximum_options(renderer, output_type, multipage, outpdf):
@pytest.mark.skipif(
tesseract.TesseractVersion(tesseract.version()) >= tesseract.TesseractVersion('5'),
tesseract.version() >= tesseract.TesseractVersion('5'),
reason="tess 5 tries harder to find its files",
)
def test_tesseract_missing_tessdata(monkeypatch, resources, no_outpdf, tmpdir):

View File

@@ -10,12 +10,13 @@ from shutil import copyfile
import pikepdf
import pytest
from packaging.version import Version
from pikepdf.models.metadata import decode_pdf_date
from ocrmypdf._exec import ghostscript
from ocrmypdf._jobcontext import PdfContext
from ocrmypdf._pipeline import convert_to_pdfa, metadata_fixup
from ocrmypdf._plugin_manager import get_parser_options_plugins, get_plugin_manager
from ocrmypdf.cli import get_parser
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.pdfa import file_claims_pdfa, generate_pdfa_ps
from ocrmypdf.pdfinfo import PdfInfo
@@ -393,6 +394,10 @@ def test_prevent_gs_invalid_xml(resources, outdir):
assert contents.find(b'\x00', xmp_start, xmp_end) == -1
@pytest.mark.skipif(
ghostscript.version() >= Version('10.2.0'),
reason="Ghostscript 10.2.0+ exit with an error on invalid DocumentInfo",
)
def test_malformed_docinfo(caplog, resources, outdir):
generate_pdfa_ps(outdir / 'pdfa.ps')

View File

@@ -58,7 +58,9 @@ def test_content_preservation(resources, outpdf):
assert len(page.images) > 1, "masks were rasterized"
@pytest.mark.skipif(tesseract.version() > '5', reason="doesn't fool Tess 5")
@pytest.mark.skipif(
tesseract.version() > tesseract.TesseractVersion('5'), reason="doesn't fool Tess 5"
)
def test_no_languages(tmp_path, monkeypatch):
(tmp_path / 'tessdata').mkdir()
monkeypatch.setenv('TESSDATA_PREFIX', fspath(tmp_path))

View File

@@ -8,6 +8,7 @@ from os import fspath
from unittest.mock import patch
import pytest
from packaging.version import Version
from ocrmypdf._exec import unpaper
from ocrmypdf._plugin_manager import get_parser_options_plugins
@@ -40,7 +41,7 @@ def test_old_unpaper(resources, no_outpdf):
_parser, options, pm = get_parser_options_plugins(["--clean", input_, output])
with patch("ocrmypdf._exec.unpaper.version") as mock:
mock.return_value = '0.5'
mock.return_value = Version('0.5')
with pytest.raises(MissingDependencyError):
check_options(options, pm)

View File

@@ -49,7 +49,10 @@ def test_hocr_notlatin_warning(caplog):
def test_old_tesseract_error():
with patch('ocrmypdf._exec.tesseract.version', return_value='4.00.00alpha'):
with patch(
'ocrmypdf._exec.tesseract.version',
return_value=TesseractVersion('4.00.00alpha'),
):
with pytest.raises(MissingDependencyError):
vd.check_options(*make_opts_pm(pdf_renderer='sandwich', language='eng'))
@@ -181,59 +184,66 @@ def test_language_warning(caplog):
mock.assert_called_once()
def make_version(version):
def _make_version():
return TesseractVersion(version)
return _make_version
def test_version_comparison():
vd.check_external_program(
program="dummy_basic",
package="dummy",
version_checker=lambda: '9.0',
version_checker=make_version('9.0'),
need_version='8.0.2',
)
vd.check_external_program(
program="dummy_doubledigit",
package="dummy",
version_checker=lambda: '10.0',
version_checker=make_version('10.0'),
need_version='8.0.2',
)
with pytest.raises(MissingDependencyError):
vd.check_external_program(
program="tesseract",
package="tesseract",
version_checker=lambda: '4.0.0-beta.1',
version_checker=make_version('4.0.0-beta.1'),
need_version='4.1.1',
version_parser=TesseractVersion,
)
vd.check_external_program(
program="tesseract",
package="tesseract",
version_checker=lambda: 'v5.0.0-alpha.20200201',
version_checker=make_version('v5.0.0-alpha.20200201'),
need_version='4.1.1',
version_parser=TesseractVersion,
)
vd.check_external_program(
program="tesseract",
package="tesseract",
version_checker=lambda: '5.0.0-rc1.20211030',
version_checker=make_version('5.0.0-rc1.20211030'),
need_version='4.1.1',
version_parser=TesseractVersion,
)
vd.check_external_program(
program="tesseract",
package="tesseract",
version_checker=lambda: 'v4.1.1.20181030', # Used in some Windows builds
version_checker=make_version('v4.1.1.20181030'), # Used in some Windows builds
need_version='4.1.1',
version_parser=TesseractVersion,
)
vd.check_external_program(
program="gs",
package="ghostscript",
version_checker=lambda: '10.0',
version_checker=make_version('10.0'),
need_version='9.50',
)
with pytest.raises(MissingDependencyError):
vd.check_external_program(
program="tesseract",
package="tesseract",
version_checker=lambda: '4.1.1-rc2-25-g9707',
version_checker=make_version('4.1.1-rc2-25-g9707'),
need_version='4.1.1',
version_parser=TesseractVersion,
)
@@ -241,7 +251,7 @@ def test_version_comparison():
vd.check_external_program(
program="dummy_fails",
package="dummy",
version_checker=lambda: '1.0',
version_checker=make_version('1.0'),
need_version='2.0',
)