mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-18 11:36:26 -04:00
Overhaul version checkers to prefer Version to str
This commit is contained in:
@@ -70,8 +70,8 @@ log.addFilter(DuplicateFilter(log))
|
||||
GS = 'gswin64c' if os.name == 'nt' else 'gs'
|
||||
|
||||
|
||||
def version():
|
||||
return get_version(GS)
|
||||
def version() -> Version:
|
||||
return Version(get_version(GS))
|
||||
|
||||
|
||||
def _gs_error_reported(stream) -> bool:
|
||||
@@ -216,8 +216,7 @@ def generate_pdfa(
|
||||
"-dAutoFilterGrayImages=true",
|
||||
]
|
||||
|
||||
strategy = 'LeaveColorUnchanged'
|
||||
gs_version = Version(version())
|
||||
gs_version = version()
|
||||
if gs_version == Version('9.56.0'):
|
||||
# 9.56.0 breaks our OCR, should be fixed in 9.56.1
|
||||
# https://bugs.ghostscript.com/show_bug.cgi?id=705187
|
||||
|
||||
@@ -7,12 +7,14 @@ from __future__ import annotations
|
||||
|
||||
from subprocess import PIPE
|
||||
|
||||
from packaging.version import Version
|
||||
|
||||
from ocrmypdf.exceptions import MissingDependencyError
|
||||
from ocrmypdf.subprocess import get_version, run
|
||||
|
||||
|
||||
def version():
|
||||
return get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*')
|
||||
def version() -> Version:
|
||||
return Version(get_version('jbig2', regex=r'jbig2enc (\d+(\.\d+)*).*'))
|
||||
|
||||
|
||||
def available():
|
||||
|
||||
@@ -10,14 +10,15 @@ from io import BytesIO
|
||||
from pathlib import Path
|
||||
from subprocess import PIPE
|
||||
|
||||
from packaging.version import Version
|
||||
from PIL import Image
|
||||
|
||||
from ocrmypdf.exceptions import MissingDependencyError
|
||||
from ocrmypdf.subprocess import get_version, run
|
||||
|
||||
|
||||
def version():
|
||||
return get_version('pngquant', regex=r'(\d+(\.\d+)*).*')
|
||||
def version() -> Version:
|
||||
return Version(get_version('pngquant', regex=r'(\d+(\.\d+)*).*'))
|
||||
|
||||
|
||||
def available():
|
||||
|
||||
@@ -113,13 +113,13 @@ class TesseractVersion(Version):
|
||||
)
|
||||
|
||||
|
||||
def version() -> str:
|
||||
return get_version('tesseract', regex=r'tesseract\s(.+)')
|
||||
def version() -> Version:
|
||||
return TesseractVersion(get_version('tesseract', regex=r'tesseract\s(.+)'))
|
||||
|
||||
|
||||
def has_thresholding() -> bool:
|
||||
"""Does Tesseract have -c thresholding method capability?"""
|
||||
return version() >= '5.0'
|
||||
return version() >= Version('5.0')
|
||||
|
||||
|
||||
def get_languages() -> set[str]:
|
||||
|
||||
@@ -15,6 +15,7 @@ from pathlib import Path
|
||||
from subprocess import PIPE, STDOUT
|
||||
from typing import Iterator, Union
|
||||
|
||||
from packaging.version import Version
|
||||
from PIL import Image
|
||||
|
||||
from ocrmypdf.exceptions import MissingDependencyError, SubprocessOutputError
|
||||
@@ -67,8 +68,8 @@ class UnpaperImageTooLargeError(Exception):
|
||||
super().__init__(self.message)
|
||||
|
||||
|
||||
def version() -> str:
|
||||
return get_version('unpaper')
|
||||
def version() -> Version:
|
||||
return Version(get_version('unpaper'))
|
||||
|
||||
|
||||
SUPPORTED_MODES = {'1', 'L', 'RGB'}
|
||||
|
||||
@@ -212,7 +212,7 @@ class TesseractOcrEngine(OcrEngine):
|
||||
|
||||
@staticmethod
|
||||
def version():
|
||||
return tesseract.version()
|
||||
return str(tesseract.version())
|
||||
|
||||
@staticmethod
|
||||
def creator_tag(options):
|
||||
|
||||
@@ -292,16 +292,12 @@ def _error_old_version(
|
||||
_error_trailer(**locals())
|
||||
|
||||
|
||||
def _remove_leading_v(s: str) -> str:
|
||||
return s.removeprefix('v')
|
||||
|
||||
|
||||
def check_external_program(
|
||||
*,
|
||||
program: str,
|
||||
package: str,
|
||||
version_checker: Callable[[], str],
|
||||
need_version: str,
|
||||
version_checker: Callable[[], Version],
|
||||
need_version: str | Version,
|
||||
required_for: str | None = None,
|
||||
recommended: bool = False,
|
||||
version_parser: type[Version] = Version,
|
||||
@@ -321,6 +317,8 @@ def check_external_program(
|
||||
version_parser: A class that should be used to parse and compare version
|
||||
numbers. Used when version numbers do not follow standard conventions.
|
||||
"""
|
||||
if not isinstance(need_version, Version):
|
||||
need_version = version_parser(need_version)
|
||||
try:
|
||||
found_version = version_checker()
|
||||
except (CalledProcessError, FileNotFoundError) as e:
|
||||
@@ -334,11 +332,10 @@ def check_external_program(
|
||||
raise
|
||||
return
|
||||
|
||||
found_version = _remove_leading_v(found_version)
|
||||
need_version = _remove_leading_v(need_version)
|
||||
|
||||
if found_version and version_parser(found_version) < version_parser(need_version):
|
||||
_error_old_version(program, package, need_version, found_version, required_for)
|
||||
if found_version and found_version < need_version:
|
||||
_error_old_version(
|
||||
program, package, str(need_version), str(found_version), required_for
|
||||
)
|
||||
if not recommended:
|
||||
raise MissingDependencyError(program)
|
||||
|
||||
|
||||
@@ -178,7 +178,7 @@ def test_maximum_options(renderer, output_type, multipage, outpdf):
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
tesseract.TesseractVersion(tesseract.version()) >= tesseract.TesseractVersion('5'),
|
||||
tesseract.version() >= tesseract.TesseractVersion('5'),
|
||||
reason="tess 5 tries harder to find its files",
|
||||
)
|
||||
def test_tesseract_missing_tessdata(monkeypatch, resources, no_outpdf, tmpdir):
|
||||
|
||||
@@ -10,12 +10,13 @@ from shutil import copyfile
|
||||
|
||||
import pikepdf
|
||||
import pytest
|
||||
from packaging.version import Version
|
||||
from pikepdf.models.metadata import decode_pdf_date
|
||||
|
||||
from ocrmypdf._exec import ghostscript
|
||||
from ocrmypdf._jobcontext import PdfContext
|
||||
from ocrmypdf._pipeline import convert_to_pdfa, metadata_fixup
|
||||
from ocrmypdf._plugin_manager import get_parser_options_plugins, get_plugin_manager
|
||||
from ocrmypdf.cli import get_parser
|
||||
from ocrmypdf.exceptions import ExitCode
|
||||
from ocrmypdf.pdfa import file_claims_pdfa, generate_pdfa_ps
|
||||
from ocrmypdf.pdfinfo import PdfInfo
|
||||
@@ -393,6 +394,10 @@ def test_prevent_gs_invalid_xml(resources, outdir):
|
||||
assert contents.find(b'\x00', xmp_start, xmp_end) == -1
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
ghostscript.version() >= Version('10.2.0'),
|
||||
reason="Ghostscript 10.2.0+ exit with an error on invalid DocumentInfo",
|
||||
)
|
||||
def test_malformed_docinfo(caplog, resources, outdir):
|
||||
generate_pdfa_ps(outdir / 'pdfa.ps')
|
||||
|
||||
|
||||
@@ -58,7 +58,9 @@ def test_content_preservation(resources, outpdf):
|
||||
assert len(page.images) > 1, "masks were rasterized"
|
||||
|
||||
|
||||
@pytest.mark.skipif(tesseract.version() > '5', reason="doesn't fool Tess 5")
|
||||
@pytest.mark.skipif(
|
||||
tesseract.version() > tesseract.TesseractVersion('5'), reason="doesn't fool Tess 5"
|
||||
)
|
||||
def test_no_languages(tmp_path, monkeypatch):
|
||||
(tmp_path / 'tessdata').mkdir()
|
||||
monkeypatch.setenv('TESSDATA_PREFIX', fspath(tmp_path))
|
||||
|
||||
@@ -8,6 +8,7 @@ from os import fspath
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from packaging.version import Version
|
||||
|
||||
from ocrmypdf._exec import unpaper
|
||||
from ocrmypdf._plugin_manager import get_parser_options_plugins
|
||||
@@ -40,7 +41,7 @@ def test_old_unpaper(resources, no_outpdf):
|
||||
|
||||
_parser, options, pm = get_parser_options_plugins(["--clean", input_, output])
|
||||
with patch("ocrmypdf._exec.unpaper.version") as mock:
|
||||
mock.return_value = '0.5'
|
||||
mock.return_value = Version('0.5')
|
||||
|
||||
with pytest.raises(MissingDependencyError):
|
||||
check_options(options, pm)
|
||||
|
||||
@@ -49,7 +49,10 @@ def test_hocr_notlatin_warning(caplog):
|
||||
|
||||
|
||||
def test_old_tesseract_error():
|
||||
with patch('ocrmypdf._exec.tesseract.version', return_value='4.00.00alpha'):
|
||||
with patch(
|
||||
'ocrmypdf._exec.tesseract.version',
|
||||
return_value=TesseractVersion('4.00.00alpha'),
|
||||
):
|
||||
with pytest.raises(MissingDependencyError):
|
||||
vd.check_options(*make_opts_pm(pdf_renderer='sandwich', language='eng'))
|
||||
|
||||
@@ -181,59 +184,66 @@ def test_language_warning(caplog):
|
||||
mock.assert_called_once()
|
||||
|
||||
|
||||
def make_version(version):
|
||||
def _make_version():
|
||||
return TesseractVersion(version)
|
||||
|
||||
return _make_version
|
||||
|
||||
|
||||
def test_version_comparison():
|
||||
vd.check_external_program(
|
||||
program="dummy_basic",
|
||||
package="dummy",
|
||||
version_checker=lambda: '9.0',
|
||||
version_checker=make_version('9.0'),
|
||||
need_version='8.0.2',
|
||||
)
|
||||
vd.check_external_program(
|
||||
program="dummy_doubledigit",
|
||||
package="dummy",
|
||||
version_checker=lambda: '10.0',
|
||||
version_checker=make_version('10.0'),
|
||||
need_version='8.0.2',
|
||||
)
|
||||
with pytest.raises(MissingDependencyError):
|
||||
vd.check_external_program(
|
||||
program="tesseract",
|
||||
package="tesseract",
|
||||
version_checker=lambda: '4.0.0-beta.1',
|
||||
version_checker=make_version('4.0.0-beta.1'),
|
||||
need_version='4.1.1',
|
||||
version_parser=TesseractVersion,
|
||||
)
|
||||
vd.check_external_program(
|
||||
program="tesseract",
|
||||
package="tesseract",
|
||||
version_checker=lambda: 'v5.0.0-alpha.20200201',
|
||||
version_checker=make_version('v5.0.0-alpha.20200201'),
|
||||
need_version='4.1.1',
|
||||
version_parser=TesseractVersion,
|
||||
)
|
||||
vd.check_external_program(
|
||||
program="tesseract",
|
||||
package="tesseract",
|
||||
version_checker=lambda: '5.0.0-rc1.20211030',
|
||||
version_checker=make_version('5.0.0-rc1.20211030'),
|
||||
need_version='4.1.1',
|
||||
version_parser=TesseractVersion,
|
||||
)
|
||||
vd.check_external_program(
|
||||
program="tesseract",
|
||||
package="tesseract",
|
||||
version_checker=lambda: 'v4.1.1.20181030', # Used in some Windows builds
|
||||
version_checker=make_version('v4.1.1.20181030'), # Used in some Windows builds
|
||||
need_version='4.1.1',
|
||||
version_parser=TesseractVersion,
|
||||
)
|
||||
vd.check_external_program(
|
||||
program="gs",
|
||||
package="ghostscript",
|
||||
version_checker=lambda: '10.0',
|
||||
version_checker=make_version('10.0'),
|
||||
need_version='9.50',
|
||||
)
|
||||
with pytest.raises(MissingDependencyError):
|
||||
vd.check_external_program(
|
||||
program="tesseract",
|
||||
package="tesseract",
|
||||
version_checker=lambda: '4.1.1-rc2-25-g9707',
|
||||
version_checker=make_version('4.1.1-rc2-25-g9707'),
|
||||
need_version='4.1.1',
|
||||
version_parser=TesseractVersion,
|
||||
)
|
||||
@@ -241,7 +251,7 @@ def test_version_comparison():
|
||||
vd.check_external_program(
|
||||
program="dummy_fails",
|
||||
package="dummy",
|
||||
version_checker=lambda: '1.0',
|
||||
version_checker=make_version('1.0'),
|
||||
need_version='2.0',
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user