From b159e021104b5a97dbb7bbc8d4c9779b6cd19285 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Fri, 12 Nov 2021 16:40:51 -0800 Subject: [PATCH] Convert deskew to use degrees, since all our other angles are in degrees --- src/ocrmypdf/_exec/tesseract.py | 8 +++++--- src/ocrmypdf/_pipeline.py | 5 +---- src/ocrmypdf/pluginspec.py | 2 +- tests/test_preprocessing.py | 4 ++-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/ocrmypdf/_exec/tesseract.py b/src/ocrmypdf/_exec/tesseract.py index 8936d297..6dc7f0d0 100644 --- a/src/ocrmypdf/_exec/tesseract.py +++ b/src/ocrmypdf/_exec/tesseract.py @@ -10,6 +10,7 @@ import logging import re from distutils.version import StrictVersion +from math import pi from os import fspath from pathlib import Path from subprocess import PIPE, STDOUT, CalledProcessError, TimeoutExpired @@ -172,7 +173,7 @@ def get_orientation( def get_deskew( input_file: Path, languages: List[str], engine_mode: Optional[int], timeout: float ) -> float: - """Gets angle to deskew this page, in radians.""" + """Gets angle to deskew this page, in degrees.""" args_tesseract = tess_base_args(languages, engine_mode) + [ '--psm', '2', @@ -193,8 +194,9 @@ def get_deskew( raise SubprocessOutputError() from e parsed = _parse_tesseract_output(p.stdout) - deskew = float(parsed.get('Deskew angle', 0)) - return deskew + deskew_radians = float(parsed.get('Deskew angle', 0)) + deskew_degrees = 180 / pi * deskew_radians + return deskew_degrees def tesseract_log_output(stream): diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py index 33777d96..a5f6ba56 100644 --- a/src/ocrmypdf/_pipeline.py +++ b/src/ocrmypdf/_pipeline.py @@ -6,7 +6,6 @@ import logging -import math import os import re import sys @@ -479,9 +478,7 @@ def preprocess_deskew(input_file: Path, page_context: PageContext): dpi = get_page_square_dpi(page_context.pageinfo, page_context.options) ocr_engine = page_context.plugin_manager.hook.get_ocr_engine() - deskew_angle = ocr_engine.get_deskew(input_file, page_context.options) - - deskew_angle_degrees = deskew_angle * 180.0 / math.pi + deskew_angle_degrees = ocr_engine.get_deskew(input_file, page_context.options) with Image.open(input_file) as im: # According to Pillow docs, .rotate() will automatically use Image.NEAREST diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py index fb1846dc..bec8bcba 100644 --- a/src/ocrmypdf/pluginspec.py +++ b/src/ocrmypdf/pluginspec.py @@ -368,7 +368,7 @@ class OcrEngine(ABC): @staticmethod def get_deskew(input_file: Path, options: Namespace) -> float: - """Returns the deskew angle of the image, in radians.""" + """Returns the deskew angle of the image, in degrees.""" return 0.0 @staticmethod diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 1d0fc818..6418ad6b 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -5,7 +5,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from math import isclose, pi +from math import isclose import pytest from PIL import Image @@ -36,7 +36,7 @@ def test_deskew(resources, outdir): pageno=1, ) - skew_angle = tesseract.get_deskew(deskewed_png, [], None, 5.0) * 180 / pi + skew_angle = tesseract.get_deskew(deskewed_png, [], None, 5.0) print(skew_angle) assert -0.5 < skew_angle < 0.5, "Deskewing failed"