mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-04-26 17:14:43 -04:00
Convert deskew to use degrees, since all our other angles are in degrees
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
import logging
|
||||
import re
|
||||
from distutils.version import StrictVersion
|
||||
+ from math import pi
|
||||
from os import fspath
|
||||
from pathlib import Path
|
||||
from subprocess import PIPE, STDOUT, CalledProcessError, TimeoutExpired
|
||||
@@ -172,7 +173,7 @@ def get_orientation(
|
||||
def get_deskew(
|
||||
input_file: Path, languages: List[str], engine_mode: Optional[int], timeout: float
|
||||
) -> float:
|
||||
- """Gets angle to deskew this page, in radians."""
|
||||
+ """Gets angle to deskew this page, in degrees."""
|
||||
args_tesseract = tess_base_args(languages, engine_mode) + [
|
||||
'--psm',
|
||||
'2',
|
||||
@@ -193,8 +194,9 @@ def get_deskew(
|
||||
raise SubprocessOutputError() from e
|
||||
|
||||
parsed = _parse_tesseract_output(p.stdout)
|
||||
- deskew = float(parsed.get('Deskew angle', 0))
|
||||
- return deskew
|
||||
+ deskew_radians = float(parsed.get('Deskew angle', 0))
|
||||
+ deskew_degrees = 180 / pi * deskew_radians
|
||||
+ return deskew_degrees
|
||||
|
||||
|
||||
def tesseract_log_output(stream):
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
|
||||
import logging
|
||||
- import math
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
@@ -479,9 +478,7 @@ def preprocess_deskew(input_file: Path, page_context: PageContext):
|
||||
dpi = get_page_square_dpi(page_context.pageinfo, page_context.options)
|
||||
|
||||
ocr_engine = page_context.plugin_manager.hook.get_ocr_engine()
|
||||
- deskew_angle = ocr_engine.get_deskew(input_file, page_context.options)
|
||||
|
||||
- deskew_angle_degrees = deskew_angle * 180.0 / math.pi
|
||||
+ deskew_angle_degrees = ocr_engine.get_deskew(input_file, page_context.options)
|
||||
|
||||
with Image.open(input_file) as im:
|
||||
# According to Pillow docs, .rotate() will automatically use Image.NEAREST
|
||||
|
||||
@@ -368,7 +368,7 @@ class OcrEngine(ABC):
|
||||
|
||||
@staticmethod
|
||||
def get_deskew(input_file: Path, options: Namespace) -> float:
|
||||
- """Returns the deskew angle of the image, in radians."""
|
||||
+ """Returns the deskew angle of the image, in degrees."""
|
||||
return 0.0
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
- from math import isclose, pi
|
||||
+ from math import isclose
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
@@ -36,7 +36,7 @@ def test_deskew(resources, outdir):
|
||||
pageno=1,
|
||||
)
|
||||
|
||||
- skew_angle = tesseract.get_deskew(deskewed_png, [], None, 5.0) * 180 / pi
|
||||
+ skew_angle = tesseract.get_deskew(deskewed_png, [], None, 5.0)
|
||||
print(skew_angle)
|
||||
assert -0.5 < skew_angle < 0.5, "Deskewing failed"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user