leptonica: learn a few new tricks

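Found some interesting options for background normalization: add
Pix.gamma_trc() and Pix.background_norm() wrappers around pixGammaTRC and
pixBackgroundNorm. Also let Pillow decode Leptonica's word-ordered data
directly instead of making a byte-swapped copy with pixEndianByteSwapNew.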
2026-05-04 20:54:18 -04:00 · 2016-09-09 12:54:36 -07:00
parent 7942a01e50
commit 2ec516b6ff
2 changed files with 67 additions and 5 deletions
--- a/ocrmypdf/leptonica.py
+++ b/ocrmypdf/leptonica.py
@@ -104,6 +104,12 @@ class Pix:

    Leptonica's reference counting is not threadsafe. This class can be used
    in a threadsafe manner if a Python threading.Lock protects the data.
+
+    This class treats Pix objects as immutable.  All methods return new
+    modified objects.  This allows convenient chaining:
+
+    >>>   Pix.read('filename.jpg').scale((0.5, 0.5)).deskew().show()
+
    """

    def __init__(self, pix):
@@ -201,13 +207,23 @@ class Pix:
        "Returns a PIL.Image version of this Pix"
        from PIL import Image

-        with LeptonicaErrorTrap():
-            pix_swapped = Pix(lept.pixEndianByteSwapNew(self._pix))
+        # Leptonica manages data in words, so it implicitly does an endian
+        # swap.  Tell Pillow about this when it reads the data.
+        if sys.byteorder == 'little':
+            if self.mode == 'RGB':
+                raw_mode = 'XBGR'
+            elif self.mode == 'RGBA':
+                raw_mode = 'ABGR'
+            else:
+                raw_mode = self.mode
+        else:
+            raw_mode = self.mode  # no endian swap needed

-        size = (pix_swapped._pix.wpl * 4, pix_swapped._pix.h)
-        buf = ffi.buffer(pix_swapped._pix.data, size[0] * size[1])
+        size = (self._pix.w, self._pix.h)
+        bytecount = self._pix.wpl * 4 * self._pix.h
+        buf = ffi.buffer(self._pix.data, bytecount)

-        im_raw = Image.frombytes(self.mode, size, buf, 'raw')
+        im_raw = Image.frombytes(self.mode, size, buf, 'raw', raw_mode)

        # Leptonica stores images in 32-bit words
        # Need to crop the any trailing amount
@@ -352,6 +368,33 @@ class Pix:
                black,
                white))

+    def gamma_trc(self, gamma=1.0, minval=0, maxval=255):
+        """Wrap lept.pixGammaTRC and return the result as a new Pix.
+
+        ffi.NULL is passed as the destination (pixd) argument.
+        """
+        with LeptonicaErrorTrap():
+            remapped = lept.pixGammaTRC(
+                ffi.NULL, self._pix, gamma, minval, maxval)
+            return Pix(remapped)
+
+    def background_norm(
+            self, mask=None, grayscale=None, tile_size=(10, 15), fg_threshold=60,
+            min_count=40, bg_val=200, smooth_kernel=(2, 1)):
+        """Wrap lept.pixBackgroundNorm and return the result as a new Pix.
+
+        tile_size and smooth_kernel are indexed as (x, y) pairs; a falsy
+        mask or grayscale argument is replaced by ffi.NULL.
+        """
+        with LeptonicaErrorTrap():
+            normalized = lept.pixBackgroundNorm(
+                self._pix,
+                mask or ffi.NULL, grayscale or ffi.NULL,
+                tile_size[0], tile_size[1],
+                fg_threshold, min_count, bg_val,
+                smooth_kernel[0], smooth_kernel[1])
+            return Pix(normalized)
+
    @staticmethod
    @lru_cache(maxsize=1)
    def make_pixel_sum_tab8():
--- a/ocrmypdf/lib/compile_leptonica.py
+++ b/ocrmypdf/lib/compile_leptonica.py
@@ -131,6 +131,25 @@ pixClipRectangle(PIX   *pixs,
                 BOX   *box,
                 BOX  **pboxc);

+PIX *
+pixBackgroundNorm(PIX     *pixs,
+                  PIX     *pixim,
+                  PIX     *pixg,
+                  l_int32  sx,
+                  l_int32  sy,
+                  l_int32  thresh,
+                  l_int32  mincount,
+                  l_int32  bgval,
+                  l_int32  smoothx,
+                  l_int32  smoothy);
+
+PIX *
+pixGammaTRC(PIX       *pixd,
+            PIX       *pixs,
+            l_float32  gamma,
+            l_int32    minval,
+            l_int32    maxval);
+
 void
 boxDestroy(BOX  **pbox);