Improve canvas interface with chaining

This commit is contained in:
James R. Barlow
2023-11-20 14:42:48 -08:00
parent 532cf18ad3
commit 74e101a2fa
2 changed files with 84 additions and 55 deletions

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import logging
import unicodedata
from contextlib import contextmanager
from importlib.resources import files as package_files
from pathlib import Path
@@ -14,6 +16,8 @@ from pikepdf import (
unparse_content_stream,
)
log = logging.getLogger(__name__)
GLYPHLESS_FONT_NAME = 'pdf.ttf'
GLYPHLESS_FONT = (package_files('ocrmypdf.data') / GLYPHLESS_FONT_NAME).read_bytes()
@@ -250,23 +254,28 @@ class PikepdfCanvas:
self._pdf = Pdf.new()
self._page = self._pdf.add_blank_page(page_size=page_size)
self._cs = ContentStreamBuilder()
self._cs.push()
self._stack_depth = 0
self.push()
self._font_name = Name("/f-0-0")
def set_stroke_color(self, color):
    """Set the stroke color from the red/green/blue attributes of *color*.

    Returns ``self`` so that calls can be chained.
    """
    rgb = (color.red, color.green, color.blue)
    self._cs.set_stroke_color(*rgb)
    return self
def set_fill_color(self, color):
    """Set the fill color from the red/green/blue attributes of *color*.

    Returns ``self`` so that calls can be chained.
    """
    rgb = (color.red, color.green, color.blue)
    self._cs.set_fill_color(*rgb)
    return self
def set_line_width(self, width):
    """Forward *width* to the content stream builder's line width setting.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.set_line_width(width)
    return self
def line(self, x1, y1, x2, y2):
    """Add a line from (x1, y1) to (x2, y2) and stroke it immediately.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.line(x1, y1, x2, y2)
    # Each segment is stroked on its own rather than accumulated into a path.
    builder.stroke_and_close()
    return self
def rect(self, x, y, w, h, fill):
self._cs.append_rectangle(x, y, w, h)
@@ -274,12 +283,13 @@ class PikepdfCanvas:
self._cs.fill()
else:
self._cs.stroke_and_close()
return self
def begin_text(self, x=0, y=0, direction=None):
    """Create a new ``PikepdfText`` object from *x*, *y* and *direction*.

    The text object is returned to the caller; nothing is written to this
    canvas here.
    """
    text = PikepdfText(x, y, direction)
    return text
def draw_text(self, text: PikepdfText):
self._cs._instructions.extend(text._cs._instructions)
self._cs._instructions.extend(text._cs.build())
self._end_text()
def _end_text(self):
@@ -294,18 +304,36 @@ class PikepdfCanvas:
def set_dashes(self, *args):
    """Forward all positional arguments to the builder's ``set_dashes``.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.set_dashes(*args)
    return self
def push(self):
    """Push onto the builder's state stack and count the extra level.

    Returns ``self`` so that calls can be chained.
    """
    self._cs.push()
    # Track nesting so an unbalanced stack can be detected later.
    self._stack_depth = self._stack_depth + 1
    return self
def pop(self):
    """Pop the builder's state stack and count one level fewer.

    Returns ``self`` so that calls can be chained.
    """
    self._cs.pop()
    # Mirror of push(): keep the nesting counter in step with the builder.
    self._stack_depth = self._stack_depth - 1
    return self
@contextmanager
def enter_context(self):
    """Save the graphics state and restore it on exit.

    Calls ``push()`` on entry and ``pop()`` on exit, yielding ``self`` so the
    canvas can be bound with ``with canvas.enter_context() as c:``.

    The ``pop()`` runs in a ``finally`` clause, so the state stack and the
    depth counter stay balanced even when the body of the ``with`` block
    raises. The exception itself is not suppressed.
    """
    self.push()
    try:
        yield self
    finally:
        # Without this, an exception in the with-body would skip the pop
        # and leave an unmatched push behind.
        self.pop()
def cm(self, matrix):
    """Forward *matrix* to the content stream builder's ``cm`` call.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.cm(matrix)
    return self
def save(self):
self._cs.pop()
if self._stack_depth != 0:
log.warning(
"Graphics state stack is not empty when page saved - "
"rendering may be incorrect"
)
self._page.Contents = self._pdf.make_stream(
unparse_content_stream(self._cs.build())
)
@@ -322,22 +350,28 @@ class PikepdfText:
def set_font(self, font, size):
    """Select the text font at the given *size*.

    The *font* argument is not used here: the font resource name is always
    ``/f-0-0``. Returns ``self`` so that calls can be chained.
    """
    font_name = Name("/f-0-0")
    self._cs.set_text_font(font_name, size)
    return self
def set_render_mode(self, mode):
    """Forward *mode* to the builder's text rendering setting.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.set_text_rendering(mode)
    return self
def set_text_transform(self, matrix: Matrix):
    """Set the text matrix and remember its translation as the line start.

    The ``e`` and ``f`` components of *matrix* are stored as ``_p0``, the
    value that ``get_start_of_line`` returns. Returns ``self`` so that calls
    can be chained.
    """
    self._cs.set_text_matrix(matrix)
    origin = (matrix.e, matrix.f)
    self._p0 = origin
    return self
def show(self, text):
    """Pass *text* to the builder's ``show_text`` call.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.show_text(text)
    return self
def set_horiz_scale(self, scale):
    """Forward *scale* to the builder's horizontal text scaling setting.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.set_text_horizontal_scaling(scale)
    return self
def get_start_of_line(self):
    """Return the stored ``_p0`` point.

    ``set_text_transform`` stores the ``(e, f)`` components of its matrix
    there.
    """
    start = self._p0
    return start
def move_cursor(self, x, y):
    """Forward *x* and *y* to the builder's ``move_cursor`` call.

    Returns ``self`` so that calls can be chained.
    """
    builder = self._cs
    builder.move_cursor(x, y)
    return self

View File

@@ -351,83 +351,78 @@ class HocrTransform:
"""Draw boxes around paragraphs in the document."""
if not self.render_options.render_paragraph_bbox: # pragma: no cover
return
for elem in self.hocr.iterfind(self._child_xpath('p', 'ocr_par')):
elemtxt = self._get_element_text(elem).rstrip()
if len(elemtxt) == 0:
continue
ocr_par = self.element_coordinates(elem)
with canvas.enter_context():
# draw box around paragraph
canvas.set_stroke_color(color)
canvas.set_line_width(0.1) # no line for bounding box
canvas.rect(ocr_par.llx, ocr_par.lly, ocr_par.width, ocr_par.height, fill=0)
for elem in self.hocr.iterfind(self._child_xpath('p', 'ocr_par')):
elemtxt = self._get_element_text(elem).strip()
if len(elemtxt) == 0:
continue
ocr_par = self.element_coordinates(elem)
canvas.rect(
ocr_par.llx, ocr_par.lly, ocr_par.width, ocr_par.height, fill=0
)
def _debug_draw_line_bbox(self, canvas: Canvas, line_box, color=BLUE):
def _debug_draw_line_bbox(self, canvas: Canvas, line_box: Rectangle, color=BLUE):
"""Render the bounding box of a text line."""
if not self.render_options.render_line_bbox: # pragma: no cover
return
canvas.push()
canvas.set_stroke_color(color)
canvas.set_line_width(0.15)
canvas.rect(
line_box.llx,
line_box.lly,
line_box.width,
line_box.height,
fill=0,
)
canvas.pop()
with canvas.enter_context():
canvas.set_stroke_color(color).set_line_width(0.15).rect(
line_box.llx, line_box.lly, line_box.width, line_box.height, fill=0
)
def _debug_draw_word_triangle(self, canvas: Canvas, box, color=RED, line_width=0.1):
def _debug_draw_word_triangle(
self, canvas: Canvas, box: Rectangle, color=RED, line_width=0.1
):
"""Render a triangle that conveys word height and drawing direction."""
if not self.render_options.render_triangle: # pragma: no cover
return
canvas.push()
canvas.set_stroke_color(color)
canvas.set_line_width(line_width)
# Draw a triangle that conveys word height and drawing direction
canvas.line(box.llx, box.lly, box.urx, box.lly) # across bottom
canvas.line(box.urx, box.lly, box.llx, box.ury) # diagonal
canvas.line(box.llx, box.lly, box.llx, box.ury) # rise
canvas.pop()
with canvas.enter_context():
canvas.set_stroke_color(color).set_line_width(line_width).line(
box.llx, box.lly, box.urx, box.lly
).line(box.urx, box.lly, box.llx, box.ury).line(
box.llx, box.lly, box.llx, box.ury
)
def _debug_draw_word_bbox(self, canvas: Canvas, box, color=GREEN, line_width=0.1):
def _debug_draw_word_bbox(
self, canvas: Canvas, box: Rectangle, color=GREEN, line_width=0.1
):
"""Render a box depicting the word."""
if not self.render_options.render_word_bbox: # pragma: no cover
return
canvas.push()
canvas.set_dashes()
canvas.set_stroke_color(color)
canvas.set_line_width(line_width)
canvas.rect(box.llx, box.lly, box.width, box.height, fill=0)
canvas.pop()
with canvas.enter_context():
canvas.set_stroke_color(color).set_line_width(line_width).rect(
box.llx, box.lly, box.width, box.height, fill=0
)
def _debug_draw_space_bbox(
self, canvas: Canvas, box, color=DARKGREEN, line_width=0.1
self, canvas: Canvas, box: Rectangle, color=DARKGREEN, line_width=0.1
):
"""Render a box depicting the space between two words."""
if not self.render_options.render_space_bbox: # pragma: no cover
return
canvas.push()
canvas.set_dashes()
canvas.set_fill_color(color)
canvas.set_line_width(line_width)
canvas.rect(box.llx, box.lly, box.width, box.height, fill=1)
canvas.pop()
with canvas.enter_context():
canvas.set_fill_color(color).set_line_width(line_width).rect(
box.llx, box.lly, box.width, box.height, fill=1
)
def _debug_draw_baseline(
self, canvas, line_box, baseline_lly, color=MAGENTA, line_width=0.25
self,
canvas: Canvas,
line_box: Rectangle,
baseline_lly,
color=MAGENTA,
line_width=0.25,
):
"""Render the text baseline."""
if not self.render_options.render_baseline:
return
canvas.push()
canvas.set_dashes()
canvas.set_stroke_color(color)
canvas.set_line_width(line_width)
canvas.line(
line_box.llx,
baseline_lly,
line_box.urx,
baseline_lly,
)
canvas.pop()
with canvas.enter_context():
canvas.set_stroke_color(color).set_line_width(line_width).line(
line_box.llx,
baseline_lly,
line_box.urx,
baseline_lly,
)