mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-05-16 18:45:51 -04:00
Improve canvas interface with chaining
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import unicodedata
|
||||
from contextlib import contextmanager
|
||||
from importlib.resources import files as package_files
|
||||
from pathlib import Path
|
||||
|
||||
@@ -14,6 +16,8 @@ from pikepdf import (
|
||||
unparse_content_stream,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
GLYPHLESS_FONT_NAME = 'pdf.ttf'
|
||||
|
||||
GLYPHLESS_FONT = (package_files('ocrmypdf.data') / GLYPHLESS_FONT_NAME).read_bytes()
|
||||
@@ -250,23 +254,28 @@ class PikepdfCanvas:
|
||||
self._pdf = Pdf.new()
|
||||
self._page = self._pdf.add_blank_page(page_size=page_size)
|
||||
self._cs = ContentStreamBuilder()
|
||||
self._cs.push()
|
||||
self._stack_depth = 0
|
||||
self.push()
|
||||
self._font_name = Name("/f-0-0")
|
||||
|
||||
def set_stroke_color(self, color):
    """Set the stroke color from ``color``'s red, green and blue attributes.

    Returns ``self`` so that further canvas calls can be chained.
    """
    self._cs.set_stroke_color(color.red, color.green, color.blue)
    return self
|
||||
|
||||
def set_fill_color(self, color):
    """Set the fill color from ``color``'s red, green and blue attributes.

    Returns ``self`` so that further canvas calls can be chained.
    """
    self._cs.set_fill_color(color.red, color.green, color.blue)
    return self
|
||||
|
||||
def set_line_width(self, width):
    """Forward ``width`` to the content stream builder as the line width.

    Returns ``self`` so that further canvas calls can be chained.
    """
    builder = self._cs
    builder.set_line_width(width)
    return self
|
||||
|
||||
def line(self, x1, y1, x2, y2):
    """Add a line from ``(x1, y1)`` to ``(x2, y2)`` and stroke it.

    The segment is appended to the content stream builder and then
    immediately finished with ``stroke_and_close``. Returns ``self`` so
    that further canvas calls can be chained.
    """
    builder = self._cs
    builder.line(x1, y1, x2, y2)
    builder.stroke_and_close()
    return self
|
||||
|
||||
def rect(self, x, y, w, h, fill):
|
||||
self._cs.append_rectangle(x, y, w, h)
|
||||
@@ -274,12 +283,13 @@ class PikepdfCanvas:
|
||||
self._cs.fill()
|
||||
else:
|
||||
self._cs.stroke_and_close()
|
||||
return self
|
||||
|
||||
def begin_text(self, x=0, y=0, direction=None):
    """Create and return a new ``PikepdfText`` built from ``x``, ``y`` and ``direction``.

    The canvas itself is not modified by this call.
    """
    text = PikepdfText(x, y, direction)
    return text
|
||||
|
||||
def draw_text(self, text: PikepdfText):
|
||||
self._cs._instructions.extend(text._cs._instructions)
|
||||
self._cs._instructions.extend(text._cs.build())
|
||||
self._end_text()
|
||||
|
||||
def _end_text(self):
|
||||
@@ -294,18 +304,36 @@ class PikepdfCanvas:
|
||||
|
||||
def set_dashes(self, *args):
    """Forward ``args`` unchanged to the content stream builder's ``set_dashes``.

    Returns ``self`` so that further canvas calls can be chained.
    """
    builder = self._cs
    builder.set_dashes(*args)
    return self
|
||||
|
||||
def push(self):
    """Save the graphics state and record one more level of nesting.

    Returns ``self`` so that further canvas calls can be chained.
    """
    self._cs.push()
    self._stack_depth = self._stack_depth + 1
    return self
|
||||
|
||||
def pop(self):
    """Restore the graphics state and record one less level of nesting.

    Returns ``self`` so that further canvas calls can be chained.
    """
    self._cs.pop()
    self._stack_depth = self._stack_depth - 1
    return self
|
||||
|
||||
@contextmanager
def enter_context(self):
    """Save the graphics state and restore it on exit.

    Yields the canvas itself so drawing calls can be made on the value
    bound by ``with ... as``. The state is restored even when the body of
    the ``with`` block raises, so every ``push`` is matched by a ``pop``
    and the tracked stack depth stays balanced.
    """
    self.push()
    try:
        yield self
    finally:
        # Without the finally, an exception in the with-body would skip
        # the pop and leave the graphics state stack one level too deep.
        self.pop()
|
||||
|
||||
def cm(self, matrix):
    """Forward ``matrix`` to the content stream builder's ``cm`` method.

    Returns ``self`` so that further canvas calls can be chained.
    """
    builder = self._cs
    builder.cm(matrix)
    return self
|
||||
|
||||
def save(self):
|
||||
self._cs.pop()
|
||||
if self._stack_depth != 0:
|
||||
log.warning(
|
||||
"Graphics state stack is not empty when page saved - "
|
||||
"rendering may be incorrect"
|
||||
)
|
||||
self._page.Contents = self._pdf.make_stream(
|
||||
unparse_content_stream(self._cs.build())
|
||||
)
|
||||
@@ -322,22 +350,28 @@ class PikepdfText:
|
||||
|
||||
def set_font(self, font, size):
    """Select the text font at the given ``size``.

    The ``font`` argument is accepted but not used here: the font resource
    name is always ``/f-0-0``. Returns ``self`` so that further text calls
    can be chained.
    """
    font_resource = Name("/f-0-0")
    self._cs.set_text_font(font_resource, size)
    return self
|
||||
|
||||
def set_render_mode(self, mode):
    """Forward ``mode`` to the content stream builder's ``set_text_rendering``.

    Returns ``self`` so that further text calls can be chained.
    """
    builder = self._cs
    builder.set_text_rendering(mode)
    return self
|
||||
|
||||
def set_text_transform(self, matrix: Matrix):
    """Set the text matrix and remember its translation as the line start.

    ``matrix`` is passed to the content stream builder, then its ``e`` and
    ``f`` components are stored as the pair later returned by
    ``get_start_of_line``. Returns ``self`` so that further text calls can
    be chained.
    """
    self._cs.set_text_matrix(matrix)
    origin = (matrix.e, matrix.f)
    self._p0 = origin
    return self
|
||||
|
||||
def show(self, text):
    """Forward ``text`` to the content stream builder's ``show_text``.

    Returns ``self`` so that further text calls can be chained.
    """
    builder = self._cs
    builder.show_text(text)
    return self
|
||||
|
||||
def set_horiz_scale(self, scale):
    """Forward ``scale`` to the builder's ``set_text_horizontal_scaling``.

    Returns ``self`` so that further text calls can be chained.
    """
    builder = self._cs
    builder.set_text_horizontal_scaling(scale)
    return self
|
||||
|
||||
def get_start_of_line(self):
    """Return the stored start-of-line position (the ``_p0`` attribute)."""
    start = self._p0
    return start
|
||||
|
||||
def move_cursor(self, x, y):
    """Forward ``x`` and ``y`` to the content stream builder's ``move_cursor``.

    Returns ``self`` so that further text calls can be chained.
    """
    builder = self._cs
    builder.move_cursor(x, y)
    return self
|
||||
|
||||
@@ -351,83 +351,78 @@ class HocrTransform:
|
||||
"""Draw boxes around paragraphs in the document."""
|
||||
if not self.render_options.render_paragraph_bbox: # pragma: no cover
|
||||
return
|
||||
for elem in self.hocr.iterfind(self._child_xpath('p', 'ocr_par')):
|
||||
elemtxt = self._get_element_text(elem).rstrip()
|
||||
if len(elemtxt) == 0:
|
||||
continue
|
||||
ocr_par = self.element_coordinates(elem)
|
||||
with canvas.enter_context():
|
||||
# draw box around paragraph
|
||||
canvas.set_stroke_color(color)
|
||||
canvas.set_line_width(0.1) # no line for bounding box
|
||||
canvas.rect(ocr_par.llx, ocr_par.lly, ocr_par.width, ocr_par.height, fill=0)
|
||||
for elem in self.hocr.iterfind(self._child_xpath('p', 'ocr_par')):
|
||||
elemtxt = self._get_element_text(elem).strip()
|
||||
if len(elemtxt) == 0:
|
||||
continue
|
||||
ocr_par = self.element_coordinates(elem)
|
||||
canvas.rect(
|
||||
ocr_par.llx, ocr_par.lly, ocr_par.width, ocr_par.height, fill=0
|
||||
)
|
||||
|
||||
def _debug_draw_line_bbox(self, canvas: Canvas, line_box, color=BLUE):
|
||||
def _debug_draw_line_bbox(self, canvas: Canvas, line_box: Rectangle, color=BLUE):
|
||||
"""Render the bounding box of a text line."""
|
||||
if not self.render_options.render_line_bbox: # pragma: no cover
|
||||
return
|
||||
canvas.push()
|
||||
canvas.set_stroke_color(color)
|
||||
canvas.set_line_width(0.15)
|
||||
canvas.rect(
|
||||
line_box.llx,
|
||||
line_box.lly,
|
||||
line_box.width,
|
||||
line_box.height,
|
||||
fill=0,
|
||||
)
|
||||
canvas.pop()
|
||||
with canvas.enter_context():
|
||||
canvas.set_stroke_color(color).set_line_width(0.15).rect(
|
||||
line_box.llx, line_box.lly, line_box.width, line_box.height, fill=0
|
||||
)
|
||||
|
||||
def _debug_draw_word_triangle(self, canvas: Canvas, box, color=RED, line_width=0.1):
|
||||
def _debug_draw_word_triangle(
|
||||
self, canvas: Canvas, box: Rectangle, color=RED, line_width=0.1
|
||||
):
|
||||
"""Render a triangle that conveys word height and drawing direction."""
|
||||
if not self.render_options.render_triangle: # pragma: no cover
|
||||
return
|
||||
canvas.push()
|
||||
canvas.set_stroke_color(color)
|
||||
canvas.set_line_width(line_width)
|
||||
# Draw a triangle that conveys word height and drawing direction
|
||||
canvas.line(box.llx, box.lly, box.urx, box.lly) # across bottom
|
||||
canvas.line(box.urx, box.lly, box.llx, box.ury) # diagonal
|
||||
canvas.line(box.llx, box.lly, box.llx, box.ury) # rise
|
||||
canvas.pop()
|
||||
with canvas.enter_context():
|
||||
canvas.set_stroke_color(color).set_line_width(line_width).line(
|
||||
box.llx, box.lly, box.urx, box.lly
|
||||
).line(box.urx, box.lly, box.llx, box.ury).line(
|
||||
box.llx, box.lly, box.llx, box.ury
|
||||
)
|
||||
|
||||
def _debug_draw_word_bbox(self, canvas: Canvas, box, color=GREEN, line_width=0.1):
|
||||
def _debug_draw_word_bbox(
|
||||
self, canvas: Canvas, box: Rectangle, color=GREEN, line_width=0.1
|
||||
):
|
||||
"""Render a box depicting the word."""
|
||||
if not self.render_options.render_word_bbox: # pragma: no cover
|
||||
return
|
||||
canvas.push()
|
||||
canvas.set_dashes()
|
||||
canvas.set_stroke_color(color)
|
||||
canvas.set_line_width(line_width)
|
||||
canvas.rect(box.llx, box.lly, box.width, box.height, fill=0)
|
||||
canvas.pop()
|
||||
with canvas.enter_context():
|
||||
canvas.set_stroke_color(color).set_line_width(line_width).rect(
|
||||
box.llx, box.lly, box.width, box.height, fill=0
|
||||
)
|
||||
|
||||
def _debug_draw_space_bbox(
|
||||
self, canvas: Canvas, box, color=DARKGREEN, line_width=0.1
|
||||
self, canvas: Canvas, box: Rectangle, color=DARKGREEN, line_width=0.1
|
||||
):
|
||||
"""Render a box depicting the space between two words."""
|
||||
if not self.render_options.render_space_bbox: # pragma: no cover
|
||||
return
|
||||
canvas.push()
|
||||
canvas.set_dashes()
|
||||
canvas.set_fill_color(color)
|
||||
canvas.set_line_width(line_width)
|
||||
canvas.rect(box.llx, box.lly, box.width, box.height, fill=1)
|
||||
canvas.pop()
|
||||
with canvas.enter_context():
|
||||
canvas.set_fill_color(color).set_line_width(line_width).rect(
|
||||
box.llx, box.lly, box.width, box.height, fill=1
|
||||
)
|
||||
|
||||
def _debug_draw_baseline(
|
||||
self, canvas, line_box, baseline_lly, color=MAGENTA, line_width=0.25
|
||||
self,
|
||||
canvas: Canvas,
|
||||
line_box: Rectangle,
|
||||
baseline_lly,
|
||||
color=MAGENTA,
|
||||
line_width=0.25,
|
||||
):
|
||||
"""Render the text baseline."""
|
||||
if not self.render_options.render_baseline:
|
||||
return
|
||||
canvas.push()
|
||||
canvas.set_dashes()
|
||||
canvas.set_stroke_color(color)
|
||||
canvas.set_line_width(line_width)
|
||||
canvas.line(
|
||||
line_box.llx,
|
||||
baseline_lly,
|
||||
line_box.urx,
|
||||
baseline_lly,
|
||||
)
|
||||
canvas.pop()
|
||||
with canvas.enter_context():
|
||||
canvas.set_stroke_color(color).set_line_width(line_width).line(
|
||||
line_box.llx,
|
||||
baseline_lly,
|
||||
line_box.urx,
|
||||
baseline_lly,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user