Fix placement of spaces in debug mode

This commit is contained in:
James R. Barlow
2023-11-20 22:44:12 -08:00
parent 6c6aca2f1e
commit 729c7febd9
2 changed files with 20 additions and 22 deletions

View File

@@ -33,8 +33,8 @@ CHAR_ASPECT = 2
class TextDirection(Enum):
LTR = ...
RTL = ...
LTR = 1
RTL = 2
def register_glyphlessfont(pdf: Pdf):

View File

@@ -333,26 +333,24 @@ class HocrTransform:
hocr_next_box = (
self.element_coordinates(next_elem) if next_elem is not None else None
)
if hocr_next_box is not None:
# Render a space between this word and the next word. The explicit space helps
# PDF viewers identify the word break, and horizontally scaling it to
# occupy the space between the words helps the PDF viewer
# avoid combiningthewordstogether.
next_box = line_matrix.inverse().transform(hocr_next_box)
if text_direction == TextDirection.LTR:
space_box = Rectangle(box.urx, box.lly, next_box.llx, next_box.ury)
self._debug_draw_space_bbox(canvas, space_box)
text.set_text_transform(Matrix(1, 0, 0, 1, space_box.llx, 0))
space_width = canvas.string_width(' ', fontname, fontsize)
space_box_width = space_box.urx - space_box.llx
elif text_direction == TextDirection.RTL:
space_box = Rectangle(next_box.urx, box.lly, box.llx, next_box.ury)
self._debug_draw_space_bbox(canvas, space_box)
text.set_text_transform(Matrix(1, 0, 0, 1, space_box.llx, 0))
space_width = canvas.string_width(' ', fontname, fontsize)
space_box_width = space_box.ury - space_box.lly
text.set_horiz_scale(100 * space_box_width / space_width)
text.show(' ')
if hocr_next_box is None:
return
# Render a space between this word and the next word. The explicit space helps
# PDF viewers identify the word break, and horizontally scaling it to
# occupy the space between the words helps the PDF viewer
# avoid combiningthewordstogether.
next_box = line_matrix.inverse().transform(hocr_next_box)
if text_direction == TextDirection.LTR:
space_box = Rectangle(box.urx, box.lly, next_box.llx, next_box.ury)
self._debug_draw_space_bbox(canvas, space_box)
text.set_text_transform(Matrix(1, 0, 0, 1, space_box.llx, 0))
elif text_direction == TextDirection.RTL:
space_box = Rectangle(next_box.urx, box.lly, box.llx, next_box.ury)
self._debug_draw_space_bbox(canvas, space_box)
text.set_text_transform(Matrix(1, 0, 0, 1, space_box.llx, 0))
space_width = canvas.string_width(' ', fontname, fontsize)
text.set_horiz_scale(100 * space_box.width / space_width)
text.show(' ')
def _debug_draw_paragraph_boxes(self, canvas: Canvas, color=CYAN):
"""Draw boxes around paragraphs in the document."""