Add --oversample test for hocr rendering

This commit is contained in:
James R. Barlow
2015-07-27 17:18:02 -07:00
parent 7bcd48c269
commit b40eec4cb0
4 changed files with 162 additions and 360 deletions

View File

@@ -1,230 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.38.0 (20140413.2041)
-->
<!-- Title: Pipeline: Pages: 1 -->
<svg width="752pt" height="651pt"
viewBox="0.00 0.00 752.00 650.53" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 646.53)">
<title>Pipeline:</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-646.53 748,-646.53 748,4 -4,4"/>
<g id="clust1" class="cluster"><title>clustertasks</title>
<polygon fill="none" stroke="black" points="8,-8 8,-634.53 736,-634.53 736,-8 8,-8"/>
<text text-anchor="middle" x="372" y="-606.53" font-family="Times,serif" font-size="30.00" fill="#ff3232">Pipeline:</text>
</g>
<!-- t0 -->
<g id="node1" class="node"><title>t0</title>
<polygon fill="#efa03b" stroke="#006000" points="489.791,-588.53 394.209,-588.53 390.209,-584.53 390.209,-552.53 485.791,-552.53 489.791,-556.53 489.791,-588.53"/>
<polyline fill="none" stroke="#006000" points="485.791,-584.53 390.209,-584.53 "/>
<polyline fill="none" stroke="#006000" points="485.791,-584.53 485.791,-552.53 "/>
<polyline fill="none" stroke="#006000" points="485.791,-584.53 489.791,-588.53 "/>
<text text-anchor="middle" x="440" y="-564.53" font-family="Times,serif" font-size="20.00" fill="#006000">repair_pdf</text>
</g>
<!-- t1 -->
<g id="node2" class="node"><title>t1</title>
<polygon fill="#efa03b" stroke="black" points="474.782,-509.564 382,-526.497 289.218,-509.564 289.304,-482.165 474.696,-482.165 474.782,-509.564"/>
<polygon fill="none" stroke="black" points="478.799,-512.902 382,-530.569 285.201,-512.902 285.311,-478.159 478.689,-478.159 478.799,-512.902"/>
<text text-anchor="middle" x="382" y="-495.991" font-family="Times,serif" font-size="20.00">split_pages</text>
</g>
<!-- t0&#45;&gt;t1 -->
<g id="edge1" class="edge"><title>t0&#45;&gt;t1</title>
<path fill="none" stroke="gray" d="M425.064,-552.394C420.296,-546.925 414.86,-540.689 409.493,-534.532"/>
<polygon fill="gray" stroke="gray" points="411.996,-532.077 402.787,-526.838 406.719,-536.676 411.996,-532.077"/>
</g>
<!-- t10 -->
<g id="node12" class="node"><title>t10</title>
<polygon fill="#efa03b" stroke="#006000" points="728.338,-451.452 517.662,-451.452 513.662,-447.452 513.662,-415.452 724.338,-415.452 728.338,-419.452 728.338,-451.452"/>
<polyline fill="none" stroke="#006000" points="724.338,-447.452 513.662,-447.452 "/>
<polyline fill="none" stroke="#006000" points="724.338,-447.452 724.338,-415.452 "/>
<polyline fill="none" stroke="#006000" points="724.338,-447.452 728.338,-451.452 "/>
<text text-anchor="middle" x="621" y="-427.452" font-family="Times,serif" font-size="20.00" fill="#006000">generate_postscript_stub</text>
</g>
<!-- t0&#45;&gt;t10 -->
<g id="edge15" class="edge"><title>t0&#45;&gt;t10</title>
<path fill="none" stroke="gray" d="M462.924,-552.422C495.068,-528.433 553.637,-484.724 589.75,-457.773"/>
<polygon fill="gray" stroke="gray" points="592.056,-460.42 597.977,-451.634 587.87,-454.81 592.056,-460.42"/>
</g>
<!-- t2 -->
<g id="node3" class="node"><title>t2</title>
<polygon fill="#efa03b" stroke="black" points="495.555,-451.452 272.445,-451.452 268.445,-447.452 268.445,-415.452 491.555,-415.452 495.555,-419.452 495.555,-451.452"/>
<polyline fill="none" stroke="black" points="491.555,-447.452 268.445,-447.452 "/>
<polyline fill="none" stroke="black" points="491.555,-447.452 491.555,-415.452 "/>
<polyline fill="none" stroke="black" points="491.555,-447.452 495.555,-451.452 "/>
<text text-anchor="middle" x="382" y="-427.452" font-family="Times,serif" font-size="20.00">rasterize_with_ghostscript</text>
</g>
<!-- t1&#45;&gt;t2 -->
<g id="edge2" class="edge"><title>t1&#45;&gt;t2</title>
<path fill="none" stroke="#0044a0" d="M382,-478.092C382,-472.777 382,-467.07 382,-461.593"/>
<polygon fill="#0044a0" stroke="#0044a0" points="385.5,-461.506 382,-451.506 378.5,-461.506 385.5,-461.506"/>
</g>
<!-- t11 -->
<g id="node10" class="node"><title>t11</title>
<polygon fill="#efa03b" stroke="black" points="159.594,-393.452 66.4062,-393.452 62.4062,-389.452 62.4062,-357.452 155.594,-357.452 159.594,-361.452 159.594,-393.452"/>
<polyline fill="none" stroke="black" points="155.594,-389.452 62.4062,-389.452 "/>
<polyline fill="none" stroke="black" points="155.594,-389.452 155.594,-357.452 "/>
<polyline fill="none" stroke="black" points="155.594,-389.452 159.594,-393.452 "/>
<text text-anchor="middle" x="111" y="-369.452" font-family="Times,serif" font-size="20.00">skip_page</text>
</g>
<!-- t1&#45;&gt;t11 -->
<g id="edge13" class="edge"><title>t1&#45;&gt;t11</title>
<path fill="none" stroke="#0044a0" d="M322.128,-478.077C301.919,-470.026 279.335,-460.679 259,-451.452 222.194,-434.751 181.273,-413.827 151.805,-398.326"/>
<polygon fill="#0044a0" stroke="#0044a0" points="153.137,-395.071 142.66,-393.496 149.868,-401.261 153.137,-395.071"/>
</g>
<!-- t3 -->
<g id="node4" class="node"><title>t3</title>
<polygon fill="#efa03b" stroke="black" points="467.999,-393.452 300.001,-393.452 296.001,-389.452 296.001,-357.452 463.999,-357.452 467.999,-361.452 467.999,-393.452"/>
<polyline fill="none" stroke="black" points="463.999,-389.452 296.001,-389.452 "/>
<polyline fill="none" stroke="black" points="463.999,-389.452 463.999,-357.452 "/>
<polyline fill="none" stroke="black" points="463.999,-389.452 467.999,-393.452 "/>
<text text-anchor="middle" x="382" y="-369.452" font-family="Times,serif" font-size="20.00">preprocess_deskew</text>
</g>
<!-- t2&#45;&gt;t3 -->
<g id="edge3" class="edge"><title>t2&#45;&gt;t3</title>
<path fill="none" stroke="#0044a0" d="M382,-415.346C382,-411.655 382,-407.665 382,-403.695"/>
<polygon fill="#0044a0" stroke="#0044a0" points="385.5,-403.453 382,-393.453 378.5,-403.453 385.5,-403.453"/>
</g>
<!-- t6 -->
<g id="node6" class="node"><title>t6</title>
<polygon fill="#efa03b" stroke="black" points="634.375,-277.452 447.625,-277.452 443.625,-273.452 443.625,-241.452 630.375,-241.452 634.375,-245.452 634.375,-277.452"/>
<polyline fill="none" stroke="black" points="630.375,-273.452 443.625,-273.452 "/>
<polyline fill="none" stroke="black" points="630.375,-273.452 630.375,-241.452 "/>
<polyline fill="none" stroke="black" points="630.375,-273.452 634.375,-277.452 "/>
<text text-anchor="middle" x="539" y="-253.452" font-family="Times,serif" font-size="20.00">select_image_for_pdf</text>
</g>
<!-- t2&#45;&gt;t6 -->
<g id="edge7" class="edge"><title>t2&#45;&gt;t6</title>
<path fill="none" stroke="#0044a0" d="M442.809,-415.378C455.168,-409.854 467.327,-402.684 477,-393.452 507.53,-364.315 524.51,-317.195 532.692,-287.567"/>
<polygon fill="#0044a0" stroke="#0044a0" points="536.178,-288.068 535.317,-277.508 529.405,-286.3 536.178,-288.068"/>
</g>
<!-- t4 -->
<g id="node5" class="node"><title>t4</title>
<polygon fill="#efa03b" stroke="black" points="439.705,-335.452 290.295,-335.452 286.295,-331.452 286.295,-299.452 435.705,-299.452 439.705,-303.452 439.705,-335.452"/>
<polyline fill="none" stroke="black" points="435.705,-331.452 286.295,-331.452 "/>
<polyline fill="none" stroke="black" points="435.705,-331.452 435.705,-299.452 "/>
<polyline fill="none" stroke="black" points="435.705,-331.452 439.705,-335.452 "/>
<text text-anchor="middle" x="363" y="-311.452" font-family="Times,serif" font-size="20.00">preprocess_clean</text>
</g>
<!-- t3&#45;&gt;t4 -->
<g id="edge4" class="edge"><title>t3&#45;&gt;t4</title>
<path fill="none" stroke="#0044a0" d="M376.196,-357.346C374.87,-353.438 373.43,-349.194 372.006,-344.995"/>
<polygon fill="#0044a0" stroke="#0044a0" points="375.296,-343.799 368.768,-335.453 368.667,-346.048 375.296,-343.799"/>
</g>
<!-- t3&#45;&gt;t6 -->
<g id="edge6" class="edge"><title>t3&#45;&gt;t6</title>
<path fill="none" stroke="#0044a0" d="M413.697,-357.449C425.088,-350.965 437.865,-343.25 449,-335.452 471.538,-319.668 495.388,-299.622 512.871,-284.226"/>
<polygon fill="#0044a0" stroke="#0044a0" points="515.277,-286.77 520.427,-277.512 510.627,-281.538 515.277,-286.77"/>
</g>
<!-- t4&#45;&gt;t6 -->
<g id="edge5" class="edge"><title>t4&#45;&gt;t6</title>
<path fill="none" stroke="#0044a0" d="M416.762,-299.346C435.442,-293.402 456.56,-286.683 475.923,-280.522"/>
<polygon fill="#0044a0" stroke="#0044a0" points="477.099,-283.821 485.567,-277.453 474.977,-277.15 477.099,-283.821"/>
</g>
<!-- t5 -->
<g id="node7" class="node"><title>t5</title>
<polygon fill="#efa03b" stroke="black" points="425.922,-277.452 264.078,-277.452 260.078,-273.452 260.078,-241.452 421.922,-241.452 425.922,-245.452 425.922,-277.452"/>
<polyline fill="none" stroke="black" points="421.922,-273.452 260.078,-273.452 "/>
<polyline fill="none" stroke="black" points="421.922,-273.452 421.922,-241.452 "/>
<polyline fill="none" stroke="black" points="421.922,-273.452 425.922,-277.452 "/>
<text text-anchor="middle" x="343" y="-253.452" font-family="Times,serif" font-size="20.00">ocr_tesseract_hocr</text>
</g>
<!-- t4&#45;&gt;t5 -->
<g id="edge8" class="edge"><title>t4&#45;&gt;t5</title>
<path fill="none" stroke="#0044a0" d="M356.891,-299.346C355.495,-295.438 353.979,-291.194 352.48,-286.995"/>
<polygon fill="#0044a0" stroke="#0044a0" points="355.731,-285.694 349.072,-277.453 349.139,-288.048 355.731,-285.694"/>
</g>
<!-- t9 -->
<g id="node11" class="node"><title>t9</title>
<polygon fill="#efa03b" stroke="black" points="292.496,-219.452 39.5039,-219.452 35.5039,-215.452 35.5039,-183.452 288.496,-183.452 292.496,-187.452 292.496,-219.452"/>
<polyline fill="none" stroke="black" points="288.496,-215.452 35.5039,-215.452 "/>
<polyline fill="none" stroke="black" points="288.496,-215.452 288.496,-183.452 "/>
<polyline fill="none" stroke="black" points="288.496,-215.452 292.496,-219.452 "/>
<text text-anchor="middle" x="164" y="-195.452" font-family="Times,serif" font-size="20.00">tesseract_ocr_and_render_pdf</text>
</g>
<!-- t4&#45;&gt;t9 -->
<g id="edge14" class="edge"><title>t4&#45;&gt;t9</title>
<path fill="none" stroke="#0044a0" d="M300.717,-299.405C284.01,-293.605 266.338,-286.3 251,-277.452 227.033,-263.625 203.489,-242.755 186.95,-226.538"/>
<polygon fill="#0044a0" stroke="#0044a0" points="189.414,-224.052 179.868,-219.456 184.465,-229.002 189.414,-224.052"/>
</g>
<!-- t7 -->
<g id="node8" class="node"><title>t7</title>
<polygon fill="#efa03b" stroke="black" points="708.366,-219.452 551.634,-219.452 547.634,-215.452 547.634,-183.452 704.366,-183.452 708.366,-187.452 708.366,-219.452"/>
<polyline fill="none" stroke="black" points="704.366,-215.452 547.634,-215.452 "/>
<polyline fill="none" stroke="black" points="704.366,-215.452 704.366,-183.452 "/>
<polyline fill="none" stroke="black" points="704.366,-215.452 708.366,-219.452 "/>
<text text-anchor="middle" x="628" y="-195.452" font-family="Times,serif" font-size="20.00">render_hocr_page</text>
</g>
<!-- t6&#45;&gt;t7 -->
<g id="edge9" class="edge"><title>t6&#45;&gt;t7</title>
<path fill="none" stroke="#0044a0" d="M566.187,-241.346C574.468,-236.135 583.697,-230.328 592.445,-224.824"/>
<polygon fill="#0044a0" stroke="#0044a0" points="594.38,-227.741 600.98,-219.453 590.652,-221.817 594.38,-227.741"/>
</g>
<!-- t8 -->
<g id="node9" class="node"><title>t8</title>
<polygon fill="#efa03b" stroke="black" points="529.742,-219.452 314.258,-219.452 310.258,-215.452 310.258,-183.452 525.742,-183.452 529.742,-187.452 529.742,-219.452"/>
<polyline fill="none" stroke="black" points="525.742,-215.452 310.258,-215.452 "/>
<polyline fill="none" stroke="black" points="525.742,-215.452 525.742,-183.452 "/>
<polyline fill="none" stroke="black" points="525.742,-215.452 529.742,-219.452 "/>
<text text-anchor="middle" x="420" y="-195.452" font-family="Times,serif" font-size="20.00">render_hocr_debug_page</text>
</g>
<!-- t6&#45;&gt;t8 -->
<g id="edge11" class="edge"><title>t6&#45;&gt;t8</title>
<path fill="none" stroke="#0044a0" d="M502.649,-241.346C490.885,-235.809 477.689,-229.6 465.354,-223.795"/>
<polygon fill="#0044a0" stroke="#0044a0" points="466.666,-220.544 456.128,-219.453 463.686,-226.878 466.666,-220.544"/>
</g>
<!-- t5&#45;&gt;t7 -->
<g id="edge10" class="edge"><title>t5&#45;&gt;t7</title>
<path fill="none" stroke="#0044a0" d="M426.2,-242.104C461.192,-235.228 501.99,-227.212 537.71,-220.193"/>
<polygon fill="#0044a0" stroke="#0044a0" points="538.503,-223.604 547.641,-218.242 537.154,-216.736 538.503,-223.604"/>
</g>
<!-- t5&#45;&gt;t8 -->
<g id="edge12" class="edge"><title>t5&#45;&gt;t8</title>
<path fill="none" stroke="#0044a0" d="M366.521,-241.346C373.387,-236.352 381.007,-230.811 388.291,-225.513"/>
<polygon fill="#0044a0" stroke="#0044a0" points="390.594,-228.166 396.623,-219.453 386.477,-222.504 390.594,-228.166"/>
</g>
<!-- t12 -->
<g id="node13" class="node"><title>t12</title>
<polygon fill="#efa03b" stroke="black" points="339.845,-105.456 420,-78.0208 500.155,-105.456 500.08,-149.848 339.92,-149.848 339.845,-105.456"/>
<polygon fill="none" stroke="black" points="335.836,-102.581 420,-73.7729 504.164,-102.581 504.078,-153.869 335.922,-153.869 335.836,-102.581"/>
<text text-anchor="middle" x="420" y="-111.726" font-family="Times,serif" font-size="20.00">merge_pages</text>
</g>
<!-- t7&#45;&gt;t12 -->
<g id="edge19" class="edge"><title>t7&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M584.65,-183.419C564.063,-175.33 538.602,-165.326 513.908,-155.624"/>
<polygon fill="#0044a0" stroke="#0044a0" points="515.081,-152.324 504.493,-151.924 512.521,-158.839 515.081,-152.324"/>
</g>
<!-- t8&#45;&gt;t12 -->
<g id="edge18" class="edge"><title>t8&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M420,-183.12C420,-177.585 420,-171.177 420,-164.592"/>
<polygon fill="#0044a0" stroke="#0044a0" points="423.5,-164.201 420,-154.201 416.5,-164.201 423.5,-164.201"/>
</g>
<!-- t11&#45;&gt;t12 -->
<g id="edge16" class="edge"><title>t11&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M92.3128,-357.362C55.4572,-321.636 -19.1794,-237.859 27,-183.452 64.5123,-139.256 221.468,-125.188 325.317,-120.741"/>
<polygon fill="#0044a0" stroke="#0044a0" points="325.658,-124.23 335.508,-120.328 325.375,-117.236 325.658,-124.23"/>
</g>
<!-- t9&#45;&gt;t12 -->
<g id="edge17" class="edge"><title>t9&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M217.354,-183.419C248.764,-173.392 289.391,-160.422 326.012,-148.731"/>
<polygon fill="#0044a0" stroke="#0044a0" points="327.263,-152.006 335.725,-145.63 325.134,-145.337 327.263,-152.006"/>
</g>
<!-- t10&#45;&gt;t12 -->
<g id="edge21" class="edge"><title>t10&#45;&gt;t12</title>
<path fill="none" stroke="gray" d="M653.379,-415.337C684.552,-396.255 727,-362.233 727,-318.452 727,-318.452 727,-318.452 727,-258.452 727,-224.824 739.784,-208.186 717,-183.452 689.882,-154.013 590.624,-136.736 514.239,-127.544"/>
<polygon fill="gray" stroke="gray" points="514.437,-124.044 504.097,-126.355 513.621,-130.996 514.437,-124.044"/>
</g>
<!-- t13 -->
<g id="node14" class="node"><title>t13</title>
<polygon fill="#efa03b" stroke="black" points="482.338,-52 361.662,-52 357.662,-48 357.662,-16 478.338,-16 482.338,-20 482.338,-52"/>
<polyline fill="none" stroke="black" points="478.338,-48 357.662,-48 "/>
<polyline fill="none" stroke="black" points="478.338,-48 478.338,-16 "/>
<polyline fill="none" stroke="black" points="478.338,-48 482.338,-52 "/>
<text text-anchor="middle" x="420" y="-28" font-family="Times,serif" font-size="20.00">validate_pdfa</text>
</g>
<!-- t12&#45;&gt;t13 -->
<g id="edge20" class="edge"><title>t12&#45;&gt;t13</title>
<path fill="none" stroke="#0044a0" d="M420,-73.9482C420,-69.9654 420,-66.007 420,-62.2247"/>
<polygon fill="#0044a0" stroke="#0044a0" points="423.5,-62.1573 420,-52.1573 416.5,-62.1574 423.5,-62.1573"/>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -162,6 +162,8 @@ if _argv[0].startswith('python'):
_argv = _argv[1:]
if _argv[0].endswith('.py'):
_argv = _argv[1:]
if _argv[0].startswith('ocrmypdf'):
_argv = _argv[1:]
options = parser.parse_args(_argv)
@@ -576,7 +578,7 @@ def render_hocr_page(
image = next(ii for ii in infiles if ii.endswith('.image'))
pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
dpi = round(max(pageinfo['xres'], pageinfo['yres']))
dpi = round(max(pageinfo['xres'], pageinfo['yres'], options.oversample))
hocrtransform = HocrTransform(hocr, dpi)
hocrtransform.to_pdf(output_file, imageFileName=image,
@@ -600,7 +602,7 @@ def render_hocr_debug_page(
image = next(ii for ii in infiles if ii.endswith('.image'))
pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
dpi = round(max(pageinfo['xres'], pageinfo['yres']))
dpi = round(max(pageinfo['xres'], pageinfo['yres'], options.oversample))
hocrtransform = HocrTransform(hocr, dpi)
hocrtransform.to_pdf(output_file, imageFileName=None,

View File

@@ -4,209 +4,227 @@
<!-- Generated by graphviz version 2.38.0 (20140413.2041)
-->
<!-- Title: Pipeline: Pages: 1 -->
<svg width="520pt" height="651pt"
viewBox="0.00 0.00 520.00 650.53" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<svg width="752pt" height="651pt"
viewBox="0.00 0.00 752.00 650.53" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 646.53)">
<title>Pipeline:</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-646.53 516,-646.53 516,4 -4,4"/>
<polygon fill="white" stroke="none" points="-4,4 -4,-646.53 748,-646.53 748,4 -4,4"/>
<g id="clust1" class="cluster"><title>clustertasks</title>
<polygon fill="none" stroke="black" points="8,-8 8,-634.53 504,-634.53 504,-8 8,-8"/>
<text text-anchor="middle" x="256" y="-606.53" font-family="Times,serif" font-size="30.00" fill="#ff3232">Pipeline:</text>
<polygon fill="none" stroke="black" points="8,-8 8,-634.53 736,-634.53 736,-8 8,-8"/>
<text text-anchor="middle" x="372" y="-606.53" font-family="Times,serif" font-size="30.00" fill="#ff3232">Pipeline:</text>
</g>
<!-- t0 -->
<g id="node1" class="node"><title>t0</title>
<polygon fill="#efa03b" stroke="#006000" points="257.791,-588.53 162.209,-588.53 158.209,-584.53 158.209,-552.53 253.791,-552.53 257.791,-556.53 257.791,-588.53"/>
<polyline fill="none" stroke="#006000" points="253.791,-584.53 158.209,-584.53 "/>
<polyline fill="none" stroke="#006000" points="253.791,-584.53 253.791,-552.53 "/>
<polyline fill="none" stroke="#006000" points="253.791,-584.53 257.791,-588.53 "/>
<text text-anchor="middle" x="208" y="-564.53" font-family="Times,serif" font-size="20.00" fill="#006000">repair_pdf</text>
<polygon fill="#efa03b" stroke="#006000" points="489.791,-588.53 394.209,-588.53 390.209,-584.53 390.209,-552.53 485.791,-552.53 489.791,-556.53 489.791,-588.53"/>
<polyline fill="none" stroke="#006000" points="485.791,-584.53 390.209,-584.53 "/>
<polyline fill="none" stroke="#006000" points="485.791,-584.53 485.791,-552.53 "/>
<polyline fill="none" stroke="#006000" points="485.791,-584.53 489.791,-588.53 "/>
<text text-anchor="middle" x="440" y="-564.53" font-family="Times,serif" font-size="20.00" fill="#006000">repair_pdf</text>
</g>
<!-- t1 -->
<g id="node2" class="node"><title>t1</title>
<polygon fill="#efa03b" stroke="black" points="242.782,-509.564 150,-526.497 57.2177,-509.564 57.3043,-482.165 242.696,-482.165 242.782,-509.564"/>
<polygon fill="none" stroke="black" points="246.799,-512.902 150,-530.569 53.2009,-512.902 53.3107,-478.159 246.689,-478.159 246.799,-512.902"/>
<text text-anchor="middle" x="150" y="-495.991" font-family="Times,serif" font-size="20.00">split_pages</text>
<polygon fill="#efa03b" stroke="black" points="474.782,-509.564 382,-526.497 289.218,-509.564 289.304,-482.165 474.696,-482.165 474.782,-509.564"/>
<polygon fill="none" stroke="black" points="478.799,-512.902 382,-530.569 285.201,-512.902 285.311,-478.159 478.689,-478.159 478.799,-512.902"/>
<text text-anchor="middle" x="382" y="-495.991" font-family="Times,serif" font-size="20.00">split_pages</text>
</g>
<!-- t0&#45;&gt;t1 -->
<g id="edge1" class="edge"><title>t0&#45;&gt;t1</title>
<path fill="none" stroke="gray" d="M193.064,-552.394C188.296,-546.925 182.86,-540.689 177.493,-534.532"/>
<polygon fill="gray" stroke="gray" points="179.996,-532.077 170.787,-526.838 174.719,-536.676 179.996,-532.077"/>
<path fill="none" stroke="gray" d="M425.064,-552.394C420.296,-546.925 414.86,-540.689 409.493,-534.532"/>
<polygon fill="gray" stroke="gray" points="411.996,-532.077 402.787,-526.838 406.719,-536.676 411.996,-532.077"/>
</g>
<!-- t9 -->
<g id="node11" class="node"><title>t9</title>
<polygon fill="#efa03b" stroke="#006000" points="496.338,-451.452 285.662,-451.452 281.662,-447.452 281.662,-415.452 492.338,-415.452 496.338,-419.452 496.338,-451.452"/>
<polyline fill="none" stroke="#006000" points="492.338,-447.452 281.662,-447.452 "/>
<polyline fill="none" stroke="#006000" points="492.338,-447.452 492.338,-415.452 "/>
<polyline fill="none" stroke="#006000" points="492.338,-447.452 496.338,-451.452 "/>
<text text-anchor="middle" x="389" y="-427.452" font-family="Times,serif" font-size="20.00" fill="#006000">generate_postscript_stub</text>
<!-- t10 -->
<g id="node12" class="node"><title>t10</title>
<polygon fill="#efa03b" stroke="#006000" points="728.338,-451.452 517.662,-451.452 513.662,-447.452 513.662,-415.452 724.338,-415.452 728.338,-419.452 728.338,-451.452"/>
<polyline fill="none" stroke="#006000" points="724.338,-447.452 513.662,-447.452 "/>
<polyline fill="none" stroke="#006000" points="724.338,-447.452 724.338,-415.452 "/>
<polyline fill="none" stroke="#006000" points="724.338,-447.452 728.338,-451.452 "/>
<text text-anchor="middle" x="621" y="-427.452" font-family="Times,serif" font-size="20.00" fill="#006000">generate_postscript_stub</text>
</g>
<!-- t0&#45;&gt;t9 -->
<g id="edge14" class="edge"><title>t0&#45;&gt;t9</title>
<path fill="none" stroke="gray" d="M230.924,-552.422C263.068,-528.433 321.637,-484.724 357.75,-457.773"/>
<polygon fill="gray" stroke="gray" points="360.056,-460.42 365.977,-451.634 355.87,-454.81 360.056,-460.42"/>
<!-- t0&#45;&gt;t10 -->
<g id="edge15" class="edge"><title>t0&#45;&gt;t10</title>
<path fill="none" stroke="gray" d="M462.924,-552.422C495.068,-528.433 553.637,-484.724 589.75,-457.773"/>
<polygon fill="gray" stroke="gray" points="592.056,-460.42 597.977,-451.634 587.87,-454.81 592.056,-460.42"/>
</g>
<!-- t2 -->
<g id="node3" class="node"><title>t2</title>
<polygon fill="#efa03b" stroke="black" points="263.555,-451.452 40.4453,-451.452 36.4453,-447.452 36.4453,-415.452 259.555,-415.452 263.555,-419.452 263.555,-451.452"/>
<polyline fill="none" stroke="black" points="259.555,-447.452 36.4453,-447.452 "/>
<polyline fill="none" stroke="black" points="259.555,-447.452 259.555,-415.452 "/>
<polyline fill="none" stroke="black" points="259.555,-447.452 263.555,-451.452 "/>
<text text-anchor="middle" x="150" y="-427.452" font-family="Times,serif" font-size="20.00">rasterize_with_ghostscript</text>
<polygon fill="#efa03b" stroke="black" points="495.555,-451.452 272.445,-451.452 268.445,-447.452 268.445,-415.452 491.555,-415.452 495.555,-419.452 495.555,-451.452"/>
<polyline fill="none" stroke="black" points="491.555,-447.452 268.445,-447.452 "/>
<polyline fill="none" stroke="black" points="491.555,-447.452 491.555,-415.452 "/>
<polyline fill="none" stroke="black" points="491.555,-447.452 495.555,-451.452 "/>
<text text-anchor="middle" x="382" y="-427.452" font-family="Times,serif" font-size="20.00">rasterize_with_ghostscript</text>
</g>
<!-- t1&#45;&gt;t2 -->
<g id="edge2" class="edge"><title>t1&#45;&gt;t2</title>
<path fill="none" stroke="#0044a0" d="M150,-478.092C150,-472.777 150,-467.07 150,-461.593"/>
<polygon fill="#0044a0" stroke="#0044a0" points="153.5,-461.506 150,-451.506 146.5,-461.506 153.5,-461.506"/>
<path fill="none" stroke="#0044a0" d="M382,-478.092C382,-472.777 382,-467.07 382,-461.593"/>
<polygon fill="#0044a0" stroke="#0044a0" points="385.5,-461.506 382,-451.506 378.5,-461.506 385.5,-461.506"/>
</g>
<!-- t10 -->
<g id="node10" class="node"><title>t10</title>
<polygon fill="#efa03b" stroke="black" points="113.594,-393.452 20.4062,-393.452 16.4062,-389.452 16.4062,-357.452 109.594,-357.452 113.594,-361.452 113.594,-393.452"/>
<polyline fill="none" stroke="black" points="109.594,-389.452 16.4062,-389.452 "/>
<polyline fill="none" stroke="black" points="109.594,-389.452 109.594,-357.452 "/>
<polyline fill="none" stroke="black" points="109.594,-389.452 113.594,-393.452 "/>
<text text-anchor="middle" x="65" y="-369.452" font-family="Times,serif" font-size="20.00">skip_page</text>
<!-- t11 -->
<g id="node10" class="node"><title>t11</title>
<polygon fill="#efa03b" stroke="black" points="159.594,-393.452 66.4062,-393.452 62.4062,-389.452 62.4062,-357.452 155.594,-357.452 159.594,-361.452 159.594,-393.452"/>
<polyline fill="none" stroke="black" points="155.594,-389.452 62.4062,-389.452 "/>
<polyline fill="none" stroke="black" points="155.594,-389.452 155.594,-357.452 "/>
<polyline fill="none" stroke="black" points="155.594,-389.452 159.594,-393.452 "/>
<text text-anchor="middle" x="111" y="-369.452" font-family="Times,serif" font-size="20.00">skip_page</text>
</g>
<!-- t1&#45;&gt;t10 -->
<g id="edge13" class="edge"><title>t1&#45;&gt;t10</title>
<path fill="none" stroke="#0044a0" d="M57.0924,-478.13C45.2322,-471.407 34.6013,-462.708 27,-451.452 18.0457,-438.192 20.9732,-430.273 27,-415.452 29.092,-410.307 32.2252,-405.47 35.8114,-401.07"/>
<polygon fill="#0044a0" stroke="#0044a0" points="38.4404,-403.381 42.629,-393.65 33.2858,-398.645 38.4404,-403.381"/>
<!-- t1&#45;&gt;t11 -->
<g id="edge13" class="edge"><title>t1&#45;&gt;t11</title>
<path fill="none" stroke="#0044a0" d="M322.128,-478.077C301.919,-470.026 279.335,-460.679 259,-451.452 222.194,-434.751 181.273,-413.827 151.805,-398.326"/>
<polygon fill="#0044a0" stroke="#0044a0" points="153.137,-395.071 142.66,-393.496 149.868,-401.261 153.137,-395.071"/>
</g>
<!-- t3 -->
<g id="node4" class="node"><title>t3</title>
<polygon fill="#efa03b" stroke="black" points="332.999,-393.452 165.001,-393.452 161.001,-389.452 161.001,-357.452 328.999,-357.452 332.999,-361.452 332.999,-393.452"/>
<polyline fill="none" stroke="black" points="328.999,-389.452 161.001,-389.452 "/>
<polyline fill="none" stroke="black" points="328.999,-389.452 328.999,-357.452 "/>
<polyline fill="none" stroke="black" points="328.999,-389.452 332.999,-393.452 "/>
<text text-anchor="middle" x="247" y="-369.452" font-family="Times,serif" font-size="20.00">preprocess_deskew</text>
<polygon fill="#efa03b" stroke="black" points="467.999,-393.452 300.001,-393.452 296.001,-389.452 296.001,-357.452 463.999,-357.452 467.999,-361.452 467.999,-393.452"/>
<polyline fill="none" stroke="black" points="463.999,-389.452 296.001,-389.452 "/>
<polyline fill="none" stroke="black" points="463.999,-389.452 463.999,-357.452 "/>
<polyline fill="none" stroke="black" points="463.999,-389.452 467.999,-393.452 "/>
<text text-anchor="middle" x="382" y="-369.452" font-family="Times,serif" font-size="20.00">preprocess_deskew</text>
</g>
<!-- t2&#45;&gt;t3 -->
<g id="edge3" class="edge"><title>t2&#45;&gt;t3</title>
<path fill="none" stroke="#0044a0" d="M179.63,-415.346C188.844,-410.026 199.134,-404.086 208.844,-398.48"/>
<polygon fill="#0044a0" stroke="#0044a0" points="210.641,-401.484 217.551,-393.453 207.141,-395.422 210.641,-401.484"/>
<path fill="none" stroke="#0044a0" d="M382,-415.346C382,-411.655 382,-407.665 382,-403.695"/>
<polygon fill="#0044a0" stroke="#0044a0" points="385.5,-403.453 382,-393.453 378.5,-403.453 385.5,-403.453"/>
</g>
<!-- t6 -->
<g id="node6" class="node"><title>t6</title>
<polygon fill="#efa03b" stroke="black" points="239.375,-277.452 52.6246,-277.452 48.6246,-273.452 48.6246,-241.452 235.375,-241.452 239.375,-245.452 239.375,-277.452"/>
<polyline fill="none" stroke="black" points="235.375,-273.452 48.6246,-273.452 "/>
<polyline fill="none" stroke="black" points="235.375,-273.452 235.375,-241.452 "/>
<polyline fill="none" stroke="black" points="235.375,-273.452 239.375,-277.452 "/>
<text text-anchor="middle" x="144" y="-253.452" font-family="Times,serif" font-size="20.00">select_image_for_pdf</text>
<polygon fill="#efa03b" stroke="black" points="634.375,-277.452 447.625,-277.452 443.625,-273.452 443.625,-241.452 630.375,-241.452 634.375,-245.452 634.375,-277.452"/>
<polyline fill="none" stroke="black" points="630.375,-273.452 443.625,-273.452 "/>
<polyline fill="none" stroke="black" points="630.375,-273.452 630.375,-241.452 "/>
<polyline fill="none" stroke="black" points="630.375,-273.452 634.375,-277.452 "/>
<text text-anchor="middle" x="539" y="-253.452" font-family="Times,serif" font-size="20.00">select_image_for_pdf</text>
</g>
<!-- t2&#45;&gt;t6 -->
<g id="edge7" class="edge"><title>t2&#45;&gt;t6</title>
<path fill="none" stroke="#0044a0" d="M149.403,-415.331C148.361,-385.453 146.202,-323.565 144.951,-287.726"/>
<polygon fill="#0044a0" stroke="#0044a0" points="148.441,-287.375 144.595,-277.504 141.446,-287.62 148.441,-287.375"/>
<path fill="none" stroke="#0044a0" d="M442.809,-415.378C455.168,-409.854 467.327,-402.684 477,-393.452 507.53,-364.315 524.51,-317.195 532.692,-287.567"/>
<polygon fill="#0044a0" stroke="#0044a0" points="536.178,-288.068 535.317,-277.508 529.405,-286.3 536.178,-288.068"/>
</g>
<!-- t4 -->
<g id="node5" class="node"><title>t4</title>
<polygon fill="#efa03b" stroke="black" points="334.705,-335.452 185.295,-335.452 181.295,-331.452 181.295,-299.452 330.705,-299.452 334.705,-303.452 334.705,-335.452"/>
<polyline fill="none" stroke="black" points="330.705,-331.452 181.295,-331.452 "/>
<polyline fill="none" stroke="black" points="330.705,-331.452 330.705,-299.452 "/>
<polyline fill="none" stroke="black" points="330.705,-331.452 334.705,-335.452 "/>
<text text-anchor="middle" x="258" y="-311.452" font-family="Times,serif" font-size="20.00">preprocess_clean</text>
<polygon fill="#efa03b" stroke="black" points="439.705,-335.452 290.295,-335.452 286.295,-331.452 286.295,-299.452 435.705,-299.452 439.705,-303.452 439.705,-335.452"/>
<polyline fill="none" stroke="black" points="435.705,-331.452 286.295,-331.452 "/>
<polyline fill="none" stroke="black" points="435.705,-331.452 435.705,-299.452 "/>
<polyline fill="none" stroke="black" points="435.705,-331.452 439.705,-335.452 "/>
<text text-anchor="middle" x="363" y="-311.452" font-family="Times,serif" font-size="20.00">preprocess_clean</text>
</g>
<!-- t3&#45;&gt;t4 -->
<g id="edge4" class="edge"><title>t3&#45;&gt;t4</title>
<path fill="none" stroke="#0044a0" d="M250.36,-357.346C251.106,-353.546 251.915,-349.43 252.718,-345.345"/>
<polygon fill="#0044a0" stroke="#0044a0" points="256.167,-345.94 254.66,-335.453 249.299,-344.591 256.167,-345.94"/>
<path fill="none" stroke="#0044a0" d="M376.196,-357.346C374.87,-353.438 373.43,-349.194 372.006,-344.995"/>
<polygon fill="#0044a0" stroke="#0044a0" points="375.296,-343.799 368.768,-335.453 368.667,-346.048 375.296,-343.799"/>
</g>
<!-- t3&#45;&gt;t6 -->
<g id="edge6" class="edge"><title>t3&#45;&gt;t6</title>
<path fill="none" stroke="#0044a0" d="M201.207,-357.385C190.535,-351.706 179.974,-344.465 172,-335.452 160.103,-322.004 153.083,-303.029 149.047,-287.53"/>
<polygon fill="#0044a0" stroke="#0044a0" points="152.381,-286.414 146.707,-277.466 145.563,-287.999 152.381,-286.414"/>
<path fill="none" stroke="#0044a0" d="M413.697,-357.449C425.088,-350.965 437.865,-343.25 449,-335.452 471.538,-319.668 495.388,-299.622 512.871,-284.226"/>
<polygon fill="#0044a0" stroke="#0044a0" points="515.277,-286.77 520.427,-277.512 510.627,-281.538 515.277,-286.77"/>
</g>
<!-- t4&#45;&gt;t6 -->
<g id="edge5" class="edge"><title>t4&#45;&gt;t6</title>
<path fill="none" stroke="#0044a0" d="M223.177,-299.346C212.017,-293.864 199.513,-287.721 187.797,-281.966"/>
<polygon fill="#0044a0" stroke="#0044a0" points="189.129,-278.721 178.61,-277.453 186.042,-285.004 189.129,-278.721"/>
<path fill="none" stroke="#0044a0" d="M416.762,-299.346C435.442,-293.402 456.56,-286.683 475.923,-280.522"/>
<polygon fill="#0044a0" stroke="#0044a0" points="477.099,-283.821 485.567,-277.453 474.977,-277.15 477.099,-283.821"/>
</g>
<!-- t5 -->
<g id="node7" class="node"><title>t5</title>
<polygon fill="#efa03b" stroke="black" points="377.385,-277.452 260.615,-277.452 256.615,-273.452 256.615,-241.452 373.385,-241.452 377.385,-245.452 377.385,-277.452"/>
<polyline fill="none" stroke="black" points="373.385,-273.452 256.615,-273.452 "/>
<polyline fill="none" stroke="black" points="373.385,-273.452 373.385,-241.452 "/>
<polyline fill="none" stroke="black" points="373.385,-273.452 377.385,-277.452 "/>
<text text-anchor="middle" x="317" y="-253.452" font-family="Times,serif" font-size="20.00">ocr_tesseract</text>
<polygon fill="#efa03b" stroke="black" points="425.922,-277.452 264.078,-277.452 260.078,-273.452 260.078,-241.452 421.922,-241.452 425.922,-245.452 425.922,-277.452"/>
<polyline fill="none" stroke="black" points="421.922,-273.452 260.078,-273.452 "/>
<polyline fill="none" stroke="black" points="421.922,-273.452 421.922,-241.452 "/>
<polyline fill="none" stroke="black" points="421.922,-273.452 425.922,-277.452 "/>
<text text-anchor="middle" x="343" y="-253.452" font-family="Times,serif" font-size="20.00">ocr_tesseract_hocr</text>
</g>
<!-- t4&#45;&gt;t5 -->
<g id="edge8" class="edge"><title>t4&#45;&gt;t5</title>
<path fill="none" stroke="#0044a0" d="M276.023,-299.346C280.941,-294.678 286.363,-289.531 291.608,-284.552"/>
<polygon fill="#0044a0" stroke="#0044a0" points="294.244,-286.876 299.088,-277.453 289.425,-281.799 294.244,-286.876"/>
<path fill="none" stroke="#0044a0" d="M356.891,-299.346C355.495,-295.438 353.979,-291.194 352.48,-286.995"/>
<polygon fill="#0044a0" stroke="#0044a0" points="355.731,-285.694 349.072,-277.453 349.139,-288.048 355.731,-285.694"/>
</g>
<!-- t9 -->
<g id="node11" class="node"><title>t9</title>
<polygon fill="#efa03b" stroke="black" points="292.496,-219.452 39.5039,-219.452 35.5039,-215.452 35.5039,-183.452 288.496,-183.452 292.496,-187.452 292.496,-219.452"/>
<polyline fill="none" stroke="black" points="288.496,-215.452 35.5039,-215.452 "/>
<polyline fill="none" stroke="black" points="288.496,-215.452 288.496,-183.452 "/>
<polyline fill="none" stroke="black" points="288.496,-215.452 292.496,-219.452 "/>
<text text-anchor="middle" x="164" y="-195.452" font-family="Times,serif" font-size="20.00">tesseract_ocr_and_render_pdf</text>
</g>
<!-- t4&#45;&gt;t9 -->
<g id="edge14" class="edge"><title>t4&#45;&gt;t9</title>
<path fill="none" stroke="#0044a0" d="M300.717,-299.405C284.01,-293.605 266.338,-286.3 251,-277.452 227.033,-263.625 203.489,-242.755 186.95,-226.538"/>
<polygon fill="#0044a0" stroke="#0044a0" points="189.414,-224.052 179.868,-219.456 184.465,-229.002 189.414,-224.052"/>
</g>
<!-- t7 -->
<g id="node8" class="node"><title>t7</title>
<polygon fill="#efa03b" stroke="black" points="368.328,-219.452 257.672,-219.452 253.672,-215.452 253.672,-183.452 364.328,-183.452 368.328,-187.452 368.328,-219.452"/>
<polyline fill="none" stroke="black" points="364.328,-215.452 253.672,-215.452 "/>
<polyline fill="none" stroke="black" points="364.328,-215.452 364.328,-183.452 "/>
<polyline fill="none" stroke="black" points="364.328,-215.452 368.328,-219.452 "/>
<text text-anchor="middle" x="311" y="-195.452" font-family="Times,serif" font-size="20.00">render_page</text>
<polygon fill="#efa03b" stroke="black" points="708.366,-219.452 551.634,-219.452 547.634,-215.452 547.634,-183.452 704.366,-183.452 708.366,-187.452 708.366,-219.452"/>
<polyline fill="none" stroke="black" points="704.366,-215.452 547.634,-215.452 "/>
<polyline fill="none" stroke="black" points="704.366,-215.452 704.366,-183.452 "/>
<polyline fill="none" stroke="black" points="704.366,-215.452 708.366,-219.452 "/>
<text text-anchor="middle" x="628" y="-195.452" font-family="Times,serif" font-size="20.00">render_hocr_page</text>
</g>
<!-- t6&#45;&gt;t7 -->
<g id="edge9" class="edge"><title>t6&#45;&gt;t7</title>
<path fill="none" stroke="#0044a0" d="M195.013,-241.346C212.575,-235.457 232.41,-228.806 250.645,-222.691"/>
<polygon fill="#0044a0" stroke="#0044a0" points="251.931,-225.951 260.299,-219.453 249.706,-219.314 251.931,-225.951"/>
<path fill="none" stroke="#0044a0" d="M566.187,-241.346C574.468,-236.135 583.697,-230.328 592.445,-224.824"/>
<polygon fill="#0044a0" stroke="#0044a0" points="594.38,-227.741 600.98,-219.453 590.652,-221.817 594.38,-227.741"/>
</g>
<!-- t8 -->
<g id="node9" class="node"><title>t8</title>
<polygon fill="#efa03b" stroke="black" points="235.705,-219.452 66.2947,-219.452 62.2947,-215.452 62.2947,-183.452 231.705,-183.452 235.705,-187.452 235.705,-219.452"/>
<polyline fill="none" stroke="black" points="231.705,-215.452 62.2947,-215.452 "/>
<polyline fill="none" stroke="black" points="231.705,-215.452 231.705,-183.452 "/>
<polyline fill="none" stroke="black" points="231.705,-215.452 235.705,-219.452 "/>
<text text-anchor="middle" x="149" y="-195.452" font-family="Times,serif" font-size="20.00">render_debug_page</text>
<polygon fill="#efa03b" stroke="black" points="529.742,-219.452 314.258,-219.452 310.258,-215.452 310.258,-183.452 525.742,-183.452 529.742,-187.452 529.742,-219.452"/>
<polyline fill="none" stroke="black" points="525.742,-215.452 310.258,-215.452 "/>
<polyline fill="none" stroke="black" points="525.742,-215.452 525.742,-183.452 "/>
<polyline fill="none" stroke="black" points="525.742,-215.452 529.742,-219.452 "/>
<text text-anchor="middle" x="420" y="-195.452" font-family="Times,serif" font-size="20.00">render_hocr_debug_page</text>
</g>
<!-- t6&#45;&gt;t8 -->
<g id="edge11" class="edge"><title>t6&#45;&gt;t8</title>
<path fill="none" stroke="#0044a0" d="M145.527,-241.346C145.857,-237.655 146.213,-233.665 146.568,-229.695"/>
<polygon fill="#0044a0" stroke="#0044a0" points="150.079,-229.725 147.482,-219.453 143.106,-229.102 150.079,-229.725"/>
<path fill="none" stroke="#0044a0" d="M502.649,-241.346C490.885,-235.809 477.689,-229.6 465.354,-223.795"/>
<polygon fill="#0044a0" stroke="#0044a0" points="466.666,-220.544 456.128,-219.453 463.686,-226.878 466.666,-220.544"/>
</g>
<!-- t5&#45;&gt;t7 -->
<g id="edge10" class="edge"><title>t5&#45;&gt;t7</title>
<path fill="none" stroke="#0044a0" d="M315.167,-241.346C314.772,-237.655 314.344,-233.665 313.919,-229.695"/>
<polygon fill="#0044a0" stroke="#0044a0" points="317.367,-229.024 312.822,-219.453 310.407,-229.769 317.367,-229.024"/>
<path fill="none" stroke="#0044a0" d="M426.2,-242.104C461.192,-235.228 501.99,-227.212 537.71,-220.193"/>
<polygon fill="#0044a0" stroke="#0044a0" points="538.503,-223.604 547.641,-218.242 537.154,-216.736 538.503,-223.604"/>
</g>
<!-- t5&#45;&gt;t8 -->
<g id="edge12" class="edge"><title>t5&#45;&gt;t8</title>
<path fill="none" stroke="#0044a0" d="M265.682,-241.346C248.014,-235.457 228.061,-228.806 209.716,-222.691"/>
<polygon fill="#0044a0" stroke="#0044a0" points="210.598,-219.295 200.004,-219.453 208.384,-225.936 210.598,-219.295"/>
</g>
<!-- t11 -->
<g id="node12" class="node"><title>t11</title>
<polygon fill="#efa03b" stroke="black" points="149.845,-105.456 230,-78.0208 310.155,-105.456 310.08,-149.848 149.92,-149.848 149.845,-105.456"/>
<polygon fill="none" stroke="black" points="145.836,-102.581 230,-73.7729 314.164,-102.581 314.078,-153.869 145.922,-153.869 145.836,-102.581"/>
<text text-anchor="middle" x="230" y="-111.726" font-family="Times,serif" font-size="20.00">merge_pages</text>
</g>
<!-- t7&#45;&gt;t11 -->
<g id="edge17" class="edge"><title>t7&#45;&gt;t11</title>
<path fill="none" stroke="#0044a0" d="M293.822,-183.12C287.553,-176.794 280.152,-169.328 272.653,-161.762"/>
<polygon fill="#0044a0" stroke="#0044a0" points="274.685,-158.84 265.16,-154.201 269.713,-163.767 274.685,-158.84"/>
</g>
<!-- t8&#45;&gt;t11 -->
<g id="edge16" class="edge"><title>t8&#45;&gt;t11</title>
<path fill="none" stroke="#0044a0" d="M166.178,-183.12C172.447,-176.794 179.848,-169.328 187.347,-161.762"/>
<polygon fill="#0044a0" stroke="#0044a0" points="190.287,-163.767 194.84,-154.201 185.315,-158.84 190.287,-163.767"/>
</g>
<!-- t10&#45;&gt;t11 -->
<g id="edge15" class="edge"><title>t10&#45;&gt;t11</title>
<path fill="none" stroke="#0044a0" d="M56.8713,-357.375C40.6714,-320.554 9.49472,-233.019 53,-183.452 74.3258,-159.154 105.557,-143.971 135.87,-134.485"/>
<polygon fill="#0044a0" stroke="#0044a0" points="137.062,-137.783 145.666,-131.601 135.085,-131.068 137.062,-137.783"/>
</g>
<!-- t9&#45;&gt;t11 -->
<g id="edge19" class="edge"><title>t9&#45;&gt;t11</title>
<path fill="none" stroke="gray" d="M390.73,-415.09C392.803,-392.758 396,-352.756 396,-318.452 396,-318.452 396,-318.452 396,-258.452 396,-224.066 399.207,-209.706 377,-183.452 362.867,-166.743 343.584,-154.196 323.637,-144.829"/>
<polygon fill="gray" stroke="gray" points="325.001,-141.605 314.441,-140.745 322.16,-148.003 325.001,-141.605"/>
<path fill="none" stroke="#0044a0" d="M366.521,-241.346C373.387,-236.352 381.007,-230.811 388.291,-225.513"/>
<polygon fill="#0044a0" stroke="#0044a0" points="390.594,-228.166 396.623,-219.453 386.477,-222.504 390.594,-228.166"/>
</g>
<!-- t12 -->
<g id="node13" class="node"><title>t12</title>
<polygon fill="#efa03b" stroke="black" points="292.338,-52 171.662,-52 167.662,-48 167.662,-16 288.338,-16 292.338,-20 292.338,-52"/>
<polyline fill="none" stroke="black" points="288.338,-48 167.662,-48 "/>
<polyline fill="none" stroke="black" points="288.338,-48 288.338,-16 "/>
<polyline fill="none" stroke="black" points="288.338,-48 292.338,-52 "/>
<text text-anchor="middle" x="230" y="-28" font-family="Times,serif" font-size="20.00">validate_pdfa</text>
<polygon fill="#efa03b" stroke="black" points="339.845,-105.456 420,-78.0208 500.155,-105.456 500.08,-149.848 339.92,-149.848 339.845,-105.456"/>
<polygon fill="none" stroke="black" points="335.836,-102.581 420,-73.7729 504.164,-102.581 504.078,-153.869 335.922,-153.869 335.836,-102.581"/>
<text text-anchor="middle" x="420" y="-111.726" font-family="Times,serif" font-size="20.00">merge_pages</text>
</g>
<!-- t7&#45;&gt;t12 -->
<g id="edge19" class="edge"><title>t7&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M584.65,-183.419C564.063,-175.33 538.602,-165.326 513.908,-155.624"/>
<polygon fill="#0044a0" stroke="#0044a0" points="515.081,-152.324 504.493,-151.924 512.521,-158.839 515.081,-152.324"/>
</g>
<!-- t8&#45;&gt;t12 -->
<g id="edge18" class="edge"><title>t8&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M420,-183.12C420,-177.585 420,-171.177 420,-164.592"/>
<polygon fill="#0044a0" stroke="#0044a0" points="423.5,-164.201 420,-154.201 416.5,-164.201 423.5,-164.201"/>
</g>
<!-- t11&#45;&gt;t12 -->
<g id="edge18" class="edge"><title>t11&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M230,-73.9482C230,-69.9654 230,-66.007 230,-62.2247"/>
<polygon fill="#0044a0" stroke="#0044a0" points="233.5,-62.1573 230,-52.1573 226.5,-62.1574 233.5,-62.1573"/>
<g id="edge16" class="edge"><title>t11&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M92.3128,-357.362C55.4572,-321.636 -19.1794,-237.859 27,-183.452 64.5123,-139.256 221.468,-125.188 325.317,-120.741"/>
<polygon fill="#0044a0" stroke="#0044a0" points="325.658,-124.23 335.508,-120.328 325.375,-117.236 325.658,-124.23"/>
</g>
<!-- t9&#45;&gt;t12 -->
<g id="edge17" class="edge"><title>t9&#45;&gt;t12</title>
<path fill="none" stroke="#0044a0" d="M217.354,-183.419C248.764,-173.392 289.391,-160.422 326.012,-148.731"/>
<polygon fill="#0044a0" stroke="#0044a0" points="327.263,-152.006 335.725,-145.63 325.134,-145.337 327.263,-152.006"/>
</g>
<!-- t10&#45;&gt;t12 -->
<g id="edge21" class="edge"><title>t10&#45;&gt;t12</title>
<path fill="none" stroke="gray" d="M653.379,-415.337C684.552,-396.255 727,-362.233 727,-318.452 727,-318.452 727,-318.452 727,-258.452 727,-224.824 739.784,-208.186 717,-183.452 689.882,-154.013 590.624,-136.736 514.239,-127.544"/>
<polygon fill="gray" stroke="gray" points="514.437,-124.044 504.097,-126.355 513.621,-130.996 514.437,-124.044"/>
</g>
<!-- t13 -->
<g id="node14" class="node"><title>t13</title>
<polygon fill="#efa03b" stroke="black" points="482.338,-52 361.662,-52 357.662,-48 357.662,-16 478.338,-16 482.338,-20 482.338,-52"/>
<polyline fill="none" stroke="black" points="478.338,-48 357.662,-48 "/>
<polyline fill="none" stroke="black" points="478.338,-48 478.338,-16 "/>
<polyline fill="none" stroke="black" points="478.338,-48 482.338,-52 "/>
<text text-anchor="middle" x="420" y="-28" font-family="Times,serif" font-size="20.00">validate_pdfa</text>
</g>
<!-- t12&#45;&gt;t13 -->
<g id="edge20" class="edge"><title>t12&#45;&gt;t13</title>
<path fill="none" stroke="#0044a0" d="M420,-73.9482C420,-69.9654 420,-66.007 420,-62.2247"/>
<polygon fill="#0044a0" stroke="#0044a0" points="423.5,-62.1573 420,-52.1573 416.5,-62.1574 423.5,-62.1573"/>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -114,3 +114,15 @@ def test_metadata():
assert pdfinfo['Author'] == '孔子'
assert pdfinfo['Subject'] == 'U+1030C is: 𐌌'
assert pdfinfo.get('Keywords', '') == ''
def test_oversample():
    """Check that ``--oversample 300`` yields a first page with xres near 300.

    Runs ocrmypdf on ``skew.pdf`` with ``--oversample 300``, reads the page
    information of the resulting file, and asserts that the ``xres`` value
    reported for page index 0 differs from 300 by less than 1.
    """
    result_pdf = run_ocrmypdf(
        'skew.pdf',
        'test-oversample.pdf',
        '--oversample',
        '300',
    )

    # Function-scope import of the page-info reader.
    from ocrmypdf.pageinfo import pdf_get_all_pageinfo

    pages = pdf_get_all_pageinfo(result_pdf)
    first_page_xres = pages[0]['xres']

    # Printed so the measured value is visible in the test output.
    print(first_page_xres)
    assert abs(first_page_xres - 300) < 1