mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-02-08 13:22:34 -05:00
Added use_cropbox parameter to rasterize_pdf_page hook to allow choosing between MediaBox and CropBox rendering: - Default is use_cropbox=False (MediaBox) for consistency with Ghostscript's existing behavior - Ghostscript: passes -dUseCropBox when use_cropbox=True - pypdfium: calculates crop values to expand from CropBox to MediaBox when use_cropbox=False This aligns both rasterizers to produce the same output dimensions by default, making the rasterizer choice transparent for page geometry. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
52 lines
1.3 KiB
Python
52 lines
1.3 KiB
Python
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from subprocess import CalledProcessError
|
|
from unittest.mock import patch
|
|
|
|
from ocrmypdf import hookimpl
|
|
from ocrmypdf.builtin_plugins import ghostscript
|
|
from ocrmypdf.subprocess import run
|
|
|
|
|
|
def fail_if_stoponerror(args, **kwargs):
    """Stand-in for ``run`` that fails when ``-dPDFSTOPONERROR`` is requested.

    If the literal argument ``'-dPDFSTOPONERROR'`` is present in ``args``,
    a ``CalledProcessError`` is raised without executing anything. Otherwise
    the call is forwarded unchanged to ``ocrmypdf.subprocess.run``.

    Args:
        args: The command-line argument sequence that would be executed.
        **kwargs: Keyword arguments forwarded verbatim to ``run``.

    Returns:
        Whatever ``run(args, **kwargs)`` returns when the flag is absent.

    Raises:
        CalledProcessError: With return code 1, command ``'gs'``, empty
            output and stderr ``b"PDF STOP ON ERROR"`` when the flag is
            present.
    """
    if '-dPDFSTOPONERROR' in args:
        raise CalledProcessError(1, 'gs', output=b"", stderr=b"PDF STOP ON ERROR")
    return run(args, **kwargs)
|
|
|
|
|
|
@hookimpl
def rasterize_pdf_page(
    input_file,
    output_file,
    raster_device,
    raster_dpi,
    pageno,
    page_dpi,
    rotation,
    filter_vector,
    stop_on_soft_error,
    options,
    use_cropbox,
) -> Path:
    """Rasterize a page via the built-in Ghostscript plugin with ``run`` patched.

    While the built-in ``ghostscript.rasterize_pdf_page`` executes,
    ``ocrmypdf._exec.ghostscript.run`` is replaced by a mock whose side
    effect is ``fail_if_stoponerror``, so any invocation carrying
    ``-dPDFSTOPONERROR`` raises ``CalledProcessError`` instead of running.

    All parameters are forwarded by keyword, unchanged, to the built-in
    implementation.

    Returns:
        ``output_file``, exactly as passed in (the built-in hook's own
        return value is discarded).

    Raises:
        AssertionError: If the patched ``run`` was never called.
    """
    with patch('ocrmypdf._exec.ghostscript.run') as mock_run:
        # Route every patched ``run`` call through the failure-injecting shim.
        mock_run.side_effect = fail_if_stoponerror
        ghostscript.rasterize_pdf_page(
            input_file=input_file,
            output_file=output_file,
            raster_device=raster_device,
            raster_dpi=raster_dpi,
            pageno=pageno,
            page_dpi=page_dpi,
            rotation=rotation,
            filter_vector=filter_vector,
            stop_on_soft_error=stop_on_soft_error,
            options=options,
            use_cropbox=use_cropbox,
        )
        # Guard against the patch target drifting: if the built-in plugin no
        # longer goes through this ``run``, the test plugin would be a no-op.
        mock_run.assert_called()
        return output_file
|