Files
OCRmyPDF/tests/plugins/gs_raster_soft_error.py
James R. Barlow 3e46b039ed feat: add use_cropbox parameter to align rasterizer APIs
Added use_cropbox parameter to rasterize_pdf_page hook to allow
choosing between MediaBox and CropBox rendering:

- Default is use_cropbox=False (MediaBox) for consistency with
  Ghostscript's existing behavior
- Ghostscript: passes -dUseCropBox when use_cropbox=True
- pypdfium: calculates crop values to expand from CropBox to MediaBox
  when use_cropbox=False

This aligns both rasterizers to produce the same output dimensions
by default, making the rasterizer choice transparent for page
geometry.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-21 12:29:17 -08:00

52 lines
1.3 KiB
Python

# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MIT
from __future__ import annotations
from pathlib import Path
from subprocess import CalledProcessError
from unittest.mock import patch
from ocrmypdf import hookimpl
from ocrmypdf.builtin_plugins import ghostscript
from ocrmypdf.subprocess import run
def fail_if_stoponerror(args, **kwargs):
    """Run a subprocess, but fail if ``-dPDFSTOPONERROR`` was requested.

    Intended as a ``side_effect`` for a mocked ``run``.

    Args:
        args: Command-line argument sequence for the subprocess.
        **kwargs: Passed through unchanged to ``run`` when the call proceeds.

    Returns:
        The result of ``run(args, **kwargs)`` when ``'-dPDFSTOPONERROR'`` is
        not among ``args``.

    Raises:
        CalledProcessError: With return code 1, command ``'gs'``, empty
            output and stderr ``b"PDF STOP ON ERROR"`` when
            ``'-dPDFSTOPONERROR'`` is among ``args``.
    """
    if '-dPDFSTOPONERROR' in args:
        # Simulated failure: no real process is launched on this path.
        raise CalledProcessError(
            1, 'gs', output=b"", stderr=b"PDF STOP ON ERROR"
        )
    return run(args, **kwargs)
@hookimpl
def rasterize_pdf_page(
    input_file,
    output_file,
    raster_device,
    raster_dpi,
    pageno,
    page_dpi,
    rotation,
    filter_vector,
    stop_on_soft_error,
    options,
    use_cropbox,
) -> Path:
    """Rasterize a page via the built-in Ghostscript plugin with ``run`` mocked.

    While the built-in ``ghostscript.rasterize_pdf_page`` executes,
    ``ocrmypdf._exec.ghostscript.run`` is replaced by a mock whose side effect
    is ``fail_if_stoponerror``, so any invocation carrying
    ``-dPDFSTOPONERROR`` raises ``CalledProcessError`` instead of running.

    All parameters are forwarded by keyword, unchanged, to the built-in
    implementation.

    Returns:
        ``output_file`` as received; the built-in implementation's own return
        value is not used.

    Raises:
        AssertionError: If the patched ``run`` was never called.
    """
    with patch('ocrmypdf._exec.ghostscript.run') as mock:
        mock.side_effect = fail_if_stoponerror
        ghostscript.rasterize_pdf_page(
            input_file=input_file,
            output_file=output_file,
            raster_device=raster_device,
            raster_dpi=raster_dpi,
            pageno=pageno,
            page_dpi=page_dpi,
            rotation=rotation,
            filter_vector=filter_vector,
            stop_on_soft_error=stop_on_soft_error,
            options=options,
            use_cropbox=use_cropbox,
        )
        # Guard against the patch target drifting: if the built-in plugin no
        # longer goes through this ``run``, the simulated failure is inert.
        mock.assert_called()
        return output_file