mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-06-10 23:17:00 -04:00
When Ghostscript reports a DeviceN colorspace with an inappropriate alternate, the resulting PDF/A may render blank in viewers such as Adobe Reader (#1187). The error is gated on that Ghostscript warning, which is the authoritative signal that the *output* is broken. Previously the error message always told the user to "use --color-conversion-strategy", which is confusing when they already set one and it didn't help. Crucially, the warning persists for strategies that don't actually normalize the colorspace -- notably UseDeviceIndependentColor (confirmed in #1187) -- so silencing the error for any non-default strategy would emit a silently-broken PDF/A. Keep raising whenever Ghostscript still reports the warning, regardless of strategy, but tailor the guidance: if no conversion was requested, suggest RGB/CMYK/Gray; if a conversion was requested but the warning persisted, say so and point at strategies that work or --output-type pdf. Add unit tests (mocked Ghostscript) covering the default case, the warning-persists-despite-strategy case for both an ineffective strategy and a normally-effective one, and the no-warning happy path.
This commit is contained in:
@@ -405,4 +405,8 @@ def generate_pdfa(
|
||||
for part in stderr.split('****'):
|
||||
log.error(part)
|
||||
if _gs_devicen_reported(stderr):
|
||||
raise ColorConversionNeededError()
|
||||
# Ghostscript could not normalize the DeviceN colorspace for PDF/A,
|
||||
# even if the user requested a conversion strategy. The output is
|
||||
# liable to render blank in some viewers, so raise regardless of the
|
||||
# strategy and tailor the guidance to what was attempted.
|
||||
raise ColorConversionNeededError(color_conversion_strategy)
|
||||
|
||||
@@ -140,13 +140,42 @@ class TaggedPDFError(InputFileError):
|
||||
|
||||
|
||||
class ColorConversionNeededError(BadArgsError):
    """PDF needs color conversion to a standard color space.

    Signals that Ghostscript reported a DeviceN colorspace with an
    inappropriate alternate. A PDF/A produced in that state is liable to
    render incorrectly (often blank) in some viewers such as Adobe Reader, so
    the colorspace has to be normalized to a common one. RGB, CMYK, and Gray
    are known to work; LeaveColorUnchanged performs no conversion and
    UseDeviceIndependentColor does not resolve the problem
    (see https://github.com/ocrmypdf/OCRmyPDF/issues/1187).
    """

    # Human-readable list of the strategies that can normalize an unusual
    # DeviceN colorspace; interpolated into both guidance messages below.
    _effective_strategies = "RGB, CMYK, or Gray"

    def __init__(self, color_conversion_strategy: str = "LeaveColorUnchanged"):
        """Choose the guidance text that matches the strategy that was used.

        Args:
            color_conversion_strategy: The strategy in effect when the problem
                was detected. The default, ``"LeaveColorUnchanged"``, means no
                conversion was requested.
        """
        super().__init__()
        workable = self._effective_strategies

        if color_conversion_strategy != "LeaveColorUnchanged":
            # A conversion was requested but the problem persisted: say which
            # strategy failed and point at the alternatives.
            self.message = dedent(
                f"""\
                Color conversion with --color-conversion-strategy
                {color_conversion_strategy} did not resolve the input PDF's unusual
                DeviceN color space; the output may appear blank in some viewers
                such as Adobe Reader. Try a different --color-conversion-strategy
                ({workable}), or use --output-type pdf to skip
                PDF/A conversion and retain the original color space.
                """
            )
            return

        # No conversion was requested: recommend picking a strategy.
        self.message = dedent(
            f"""\
            The input PDF has an unusual DeviceN color space that cannot be
            represented in PDF/A; the output may appear blank in some viewers
            such as Adobe Reader. Convert it to a common color space with
            --color-conversion-strategy ({workable}), or use
            --output-type pdf to skip PDF/A conversion and retain the original
            color space.
            """
        )
|
||||
|
||||
@@ -327,6 +327,88 @@ def test_ghostscript_mandatory_color_conversion(resources, outpdf):
|
||||
)
|
||||
|
||||
|
||||
def _run_generate_pdfa_with_devicen_warning(outdir, color_conversion_strategy):
    """Call generate_pdfa while a mocked Ghostscript emits the DeviceN warning.

    The real Ghostscript prints this warning when it writes a DeviceN
    colorspace with an inappropriate alternate, i.e. when it could not
    normalize the colorspace for PDF/A. Output produced in that state is
    liable to render blank in viewers such as Adobe Reader (see issue #1187),
    whichever conversion strategy was requested.

    Args:
        outdir: Directory in which the placeholder input PDF is written and
            where the output path is located.
        color_conversion_strategy: Strategy name forwarded to generate_pdfa.
    """
    source_pdf = outdir / 'input.pdf'
    source_pdf.write_bytes(b'%PDF-1.5\n%fake\n')

    # A successful exit code whose stderr nevertheless carries the warning.
    warning_result = subprocess.CompletedProcess(
        ['gs'],
        returncode=0,
        stdout='',
        stderr='Attempting to write a DeviceN space with an inappropriate '
        'alternate, reverting to the alternate color space.',
    )

    with (
        patch('ocrmypdf._exec.ghostscript.version', return_value=Version('10.05.1')),
        patch(
            'ocrmypdf._exec.ghostscript.run_polling_stderr',
            return_value=warning_result,
        ),
    ):
        ghostscript.generate_pdfa(
            pdf_pages=[source_pdf],
            output_file=outdir / 'out.pdf',
            compression='auto',
            color_conversion_strategy=color_conversion_strategy,
        )
|
||||
|
||||
|
||||
def test_devicen_warning_default_strategy_raises_with_guidance(outdir):
    """With no conversion requested, the error must suggest choosing one."""
    with pytest.raises(ColorConversionNeededError) as raised:
        _run_generate_pdfa_with_devicen_warning(outdir, 'LeaveColorUnchanged')

    guidance = str(raised.value)
    # The message has to name the option and at least one strategy that works.
    for expected in ('--color-conversion-strategy', 'RGB'):
        assert expected in guidance
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'strategy',
    [
        # Cannot fix the colorspace at all; confirmed in #1187.
        'UseDeviceIndependentColor',
        # Normally effective, but if Ghostscript still warns on this input the
        # output is still broken, so it must not be passed through silently
        # (the behaviour PR #1692 would have introduced).
        'RGB',
    ],
)
def test_devicen_warning_persists_despite_strategy_still_raises(outdir, strategy):
    """A warning that survives the requested conversion must still raise.

    Emitting the PDF/A anyway would risk a file that renders blank. The error
    text should name the strategy that did not work and offer
    --output-type pdf as a way out.
    """
    with pytest.raises(ColorConversionNeededError) as raised:
        _run_generate_pdfa_with_devicen_warning(outdir, strategy)

    guidance = str(raised.value)
    for expected in (strategy, '--output-type pdf'):
        assert expected in guidance
|
||||
|
||||
|
||||
def test_no_devicen_warning_does_not_raise(outdir):
    """Without the Ghostscript warning, generate_pdfa must not raise."""
    source_pdf = outdir / 'input.pdf'
    source_pdf.write_bytes(b'%PDF-1.5\n%fake\n')

    # Successful run with an empty stderr, i.e. no DeviceN warning.
    clean_result = subprocess.CompletedProcess(
        ['gs'], returncode=0, stdout='', stderr=''
    )

    with (
        patch('ocrmypdf._exec.ghostscript.version', return_value=Version('10.05.1')),
        patch(
            'ocrmypdf._exec.ghostscript.run_polling_stderr',
            return_value=clean_result,
        ),
    ):
        # The test passes simply by this call completing; RGB is the strategy
        # exercised here.
        ghostscript.generate_pdfa(
            pdf_pages=[source_pdf],
            output_file=outdir / 'out.pdf',
            compression='auto',
            color_conversion_strategy='RGB',
        )
|
||||
|
||||
|
||||
def test_rasterize_pdf_errors(resources, no_outpdf, caplog):
|
||||
with patch('ocrmypdf._exec.ghostscript.run') as mock:
|
||||
# ghostscript can produce empty files with return code 0
|
||||
|
||||
Reference in New Issue
Block a user