Improve DeviceN color conversion guidance (#1623) (#1694)

When Ghostscript reports a DeviceN colorspace with an inappropriate
alternate, the resulting PDF/A may render blank in viewers such as Adobe
Reader (#1187). The error is gated on that Ghostscript warning, which is
the authoritative signal that the *output* is broken.

Previously the error message always told the user to "use
--color-conversion-strategy", which is confusing when they already set
one and it didn't help. Crucially, the warning persists for strategies
that don't actually normalize the colorspace -- notably
UseDeviceIndependentColor (confirmed in #1187) -- so silencing the error
for any non-default strategy would emit a silently-broken PDF/A.

Keep raising whenever Ghostscript still reports the warning, regardless
of strategy, but tailor the guidance: if no conversion was requested,
suggest RGB/CMYK/Gray; if a conversion was requested but the warning
persisted, say so and point at strategies that work or --output-type pdf.

Add unit tests (mocked Ghostscript) covering the default case, the
warning-persists-despite-strategy case for both an ineffective strategy
and a normally-effective one, and the no-warning happy path.
This commit is contained in:
jbarlow
2026-06-04 14:48:24 -07:00
committed by GitHub
parent 5d7b5742e4
commit c33f073d4f
3 changed files with 125 additions and 10 deletions

View File

@@ -405,4 +405,8 @@ def generate_pdfa(
for part in stderr.split('****'):
log.error(part)
if _gs_devicen_reported(stderr):
raise ColorConversionNeededError()
# Ghostscript could not normalize the DeviceN colorspace for PDF/A,
# even if the user requested a conversion strategy. The output is
# liable to render blank in some viewers, so raise regardless of the
# strategy and tailor the guidance to what was attempted.
raise ColorConversionNeededError(color_conversion_strategy)

View File

@@ -140,13 +140,42 @@ class TaggedPDFError(InputFileError):
class ColorConversionNeededError(BadArgsError):
    """PDF needs color conversion to a standard color space.

    Ghostscript reported a DeviceN colorspace with an inappropriate alternate.
    The resulting PDF/A is liable to render incorrectly (often blank) in some
    viewers such as Adobe Reader, so the colorspace must be normalized to a
    common one. RGB, CMYK, and Gray are known to work; LeaveColorUnchanged
    performs no conversion and UseDeviceIndependentColor does not resolve the
    problem (see https://github.com/ocrmypdf/OCRmyPDF/issues/1187).

    The user-facing ``message`` is built per instance so that the guidance
    reflects the conversion strategy that was actually attempted.
    """

    # Strategies that can normalize an unusual DeviceN colorspace into one that
    # PDF/A viewers render correctly.
    _effective_strategy_names = ("RGB", "CMYK", "Gray")
    _effective_strategies = "RGB, CMYK, or Gray"

    def __init__(self, color_conversion_strategy: str = "LeaveColorUnchanged"):
        """Build guidance tailored to the conversion strategy that was used.

        Args:
            color_conversion_strategy: The strategy that was in effect when
                Ghostscript reported the DeviceN warning. The default,
                "LeaveColorUnchanged", means no conversion was requested.
        """
        super().__init__()
        if color_conversion_strategy == "LeaveColorUnchanged":
            # No conversion was requested: recommend the strategies that work.
            self.message = dedent(
                f"""\
                The input PDF has an unusual DeviceN color space that cannot be
                represented in PDF/A; the output may appear blank in some viewers
                such as Adobe Reader. Convert it to a common color space with
                --color-conversion-strategy ({self._effective_strategies}), or use
                --output-type pdf to skip PDF/A conversion and retain the original
                color space.
                """
            )
        else:
            # A conversion was requested but the warning persisted. Do not
            # recommend the very strategy that has just failed.
            alternatives = self._describe_alternatives(color_conversion_strategy)
            self.message = dedent(
                f"""\
                Color conversion with --color-conversion-strategy
                {color_conversion_strategy} did not resolve the input PDF's unusual
                DeviceN color space; the output may appear blank in some viewers
                such as Adobe Reader. Try a different --color-conversion-strategy
                ({alternatives}), or use --output-type pdf to skip
                PDF/A conversion and retain the original color space.
                """
            )

    @classmethod
    def _describe_alternatives(cls, attempted: str) -> str:
        """Return the effective strategies worth suggesting after *attempted* failed."""
        remaining = [
            name for name in cls._effective_strategy_names if name != attempted
        ]
        if len(remaining) == len(cls._effective_strategy_names):
            # The attempted strategy is not one of the effective ones, so the
            # full list is still a valid suggestion.
            return cls._effective_strategies
        return " or ".join(remaining)

View File

@@ -327,6 +327,88 @@ def test_ghostscript_mandatory_color_conversion(resources, outpdf):
)
def _run_generate_pdfa_with_devicen_warning(outdir, color_conversion_strategy):
    """Call generate_pdfa while a mocked Ghostscript emits the DeviceN warning.

    The stubbed Ghostscript run exits with return code 0 but reports on stderr
    that it wrote a DeviceN space with an inappropriate alternate, i.e. that it
    could not normalize the colorspace for PDF/A. Such output is liable to
    render blank in viewers such as Adobe Reader (see issue #1187), whichever
    conversion strategy was requested.
    """
    source_pdf = outdir / 'input.pdf'
    source_pdf.write_bytes(b'%PDF-1.5\n%fake\n')

    devicen_warning = (
        'Attempting to write a DeviceN space with an inappropriate '
        'alternate, reverting to the alternate color space.'
    )
    fake_result = subprocess.CompletedProcess(
        ['gs'],
        returncode=0,
        stdout='',
        stderr=devicen_warning,
    )

    with (
        patch('ocrmypdf._exec.ghostscript.version', return_value=Version('10.05.1')),
        patch(
            'ocrmypdf._exec.ghostscript.run_polling_stderr',
            return_value=fake_result,
        ),
    ):
        ghostscript.generate_pdfa(
            pdf_pages=[source_pdf],
            output_file=outdir / 'out.pdf',
            compression='auto',
            color_conversion_strategy=color_conversion_strategy,
        )
def test_devicen_warning_default_strategy_raises_with_guidance(outdir):
    """No conversion requested: the error must steer the user to a strategy."""
    with pytest.raises(ColorConversionNeededError) as raised:
        _run_generate_pdfa_with_devicen_warning(outdir, 'LeaveColorUnchanged')

    guidance = str(raised.value)
    # The guidance must name the option and at least one strategy that works.
    for expected in ('--color-conversion-strategy', 'RGB'):
        assert expected in guidance
@pytest.mark.parametrize(
    'strategy',
    [
        # Confirmed in #1187 to be unable to fix the colorspace at all.
        'UseDeviceIndependentColor',
        # Usually effective, but it failed on this input. While Ghostscript
        # keeps warning, the output is still broken, so it must not be passed
        # through silently (the behaviour PR #1692 would have introduced).
        'RGB',
    ],
)
def test_devicen_warning_persists_despite_strategy_still_raises(outdir, strategy):
    """A warning that outlives the requested conversion means broken output.

    Raising is still required; silently emitting a PDF/A that may render blank
    is not acceptable. The guidance has to name the strategy that did not work
    and offer a way out (working strategies or --output-type pdf).
    """
    with pytest.raises(ColorConversionNeededError) as raised:
        _run_generate_pdfa_with_devicen_warning(outdir, strategy)

    guidance = str(raised.value)
    for expected in (strategy, '--output-type pdf'):
        assert expected in guidance
def test_no_devicen_warning_does_not_raise(outdir):
    """When Ghostscript does not warn, conversion succeeded; never raise.

    Ghostscript is mocked to exit with return code 0 and an empty stderr. The
    call is repeated for the default strategy, an ineffective one, and an
    effective one, so that "never raise" is checked for more than one strategy.
    """
    source_pdf = outdir / 'input.pdf'
    source_pdf.write_bytes(b'%PDF-1.5\n%fake\n')
    strategies = ('LeaveColorUnchanged', 'UseDeviceIndependentColor', 'RGB')
    with (
        patch('ocrmypdf._exec.ghostscript.version', return_value=Version('10.05.1')),
        patch('ocrmypdf._exec.ghostscript.run_polling_stderr') as run_mock,
    ):
        run_mock.return_value = subprocess.CompletedProcess(
            ['gs'], returncode=0, stdout='', stderr=''
        )
        # Must not raise for any strategy when there is no DeviceN warning.
        for strategy in strategies:
            ghostscript.generate_pdfa(
                pdf_pages=[source_pdf],
                output_file=outdir / 'out.pdf',
                compression='auto',
                color_conversion_strategy=strategy,
            )
def test_rasterize_pdf_errors(resources, no_outpdf, caplog):
with patch('ocrmypdf._exec.ghostscript.run') as mock:
# ghostscript can produce empty files with return code 0