From c33f073d4f37cd54dbcc638beeac3cdb397d4fac Mon Sep 17 00:00:00 2001
From: jbarlow
Date: Thu, 4 Jun 2026 14:48:24 -0700
Subject: [PATCH] Improve DeviceN color conversion guidance (#1623) (#1694)

When Ghostscript reports a DeviceN colorspace with an inappropriate
alternate, the resulting PDF/A may render blank in viewers such as
Adobe Reader (#1187). The error is gated on that Ghostscript warning,
which is the authoritative signal that the *output* is broken.

Previously the error message always told the user to "use
--color-conversion-strategy", which is confusing when they already set
one and it didn't help. Crucially, the warning persists for strategies
that don't actually normalize the colorspace -- notably
UseDeviceIndependentColor (confirmed in #1187) -- so silencing the
error for any non-default strategy would emit a silently-broken PDF/A.

Keep raising whenever Ghostscript still reports the warning, regardless
of strategy, but tailor the guidance: if no conversion was requested,
suggest RGB/CMYK/Gray; if a conversion was requested but the warning
persisted, say so and point at strategies that work or
--output-type pdf.

Add unit tests (mocked Ghostscript) covering the default case, the
warning-persists-despite-strategy case for both an ineffective strategy
and a normally-effective one, and the no-warning happy path.
--- src/ocrmypdf/_exec/ghostscript.py | 6 ++- src/ocrmypdf/exceptions.py | 47 ++++++++++++++---- tests/test_ghostscript.py | 82 +++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 10 deletions(-) diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py index 6457fe43..5afd6e17 100644 --- a/src/ocrmypdf/_exec/ghostscript.py +++ b/src/ocrmypdf/_exec/ghostscript.py @@ -405,4 +405,8 @@ def generate_pdfa( for part in stderr.split('****'): log.error(part) if _gs_devicen_reported(stderr): - raise ColorConversionNeededError() + # Ghostscript could not normalize the DeviceN colorspace for PDF/A, + # even if the user requested a conversion strategy. The output is + # liable to render blank in some viewers, so raise regardless of the + # strategy and tailor the guidance to what was attempted. + raise ColorConversionNeededError(color_conversion_strategy) diff --git a/src/ocrmypdf/exceptions.py b/src/ocrmypdf/exceptions.py index fd221e87..6f3b3af4 100644 --- a/src/ocrmypdf/exceptions.py +++ b/src/ocrmypdf/exceptions.py @@ -140,13 +140,42 @@ class TaggedPDFError(InputFileError): class ColorConversionNeededError(BadArgsError): - """PDF needs color conversion.""" + """PDF needs color conversion to a standard color space. - message = dedent( - """\ - The input PDF has an unusual color space. Use - --color-conversion-strategy to convert to a common color space - such as RGB, or use --output-type pdf to skip PDF/A conversion - and retain the original color space. - """ - ) + Ghostscript reported a DeviceN colorspace with an inappropriate alternate. + The resulting PDF/A is liable to render incorrectly (often blank) in some + viewers such as Adobe Reader, so the colorspace must be normalized to a + common one. RGB, CMYK, and Gray are known to work; LeaveColorUnchanged + performs no conversion and UseDeviceIndependentColor does not resolve the + problem (see https://github.com/ocrmypdf/OCRmyPDF/issues/1187). 
+ """ + + # Strategies that can normalize an unusual DeviceN colorspace into one that + # PDF/A viewers render correctly. + _effective_strategies = "RGB, CMYK, or Gray" + + def __init__(self, color_conversion_strategy: str = "LeaveColorUnchanged"): + """Build guidance tailored to the conversion strategy that was used.""" + super().__init__() + if color_conversion_strategy == "LeaveColorUnchanged": + self.message = dedent( + f"""\ + The input PDF has an unusual DeviceN color space that cannot be + represented in PDF/A; the output may appear blank in some viewers + such as Adobe Reader. Convert it to a common color space with + --color-conversion-strategy ({self._effective_strategies}), or use + --output-type pdf to skip PDF/A conversion and retain the original + color space. + """ + ) + else: + self.message = dedent( + f"""\ + Color conversion with --color-conversion-strategy + {color_conversion_strategy} did not resolve the input PDF's unusual + DeviceN color space; the output may appear blank in some viewers + such as Adobe Reader. Try a different --color-conversion-strategy + ({self._effective_strategies}), or use --output-type pdf to skip + PDF/A conversion and retain the original color space. + """ + ) diff --git a/tests/test_ghostscript.py b/tests/test_ghostscript.py index 21f73420..81fc69dd 100644 --- a/tests/test_ghostscript.py +++ b/tests/test_ghostscript.py @@ -327,6 +327,88 @@ def test_ghostscript_mandatory_color_conversion(resources, outpdf): ) +def _run_generate_pdfa_with_devicen_warning(outdir, color_conversion_strategy): + """Invoke generate_pdfa with Ghostscript mocked to emit the DeviceN warning. + + Ghostscript emits this warning when it writes a DeviceN colorspace with an + inappropriate alternate, i.e. when it could not normalize the colorspace for + PDF/A. The output is then liable to render blank in viewers such as Adobe + Reader (see issue #1187), regardless of which conversion strategy was + requested. 
+ """ + (outdir / 'input.pdf').write_bytes(b'%PDF-1.5\n%fake\n') + with ( + patch('ocrmypdf._exec.ghostscript.version', return_value=Version('10.05.1')), + patch('ocrmypdf._exec.ghostscript.run_polling_stderr') as run_mock, + ): + run_mock.return_value = subprocess.CompletedProcess( + ['gs'], + returncode=0, + stdout='', + stderr='Attempting to write a DeviceN space with an inappropriate ' + 'alternate, reverting to the alternate color space.', + ) + ghostscript.generate_pdfa( + pdf_pages=[outdir / 'input.pdf'], + output_file=outdir / 'out.pdf', + compression='auto', + color_conversion_strategy=color_conversion_strategy, + ) + + +def test_devicen_warning_default_strategy_raises_with_guidance(outdir): + """Default (no conversion): raise and tell the user to pick a strategy.""" + with pytest.raises(ColorConversionNeededError) as exc_info: + _run_generate_pdfa_with_devicen_warning(outdir, 'LeaveColorUnchanged') + message = str(exc_info.value) + assert '--color-conversion-strategy' in message + assert 'RGB' in message + + +@pytest.mark.parametrize( + 'strategy', + [ + # A strategy that genuinely cannot fix the colorspace; confirmed in #1187. + 'UseDeviceIndependentColor', + # A normally-effective strategy that nonetheless failed on this input: + # if Ghostscript still warns, the output is still broken and we must not + # silently pass it through (the behaviour PR #1692 would have introduced). + 'RGB', + ], +) +def test_devicen_warning_persists_despite_strategy_still_raises(outdir, strategy): + """If the warning survives the requested conversion, the output is broken. + + We must still raise rather than silently emit a PDF/A that may render blank. + The guidance should acknowledge that the chosen strategy did not work and + point at strategies that do (or --output-type pdf). 
+ """ + with pytest.raises(ColorConversionNeededError) as exc_info: + _run_generate_pdfa_with_devicen_warning(outdir, strategy) + message = str(exc_info.value) + assert strategy in message + assert '--output-type pdf' in message + + +def test_no_devicen_warning_does_not_raise(outdir): + """When Ghostscript does not warn, conversion succeeded; never raise.""" + (outdir / 'input.pdf').write_bytes(b'%PDF-1.5\n%fake\n') + with ( + patch('ocrmypdf._exec.ghostscript.version', return_value=Version('10.05.1')), + patch('ocrmypdf._exec.ghostscript.run_polling_stderr') as run_mock, + ): + run_mock.return_value = subprocess.CompletedProcess( + ['gs'], returncode=0, stdout='', stderr='' + ) + # Must not raise for any strategy when there is no DeviceN warning. + ghostscript.generate_pdfa( + pdf_pages=[outdir / 'input.pdf'], + output_file=outdir / 'out.pdf', + compression='auto', + color_conversion_strategy='RGB', + ) + + def test_rasterize_pdf_errors(resources, no_outpdf, caplog): with patch('ocrmypdf._exec.ghostscript.run') as mock: # ghostscript can produce empty files with return code 0