mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-02-15 08:42:25 -05:00
Replace GPLv3-derived PDF/A template with PostScript generator
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# © 2015 James R. Barlow: github.com/jbarlow83
|
||||
# © 2020 James R. Barlow: github.com/jbarlow83
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
@@ -11,8 +11,7 @@ Utilities for PDF/A production and confirmation with Ghostscript.
|
||||
|
||||
import base64
|
||||
from pathlib import Path
|
||||
from string import Template
|
||||
from typing import Dict, Union
|
||||
from typing import Dict, Iterator, Union
|
||||
|
||||
import pikepdf
|
||||
import pkg_resources
|
||||
@@ -22,29 +21,56 @@ ICC_PROFILE_RELPATH = 'data/sRGB.icc'
|
||||
SRGB_ICC_PROFILE = pkg_resources.resource_filename('ocrmypdf', ICC_PROFILE_RELPATH)
|
||||
|
||||
|
||||
# This is a template written in PostScript which is needed to create PDF/A
|
||||
# files, from the Ghostscript documentation. Lines beginning with % are
|
||||
# comments. Python substitution variables have a '$' prefix.
|
||||
pdfa_def_template = u"""%!
|
||||
% Define an ICC profile :
|
||||
/ICCProfile $icc_profile
|
||||
def
|
||||
def _postscript_objdef(
    alias: str,
    dictionary: Dict[str, str],
    *,
    stream_name: Union[str, None] = None,
    stream_data: Union[bytes, None] = None,
) -> Iterator[str]:
    """Yield the PostScript/pdfmark lines that define and fill one object.

    The lines are produced without trailing newlines, in this order:

    1. If a stream is given: ``<stream_name> <ASCII85 data>`` and ``def``,
       binding the encoded data to a PostScript name.
    2. Unless ``alias`` is ``'{Catalog}'``: an ``/_objdef`` pdfmark declaring
       the object as ``/stream`` (when a stream is given) or ``/dict``.
    3. A ``/PUT`` pdfmark that writes every ``key value`` pair of
       ``dictionary`` into the object.
    4. If a stream is given: a ``/PUT`` pdfmark that attaches the previously
       defined name (without its leading slash) to the object.

    Args:
        alias: pdfmark object name, braces included (e.g. ``'{icc_PDFA}'``).
        dictionary: Keys and values that are emitted verbatim, so both must
            already be valid PostScript tokens.
        stream_name: PostScript name literal, including the leading ``/``,
            under which the stream data is defined. Must be given together
            with ``stream_data``.
        stream_data: Raw bytes of the stream; encoded as Adobe-framed ASCII85.

    Raises:
        ValueError: If only one of ``stream_name``/``stream_data`` is given,
            or if ``stream_name`` does not start with ``/``. As this is a
            generator, the error surfaces when iteration starts.
    """
    has_stream = stream_name is not None
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if has_stream != (stream_data is not None):
        raise ValueError('stream_name and stream_data must be given together')
    # The final /PUT strips the first character of the name, which is only
    # correct when that character is the name-literal slash.
    if has_stream and not stream_name.startswith('/'):
        raise ValueError("stream_name must be a PostScript name starting with '/'")

    objtype = '/stream' if has_stream else '/dict'

    if has_stream:
        a85_data = base64.a85encode(stream_data, adobe=True).decode('ascii')
        yield f'{stream_name} ' + a85_data
        yield 'def'

    if alias != '{Catalog}':  # Catalog needs no definition
        yield f'[/_objdef {alias} /type {objtype} /OBJ pdfmark'

    yield f'[{alias} <<'
    for key, val in dictionary.items():
        yield f' {key} {val}'
    yield '>> /PUT pdfmark'

    if has_stream:
        # Refer to the name defined above as an executable name (no slash).
        yield f'[{alias} {stream_name[1:]} /PUT pdfmark'
|
||||
|
||||
|
||||
def _make_postscript(icc_name: str, icc_data: bytes, colors: int) -> Iterator[str]:
    """Yield the lines of a PostScript prologue that sets a PDF/A output intent.

    Three pdfmark objects are emitted, separated by empty lines:

    * ``{icc_PDFA}`` - a stream object holding the ICC profile, with ``/N``
      set to ``colors``.
    * ``{OutputIntent_PDFA}`` - an ``/OutputIntent`` dictionary of subtype
      ``/GTS_PDFA1`` that points at the profile and names it.
    * ``{Catalog}`` - updated so that ``/OutputIntents`` lists the intent.

    Args:
        icc_name: Identifier written as ``/OutputConditionIdentifier``.
        icc_data: Raw bytes of the ICC profile.
        colors: Number of colour components, written as the profile's ``/N``.

    Yields:
        PostScript source lines without trailing newlines.
    """
    # The identifier ends up inside a PostScript string literal "( ... )".
    # A backslash or an unbalanced parenthesis would corrupt that literal,
    # so escape all three characters; names without them are unchanged.
    escaped_name = icc_name.translate(
        str.maketrans({'\\': '\\\\', '(': '\\(', ')': '\\)'})
    )

    yield '%!'
    yield from _postscript_objdef(
        '{icc_PDFA}',  # Not an f-string
        {'/N': str(colors)},
        stream_name='/ICCProfile',
        stream_data=icc_data,
    )
    yield ''
    yield from _postscript_objdef(
        '{OutputIntent_PDFA}',
        {
            '/Type': '/OutputIntent',
            '/S': '/GTS_PDFA1',
            '/DestOutputProfile': '{icc_PDFA}',
            '/OutputConditionIdentifier': f'({escaped_name})',  # Only f-string
        },
    )
    yield ''
    yield from _postscript_objdef(
        '{Catalog}', {'/OutputIntents': '[ {OutputIntent_PDFA} ]'}
    )
|
||||
|
||||
|
||||
def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'):
|
||||
@@ -75,13 +101,8 @@ def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'):
|
||||
else:
|
||||
raise NotImplementedError("Only supporting sRGB")
|
||||
|
||||
# Read the ICC profile, encode as ASCII85 and convert to a string which we
|
||||
# will insert in the .ps file
|
||||
bytes_icc_profile = Path(icc_profile).read_bytes()
|
||||
icc_profile = base64.a85encode(bytes_icc_profile, adobe=True).decode('ascii')
|
||||
|
||||
t = Template(pdfa_def_template)
|
||||
ps = t.substitute(icc_profile=icc_profile, icc_identifier=icc)
|
||||
ps = '\n'.join(_make_postscript(icc, bytes_icc_profile, 3))
|
||||
|
||||
# We should have encoded everything to pure ASCII by this point, and
|
||||
# to be safe, only allow ASCII in PostScript
|
||||
|
||||
Reference in New Issue
Block a user