From 86ec63f215a0e7d3167c4fa9dbffeb5bda9aaace Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Mon, 22 Jun 2020 01:07:22 -0700 Subject: [PATCH] Decouple plugin manager forking from PdfContext/Pagecontext --- src/ocrmypdf/_jobcontext.py | 12 +-------- src/ocrmypdf/_plugin_manager.py | 48 ++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/ocrmypdf/_jobcontext.py b/src/ocrmypdf/_jobcontext.py index 37e541ce..4cb910a1 100644 --- a/src/ocrmypdf/_jobcontext.py +++ b/src/ocrmypdf/_jobcontext.py @@ -22,7 +22,6 @@ from argparse import Namespace from pathlib import Path from typing import Iterator -from ocrmypdf._plugin_manager import get_plugin_manager from ocrmypdf.pdfinfo import PdfInfo @@ -49,7 +48,7 @@ class PdfContext: def get_path(self, name: str) -> Path: return self.work_folder / name - def get_page_contexts(self) -> Iterator[PageContext]: + def get_page_contexts(self) -> Iterator['PageContext']: npages = len(self.pdfinfo) for n in range(npages): yield PageContext(self, n) @@ -74,15 +73,6 @@ class PageContext: def get_path(self, name: str) -> Path: return self.work_folder / ("%06d_%s" % (self.pageno + 1, name)) - def __getstate__(self): - state = self.__dict__.copy() - state['plugin_manager'] = None # We cannot serialize the plugin manager... - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.plugin_manager = get_plugin_manager(self.options.plugins) - def cleanup_working_files(work_folder: Path, options: Namespace): if options.keep_temporary_files: diff --git a/src/ocrmypdf/_plugin_manager.py b/src/ocrmypdf/_plugin_manager.py index 5bc1ab60..ba867525 100644 --- a/src/ocrmypdf/_plugin_manager.py +++ b/src/ocrmypdf/_plugin_manager.py @@ -19,8 +19,9 @@ import argparse import importlib import importlib.util import sys +from functools import partial from pathlib import Path -from typing import List, Tuple +from typing import Callable, List, Tuple import pluggy @@ -28,8 +29,42 @@ from ocrmypdf import pluginspec from ocrmypdf.cli import get_parser, plugins_only_parser -def get_plugin_manager(plugins: List[str], builtins=True): - pm = pluggy.PluginManager('ocrmypdf') +class OcrmypdfPluginManager(pluggy.PluginManager): + """pluggy.PluginManager that can fork. + + Capable of reconstructing itself in child workers. + + Arguments: + setup_func: callback that initializes the plugin manager with all + standard plugins + """ + + def __init__( + self, *args, setup_func: Callable[[pluggy.PluginManager], None], **kwargs + ): + self._init_args = args + self._setup_func = setup_func + self._init_kwargs = kwargs + super().__init__(*args, **kwargs) + setup_func(self) + + def __getstate__(self): + state = dict( + _init_args=self._init_args, + _setup_func=self._setup_func, + _init_kwargs=self._init_kwargs, + ) + return state + + def __setstate__(self, state): + self.__init__( + *state['_init_args'], + setup_func=state['_setup_func'], + **state['_init_kwargs'], + ) + + +def _setup_plugins(pm: pluggy.PluginManager, plugins: List[str], builtins: bool = True): pm.add_hookspecs(pluginspec) if builtins: @@ -51,6 +86,13 @@ def get_plugin_manager(plugins: List[str], builtins=True): # Import by dotted module name module = importlib.import_module(name) pm.register(module) + + +def get_plugin_manager(plugins: List[str], builtins=True): + pm = OcrmypdfPluginManager( + project_name='ocrmypdf', + setup_func=partial(_setup_plugins, plugins=plugins, builtins=builtins), + ) return pm