From 22298b31becda46fd444420a4c8d9937d2e4644e Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Thu, 23 May 2019 01:19:58 -0700 Subject: [PATCH] Fix distinction between clean and clean_final lost in API refactor --- src/ocrmypdf/_sync.py | 19 ++++++++++++++----- src/ocrmypdf/_validation.py | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/ocrmypdf/_sync.py b/src/ocrmypdf/_sync.py index a1a3f083..45ccdca9 100644 --- a/src/ocrmypdf/_sync.py +++ b/src/ocrmypdf/_sync.py @@ -92,17 +92,26 @@ def exec_page_sync(page_context): page_context.origin, page_context, correction=orientation_correction ) - preprocess_out = rasterize_out + preprocess = rasterize_out if options.remove_background: - preprocess_out = preprocess_remove_background(preprocess_out, page_context) + preprocess = preprocess_remove_background(preprocess, page_context) if options.deskew: - preprocess_out = preprocess_deskew(preprocess_out, page_context) + preprocess = preprocess_deskew(preprocess, page_context) if options.clean: - preprocess_out = preprocess_clean(preprocess_out, page_context) + cleaned = preprocess_clean(preprocess, page_context) + if options.clean_final: + preprocess_out = cleaned + ocr_image = cleaned + else: + preprocess_out = preprocess + ocr_image = cleaned + else: + preprocess_out = preprocess + ocr_image = preprocess - ocr_image_out = create_ocr_image(preprocess_out, page_context) + ocr_image_out = create_ocr_image(ocr_image, page_context) pdf_page_from_image_out = None if not options.lossless_reconstruction: diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py index 07a8c481..3e0e3526 100644 --- a/src/ocrmypdf/_validation.py +++ b/src/ocrmypdf/_validation.py @@ -136,7 +136,7 @@ def check_options_sidecar(options): if options.sidecar == '\0': if options.output_file == '-': raise BadArgsError( - "--sidecar filename must be specified when output file is " "stdout." + "--sidecar filename must be specified when output file is stdout." ) options.sidecar = options.output_file + '.txt'