diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py index 609e1e72..36105fb7 100644 --- a/src/ocrmypdf/_pipeline.py +++ b/src/ocrmypdf/_pipeline.py @@ -23,7 +23,7 @@ from contextlib import suppress from datetime import datetime, timezone from pathlib import Path from shutil import copyfileobj -from typing import Dict, Iterable, Optional +from typing import BinaryIO, Dict, Iterable, Optional, Union, cast import img2pdf import pikepdf @@ -817,16 +817,17 @@ def merge_sidecars(txt_files: Iterable[Optional[Path]], context: PdfContext): return output_file -def copy_final(input_file: Path, output_file: Path, _context: PdfContext): +def copy_final(input_file, output_file, _context: PdfContext): log.debug('%s -> %s', input_file, output_file) with open(input_file, 'rb') as input_stream: if output_file == '-': copyfileobj(input_stream, sys.stdout.buffer) sys.stdout.flush() elif hasattr(output_file, 'writable'): - copyfileobj(input_stream, output_file) + output_stream = output_file + copyfileobj(input_stream, output_stream) with suppress(AttributeError): - output_file.flush() + output_stream.flush() else: # At this point we overwrite the output_file specified by the user # use copyfileobj because then we use open() to create the file and diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py index de7ba97c..f64ed497 100644 --- a/src/ocrmypdf/_validation.py +++ b/src/ocrmypdf/_validation.py @@ -342,7 +342,7 @@ def create_input_file(options, work_folder: Path) -> Tuple[Path, str]: if not options.input_file.readable(): raise InputFileError("Input file stream is not readable") log.info('reading file from input stream') - target = os.path.join(work_folder, 'stream') + target = work_folder / 'stream' with open(target, 'wb') as stream_buffer: copyfileobj(options.input_file, stream_buffer) return target, "stream" diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py index 1800f1e4..b9aad6d3 100644 --- a/src/ocrmypdf/pdfinfo/info.py +++ b/src/ocrmypdf/pdfinfo/info.py @@ -25,6 +25,7 @@ from functools import partial from math import hypot, isclose from os import PathLike from pathlib import Path +from typing import Any, Dict, List from warnings import warn import pikepdf @@ -568,7 +569,7 @@ def simplify_textboxes(miner, textbox_getter): def _pdf_get_pageinfo( pdf, pageno: int, infile: PathLike, check_pages, detailed_analysis ): - pageinfo = {} + pageinfo: Dict[str, Any] = {} pageinfo['pageno'] = pageno pageinfo['images'] = []