# OCRmyPDF/tests/test_null_ocr_engine.py

# SPDX-FileCopyrightText: 2025 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

"""Unit tests for NullOcrEngine (--ocr-engine none).

Tests verify that the Null OCR engine exists and functions correctly
for scenarios where users want PDF processing without OCR.
"""

from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock

import pytest


class TestNullOcrEngineExists:
    """Smoke tests: the null OCR plugin module and its engine class can be imported."""

    def test_null_ocr_module_importable(self):
        """Importing null_ocr from the builtin plugins package should succeed."""
        from ocrmypdf.builtin_plugins import null_ocr as plugin_module

        # The import itself is the real check; the assert guards against a
        # None placeholder being bound under that name.
        assert plugin_module is not None

    def test_null_ocr_engine_class_exists(self):
        """The null_ocr module should expose a NullOcrEngine name."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine as engine_cls

        assert engine_cls is not None


class TestNullOcrEngineInterface:
    """Exercise the OcrEngine-style methods exposed by NullOcrEngine."""

    def test_version_returns_none(self):
        """version() is expected to report the literal string 'none'."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        reported_version = NullOcrEngine.version()
        assert reported_version == "none"

    def test_creator_tag(self):
        """creator_tag() should mention that no OCR was performed."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        options = MagicMock()
        lowered = NullOcrEngine.creator_tag(options).lower()
        # Any one of these markers is accepted as "indicates no OCR".
        accepted_markers = ("no ocr", "null", "none")
        assert any(marker in lowered for marker in accepted_markers)

    def test_languages_returns_empty_set(self):
        """languages() should yield an empty set."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        options = MagicMock()
        assert NullOcrEngine.languages(options) == set()

    def test_supports_generate_ocr_returns_true(self):
        """supports_generate_ocr() should be exactly True."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        supported = NullOcrEngine.supports_generate_ocr()
        assert supported is True

    def test_get_orientation_returns_zero(self):
        """get_orientation() should report an angle of 0."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        image_path = Path("test.png")
        orientation = NullOcrEngine.get_orientation(image_path, MagicMock())
        assert orientation.angle == 0

    def test_get_deskew_returns_zero(self):
        """get_deskew() should report a skew of 0.0."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        image_path = Path("test.png")
        skew = NullOcrEngine.get_deskew(image_path, MagicMock())
        assert skew == 0.0


class TestNullOcrEngineGenerateOcr:
    """Check the value returned by NullOcrEngine.generate_ocr()."""

    @pytest.fixture
    def sample_image(self, tmp_path):
        """Write a 100x100 white RGB PNG (saved with dpi=(300, 300)) and return its path."""
        from PIL import Image

        destination = tmp_path / "test.png"
        Image.new('RGB', (100, 100), color='white').save(destination, dpi=(300, 300))
        return destination

    def test_generate_ocr_returns_tuple(self, sample_image):
        """The result should be a 2-tuple of (OcrElement, str)."""
        from ocrmypdf import OcrElement
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        outcome = NullOcrEngine.generate_ocr(sample_image, MagicMock(), 0)

        # Shape first, then element types, so a failure points at the
        # outermost mismatch.
        assert isinstance(outcome, tuple)
        assert len(outcome) == 2
        tree, text = outcome
        assert isinstance(tree, OcrElement)
        assert isinstance(text, str)

    def test_generate_ocr_returns_empty_text(self, sample_image):
        """The text half of the result should be the empty string."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        options = MagicMock()
        _tree, recognized_text = NullOcrEngine.generate_ocr(sample_image, options, 0)

        assert recognized_text == ""

    def test_generate_ocr_returns_page_element(self, sample_image):
        """The element half of the result should have ocr_class PAGE."""
        from ocrmypdf import OcrClass
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        options = MagicMock()
        page, _text = NullOcrEngine.generate_ocr(sample_image, options, 0)

        assert page.ocr_class == OcrClass.PAGE

    def test_generate_ocr_page_has_correct_dimensions(self, sample_image):
        """The page bounding box should extend to the image's width and height."""
        from ocrmypdf.builtin_plugins.null_ocr import NullOcrEngine

        options = MagicMock()
        page, _text = NullOcrEngine.generate_ocr(sample_image, options, 0)

        # The sample_image fixture creates a 100x100 pixel image.
        assert page.bbox.right == 100
        assert page.bbox.bottom == 100


class TestOcrEngineOption:
    """Test --ocr-engine CLI option."""

    @staticmethod
    def _ocr_engine_action():
        """Return the argparse action registered for ``--ocr-engine``.

        Builds a parser via ``ocrmypdf.cli.get_parser()`` and scans its
        registered actions. If no action carries the ``--ocr-engine`` option
        string, the calling test is failed via ``pytest.fail`` so that a
        missing option can never let a test pass without asserting anything.
        """
        from ocrmypdf.cli import get_parser

        parser = get_parser()
        # argparse offers no public lookup by option string, so inspect the
        # parser's action list directly.
        for action in parser._actions:
            if '--ocr-engine' in action.option_strings:
                return action
        # pytest.fail raises, so nothing is returned on this path.
        pytest.fail("--ocr-engine option not found")

    def test_ocr_engine_option_accepted(self):
        """CLI should accept --ocr-engine option."""
        from ocrmypdf.cli import get_parser

        parser = get_parser()

        # Should not raise
        args = parser.parse_args(['--ocr-engine', 'none', 'in.pdf', 'out.pdf'])
        assert args.ocr_engine == 'none'

    def test_ocr_engine_choices_include_none(self):
        """--ocr-engine should include 'none' as a choice."""
        action = self._ocr_engine_action()

        assert 'none' in action.choices

    def test_ocr_engine_choices_include_auto(self):
        """--ocr-engine should include 'auto' as a choice and as its default."""
        # Previously this test passed silently when the option was missing;
        # the shared lookup now fails the test in that case.
        action = self._ocr_engine_action()

        assert 'auto' in action.choices
        assert action.default == 'auto'