From 700abbb8a51790bb1fba62c6c13fbc8f933aa92a Mon Sep 17 00:00:00 2001
From: "James R. Barlow" <jim@purplerock.ca>
Date: Sat, 10 Nov 2018 15:48:41 -0800
Subject: [PATCH] Documentation for OCR quality features

---
 .gitignore               | 1 +
 docs/release_notes.rst   | 8 +++++++-
 src/ocrmypdf/__main__.py | 4 ++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 610fe588..e545c422 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ htmlcov/
 *.profile
 /*.pdf
 /*.qdf
+/*.png
 /scratch.py
 IDEAS
 log/
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index b6be039f..d36cf4d4 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -29,7 +29,13 @@ v7.3.0
 
     -   OCRmyPDF now warns when a PDF that contains Adobe AcroForms, since such files probably do not need OCR. It can work with these files.
 
--   Added a new feature ``--mask-barcodes`` to detect and suppress barcodes in files. We have observed that barcodes can interfere with OCR.
+-   Added three new **experimental** features. The name, syntax and behavior of these arguments are subject to change. They may also be incompatible with some other features.
+
+    -   ``--remove-vectors`` which strips out vector graphics. This can improve OCR quality since OCR will not search artwork for readable text; however, it currently removes "text as curves" as well.
+
+    -   ``--mask-barcodes`` to detect and suppress barcodes in files. We have observed that barcodes can interfere with OCR.
+
+    -   ``--threshold`` which uses a more sophisticated thresholding algorithm than the one Tesseract OCR currently uses. This works around a `known issue in Tesseract <https://github.com/tesseract-ocr/tesseract/issues/1990>`_ with text on bright backgrounds.
 
 -   Fixed an issue where an error message was not reported when the installed Ghostscript was very old.
 
diff --git a/src/ocrmypdf/__main__.py b/src/ocrmypdf/__main__.py
index 2caf45a4..52ae8629 100755
--- a/src/ocrmypdf/__main__.py
+++ b/src/ocrmypdf/__main__.py
@@ -250,12 +250,12 @@ preprocessing.add_argument(
          "will not be included in OCR. This can eliminate false characters.")
 preprocessing.add_argument(
     '--mask-barcodes', action='store_true',
-    help="Mask out any barcodes that appear in the PDF so they are not "
+    help="EXPERIMENTAL. Mask out any barcodes that appear in the PDF so they are not "
          "considered during OCR. Barcodes can introduce false characters into "
          "OCR.")
 preprocessing.add_argument(
     '--threshold', action='store_true',
-    help="Threshold image to 1bpp before sending it to Tesseract for OCR. Can "
+    help="EXPERIMENTAL. Threshold image to 1bpp before sending it to Tesseract for OCR. Can "
          "improve OCR quality compared to Tesseract's thresholder.")
 
 ocrsettings = parser.add_argument_group(