diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
index bb2f7e18..3be65faa 100644
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -147,11 +147,9 @@ def triage(original_filename, input_file, output_file, options):
     return output_file
 
 
-def get_pdfinfo(input_file, detailed_page_analysis=False, progbar=False):
+def get_pdfinfo(input_file, progbar=False):
     try:
-        return PdfInfo(
-            input_file, detailed_page_analysis=detailed_page_analysis, progbar=progbar
-        )
+        return PdfInfo(input_file, progbar=progbar)
     except pikepdf.PasswordError:
         raise EncryptedPdfError()
     except pikepdf.PdfError:
diff --git a/src/ocrmypdf/_sync.py b/src/ocrmypdf/_sync.py
index f54c32a6..9f5c65d8 100644
--- a/src/ocrmypdf/_sync.py
+++ b/src/ocrmypdf/_sync.py
@@ -326,11 +326,7 @@ def run_pipeline(options, api=False):
         )
 
         # Gather pdfinfo and create context
-        pdfinfo = get_pdfinfo(
-            origin_pdf,
-            detailed_page_analysis=options.redo_ocr,
-            progbar=options.progress_bar,
-        )
+        pdfinfo = get_pdfinfo(origin_pdf, progbar=options.progress_bar)
 
         context = PDFContext(options, work_folder, origin_pdf, pdfinfo)
 
diff --git a/src/ocrmypdf/exec/ghostscript.py b/src/ocrmypdf/exec/ghostscript.py
index 5c27488f..44b81682 100644
--- a/src/ocrmypdf/exec/ghostscript.py
+++ b/src/ocrmypdf/exec/ghostscript.py
@@ -83,55 +83,6 @@ def _gs_error_reported(stream) -> bool:
     return re.search(r'error', stream, flags=re.IGNORECASE)
 
 
-def extract_text(input_file, pageno=1):
-    """Use the txtwrite device to get text layout information out
-
-    For details on options of -dTextFormat see
-    https://www.ghostscript.com/doc/current/VectorDevices.htm#TXT
-
-    Format is like
-    <page>
-    <line>
-    <span bbox="left top right bottom" font="..." size="...">
-    <char bbox="...." c="X"/>
-
-    :param pageno: number of page to extract, or all pages if None
-    :return: XML-ish text representation in bytes
-    """
-
-    if pageno is not None:
-        pages = ['-dFirstPage=%i' % pageno, '-dLastPage=%i' % pageno]
-    else:
-        pages = []
-
-    # Note due to bug https://bugs.ghostscript.com/show_bug.cgi?id=701971
-    # Ghostscript <= 9.50 will truncate output unless we write to stdout, so
-    # don't write to a file.
-    args_gs = (
-        [
-            GS,
-            '-dQUIET',
-            '-dSAFER',
-            '-dBATCH',
-            '-dNOPAUSE',
-            '-sDEVICE=txtwrite',
-            '-dTextFormat=0',
-        ]
-        + pages
-        + ['-o', '-', fspath(input_file), "-sstdout=%stderr"]
-    )
-
-    try:
-        p = run(args_gs, stdout=PIPE, stderr=PIPE, check=True)
-    except CalledProcessError as e:
-        raise SubprocessOutputError(
-            'Ghostscript text extraction failed\n%s\n%s'
-            % (input_file, e.stderr.decode(errors='replace'))
-        )
-
-    return p.stdout
-
-
 def rasterize_pdf(
     input_file: os.PathLike,
     output_file: os.PathLike,
diff --git a/src/ocrmypdf/pdfinfo/ghosttext.py b/src/ocrmypdf/pdfinfo/ghosttext.py
deleted file mode 100644
index 07e72f19..00000000
--- a/src/ocrmypdf/pdfinfo/ghosttext.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# © 2018 James R. Barlow: github.com/jbarlow83
-#
-# This file is part of OCRmyPDF.
-#
-# OCRmyPDF is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# OCRmyPDF is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.
-
-import logging
-import re
-import xml.etree.ElementTree as ET
-
-from ..exec import ghostscript
-
-log = logging.getLogger(__name__)
-
-# Forgive me for I have sinned
-# I am using regular expressions to parse XML. However the XML in this case,
-# generated by Ghostscript, is self-consistent enough to be parseable.
-regex_remove_char_tags = re.compile(
-    br"""
-    <char\b
-    (?:   [^>]   # anything single character but >
-        | \">\"  # special case: trap ">"
-    )*
-    />           # terminate with '/>'
-""",
-    re.VERBOSE,
-)
-
-
-def page_get_textblocks(infile, pageno, xmltext, height):
-    """Get text boxes out of Ghostscript txtwrite xml"""
-
-    root = xmltext
-    if not hasattr(xmltext, 'findall'):
-        return []
-
-    def blocks():
-        for span in root.findall('.//span'):
-            bbox_str = span.attrib['bbox']
-            font_size = span.attrib['size']
-            pts = [int(pt) for pt in bbox_str.split()]
-            pts[1] = pts[1] - int(float(font_size) + 0.5)
-            bbox_topdown = tuple(pts)
-            bb = bbox_topdown
-            bbox_bottomup = (bb[0], height - bb[3], bb[2], height - bb[1])
-            yield bbox_bottomup
-
-    def joined_blocks():
-        prev = None
-        for bbox in blocks():
-            if prev is None:
-                prev = bbox
-            if bbox[1] == prev[1] and bbox[3] == prev[3]:
-                gap = prev[2] - bbox[0]
-                height = abs(bbox[3] - bbox[1])
-                if gap < height:
-                    # Join boxes
-                    prev = (prev[0], prev[1], bbox[2], bbox[3])
-                    continue
-            # yield previously joined bboxes and start anew
-            yield prev
-            prev = bbox
-        if prev is not None:
-            yield prev
-
-    return [block for block in joined_blocks()]
-
-
-def extract_text_xml(infile, pdf, pageno=None):
-    existing_text = ghostscript.extract_text(infile, pageno=None)
-    existing_text = regex_remove_char_tags.sub(b' ', existing_text)
-
-    try:
-        root = ET.fromstringlist([b'<document>\n', existing_text, b'</document>\n'])
-        page_xml = root.findall('page')
-    except ET.ParseError as e:
-        log.error(
-            "An error occurred while attempting to retrieve existing text in "
-            "the input file. Will attempt to continue assuming that there is "
-            "no existing text in the file. The error was:"
-        )
-        log.error(e)
-        page_xml = [None] * len(pdf.pages)
-
-    page_count_difference = len(pdf.pages) - len(page_xml)
-    if page_count_difference != 0:
-        log.error("The number of pages in the input file is inconsistent.")
-        log.error(f"Expected {len(pdf.pages)}, txtwrite says {len(page_xml)}")
-        if page_count_difference > 0:
-            page_xml.extend([None] * page_count_difference)
-    return page_xml
diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py
index bb232ee5..bd7641c4 100644
--- a/src/ocrmypdf/pdfinfo/info.py
+++ b/src/ocrmypdf/pdfinfo/info.py
@@ -33,9 +33,7 @@ from pikepdf import PdfMatrix
 
 from ocrmypdf._concurrent import exec_progress_pool
 from ocrmypdf.exceptions import EncryptedPdfError
-from ocrmypdf.exec import ghostscript
 from ocrmypdf.helpers import Resolution
-from ocrmypdf.pdfinfo import ghosttext
 from ocrmypdf.pdfinfo.layout import get_page_analysis, get_text_boxes
 
 logger = logging.getLogger()
@@ -557,7 +555,7 @@ def simplify_textboxes(miner, textbox_getter):
         yield TextboxInfo(box.bbox, visible, corrupt)
 
 
-def _pdf_get_pageinfo(pdf, pageno: int, infile: PathLike, xmltext: str):
+def _pdf_get_pageinfo(pdf, pageno: int, infile: PathLike):
     pageinfo = {}
     pageinfo['pageno'] = pageno
     pageinfo['images'] = []
@@ -567,16 +565,10 @@ def _pdf_get_pageinfo(pdf, pageno: int, infile: PathLike, xmltext: str):
     width_pt = mediabox[2] - mediabox[0]
     height_pt = mediabox[3] - mediabox[1]
 
-    if xmltext is not None:
-        bboxes = ghosttext.page_get_textblocks(
-            fspath(infile), pageno, xmltext=xmltext, height=height_pt
-        )
-        pageinfo['bboxes'] = bboxes
-    else:
-        pscript5_mode = str(pdf.docinfo.get('/Creator')).startswith('PScript5')
-        miner = get_page_analysis(infile, pageno, pscript5_mode)
-        pageinfo['textboxes'] = list(simplify_textboxes(miner, get_text_boxes))
-        bboxes = (box.bbox for box in pageinfo['textboxes'])
+    pscript5_mode = str(pdf.docinfo.get('/Creator')).startswith('PScript5')
+    miner = get_page_analysis(infile, pageno, pscript5_mode)
+    pageinfo['textboxes'] = list(simplify_textboxes(miner, get_text_boxes))
+    bboxes = (box.bbox for box in pageinfo['textboxes'])
 
     pageinfo['has_text'] = _page_has_text(bboxes, width_pt, height_pt)
 
@@ -624,12 +616,12 @@ def _pdf_pageinfo_sync_init(infile):
 
 def _pdf_pageinfo_sync(args):
     global worker_pdf  # pylint: disable=global-statement
-    pageno, infile, xmltext, detailed_analysis = args
-    page = PageInfo(worker_pdf, pageno, infile, xmltext, detailed_analysis)
+    pageno, infile = args
+    page = PageInfo(worker_pdf, pageno, infile)
     return page
 
 
-def _pdf_pageinfo_concurrent(pdf, infile, pages_xml, detailed_analysis, progbar):
+def _pdf_pageinfo_concurrent(pdf, infile, progbar):
     pages = [None] * len(pdf.pages)
 
     def update_pageinfo(result, pbar):
@@ -637,11 +629,7 @@ def _pdf_pageinfo_concurrent(pdf, infile, pages_xml, detailed_analysis, progbar)
         pages[page.pageno] = page
         pbar.update()
 
-    contexts = (
-        (n, infile, pages_xml[n] if pages_xml else None, detailed_analysis)
-        for n in range(len(pdf.pages))
-    )
-
+    contexts = ((n, infile) for n in range(len(pdf.pages)))
     if os.name == 'nt':
         # We can't parallelize on Windows, because Windows cannot fork.
         # We are trying to fork, then take advantage of the preloaded pikepdf.Pdf
@@ -668,19 +656,12 @@ def _pdf_pageinfo_concurrent(pdf, infile, pages_xml, detailed_analysis, progbar)
     return pages
 
 
-def _pdf_get_all_pageinfo(infile, detailed_analysis=False, progbar=False):
+def _pdf_get_all_pageinfo(infile, progbar=False):
     pdf = pikepdf.open(infile)  # Do not close in this function
     try:
         if pdf.is_encrypted:
             raise EncryptedPdfError()  # Triggered by encryption with empty passwd
-        if detailed_analysis:
-            pages_xml = None
-        else:
-            pages_xml = ghosttext.extract_text_xml(infile, pdf, pageno=None)
-
-        pages = _pdf_pageinfo_concurrent(
-            pdf, infile, pages_xml, detailed_analysis, progbar
-        )
+        pages = _pdf_pageinfo_concurrent(pdf, infile, progbar)
     except Exception:
         pdf.close()
         raise
@@ -689,11 +670,10 @@ def _pdf_get_all_pageinfo(infile, detailed_analysis=False, progbar=False):
 
 
 class PageInfo:
-    def __init__(self, pdf, pageno, infile, xmltext, detailed_analysis=False):
+    def __init__(self, pdf, pageno, infile):
         self._pageno = pageno
         self._infile = infile
-        self._pageinfo = _pdf_get_pageinfo(pdf, pageno, infile, xmltext)
-        self._detailed_analysis = detailed_analysis
+        self._pageinfo = _pdf_get_pageinfo(pdf, pageno, infile)
 
     @property
     def pageno(self):
@@ -705,8 +685,6 @@ class PageInfo:
 
     @property
     def has_corrupt_text(self):
-        if not self._detailed_analysis:
-            raise NotImplementedError('Did not do detailed analysis')
         return any(tbox.is_corrupt for tbox in self._pageinfo['textboxes'])
 
     @property
@@ -757,7 +735,7 @@ class PageInfo:
 
         if 'textboxes' not in self._pageinfo:
             if visible is not None and corrupt is not None:
-                raise NotImplementedError('Ghostscript textboxes cannot be classified')
+                raise NotImplementedError('Incomplete information on textboxes')
             return self._pageinfo['bboxes']
 
         return (
@@ -792,13 +770,9 @@ class PageInfo:
 class PdfInfo:
     """Get summary information about a PDF"""
 
-    def __init__(self, infile, detailed_page_analysis=False, progbar=False):
+    def __init__(self, infile, progbar=False):
         self._infile = infile
-        if ghostscript.version() in ('9.52',):
-            detailed_page_analysis = True  # txtwrite doesn't work in these versions
-        self._pages, pdf = _pdf_get_all_pageinfo(
-            infile, detailed_page_analysis, progbar=progbar
-        )
+        self._pages, pdf = _pdf_get_all_pageinfo(infile, progbar=progbar)
         self._needs_rendering = pdf.root.get('/NeedsRendering', False)
         self._has_acroform = False
         if '/AcroForm' in pdf.root:
diff --git a/tests/cache/manifest.jsonl b/tests/cache/manifest.jsonl
index 05a0e86c..23e50166 100644
--- a/tests/cache/manifest.jsonl
+++ b/tests/cache/manifest.jsonl
@@ -69,3 +69,4 @@
 {"tesseract_version": "tesseract 4.1.0  leptonica-1.78.0   libgif 5.1.4 : libjpeg 9c : libpng 1.6.37 : libtiff 4.1.0 : zlib 1.2.11 : libwebp 1.0.3 : libopenjp2 2.3.1  Found AVX2  Found AVX  Found SSE ", "platform": "Darwin-18.7.0-x86_64-i386-64bit", "python": "3.7.5", "argv_slug": "__-l__eng__000002_ocr.png__000002_ocr_tess__pdf__txt", "sourcefile": "resources/3small.pdf", "args": ["-l", "eng", "-c", "textonly_pdf=1", "$TMPDIR/000002_ocr.png", "$TMPDIR/000002_ocr_tess", "pdf", "txt"]}
 {"tesseract_version": "tesseract 4.1.0  leptonica-1.78.0   libgif 5.1.4 : libjpeg 9c : libpng 1.6.37 : libtiff 4.1.0 : zlib 1.2.11 : libwebp 1.0.3 : libopenjp2 2.3.1  Found AVX2  Found AVX  Found SSE ", "platform": "Darwin-18.7.0-x86_64-i386-64bit", "python": "3.7.5", "argv_slug": "__-l__eng__000001_ocr.png__000001_ocr_tess__pdf__txt", "sourcefile": "resources/3small.pdf", "args": ["-l", "eng", "-c", "textonly_pdf=1", "$TMPDIR/000001_ocr.png", "$TMPDIR/000001_ocr_tess", "pdf", "txt"]}
 {"tesseract_version": "tesseract 4.1.0  leptonica-1.78.0   libgif 5.1.4 : libjpeg 9c : libpng 1.6.37 : libtiff 4.1.0 : zlib 1.2.11 : libwebp 1.0.3 : libopenjp2 2.3.1  Found AVX2  Found AVX  Found SSE ", "platform": "Darwin-18.7.0-x86_64-i386-64bit", "python": "3.7.5", "argv_slug": "__-l__eng__000003_ocr.png__000003_ocr_tess__pdf__txt", "sourcefile": "resources/3small.pdf", "args": ["-l", "eng", "-c", "textonly_pdf=1", "$TMPDIR/000003_ocr.png", "$TMPDIR/000003_ocr_tess", "pdf", "txt"]}
+{"tesseract_version": "tesseract 4.1.1  leptonica-1.79.0   libgif 5.2.1 : libjpeg 9d : libpng 1.6.37 : libtiff 4.1.0 : zlib 1.2.11 : libwebp 1.1.0 : libopenjp2 2.3.1  Found AVX2  Found AVX  Found FMA  Found SSE ", "platform": "Darwin-18.7.0-x86_64-i386-64bit", "python": "3.7.7", "argv_slug": "__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt", "sourcefile": "resources/multipage.pdf", "args": ["-l", "eng", "$TMPDIR/000002_ocr.png", "$TMPDIR/000002_ocr_hocr", "hocr", "txt"]}
diff --git a/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/hocr.bin b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/hocr.bin
new file mode 100644
index 00000000..27e769f6
Binary files /dev/null and b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/hocr.bin differ
diff --git a/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/stderr.bin b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/stderr.bin
new file mode 100644
index 00000000..16b617e5
Binary files /dev/null and b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/stderr.bin differ
diff --git a/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/stdout.bin b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/stdout.bin
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/txt.bin b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/txt.bin
new file mode 100644
index 00000000..21e1e995
Binary files /dev/null and b/tests/cache/multipage/__-l__eng__000002_ocr.png__000002_ocr_hocr__hocr__txt/txt.bin differ
diff --git a/tests/test_main.py b/tests/test_main.py
index d4146ccf..9b0cd17e 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -101,10 +101,10 @@ def test_skip_ocr(spoof_tesseract_cache, resources, outpdf):
 
 def test_redo_ocr(resources, outpdf):
     in_ = resources / 'graph_ocred.pdf'
-    before = PdfInfo(in_, detailed_page_analysis=True)
+    before = PdfInfo(in_)
     out = outpdf
     out = check_ocrmypdf(in_, out, '--redo-ocr')
-    after = PdfInfo(out, detailed_page_analysis=True)
+    after = PdfInfo(out)
     assert before[0].has_text and after[0].has_text
     assert (
         before[0].get_textareas() != after[0].get_textareas()
diff --git a/tests/test_pdfinfo.py b/tests/test_pdfinfo.py
index 13fb8a8b..cfa90d94 100644
--- a/tests/test_pdfinfo.py
+++ b/tests/test_pdfinfo.py
@@ -151,22 +151,6 @@ def test_pickle(resources):
     pickle.dumps(pdf)
 
 
-def test_regex():
-    rx = pdfinfo.ghosttext.regex_remove_char_tags
-
-    must_match = [
-        b'<char bbox="0 108 0 108" c="/"/>',
-        b'<char bbox="0 108 0 108" c=">"/>',
-        b'<char bbox="0 108 0 108" c="X"/>',
-    ]
-    must_not_match = [b'<span stuff="c">', b'<span>', b'</span>', b'</page>']
-
-    for s in must_match:
-        assert rx.match(s)
-    for s in must_not_match:
-        assert not rx.match(s)
-
-
 def test_vector(resources):
     filename = resources / 'vector.pdf'
     pdf = pdfinfo.PdfInfo(filename)
@@ -184,16 +168,9 @@ def test_ocr_detection(resources):
 @pytest.mark.parametrize(
     'testfile', ('truetype_font_nomapping.pdf', 'type3_font_nomapping.pdf')
 )
-@pytest.mark.xfail(
-    ghostscript.version() in ('9.52',), reason="gs 9.52 txtwrite doesn't work"
-)
 def test_corrupt_font_detection(resources, testfile):
     filename = resources / testfile
-    with pytest.raises(NotImplementedError):
-        pdf = pdfinfo.PdfInfo(filename)
-        pdf[0].has_corrupt_text
-
-    pdf = pdfinfo.PdfInfo(filename, detailed_page_analysis=True)
+    pdf = pdfinfo.PdfInfo(filename)
     assert pdf[0].has_corrupt_text