# Patch summary: adds Noto font packages to the package-install lists and records the change in the release notes.
#   .docker/Dockerfile          - adds fonts-noto-core and fonts-noto-cjk to the apt-get install list
#   .docker/Dockerfile.alpine   - adds font-noto to the apk add list
#   .github/workflows/build.yml - adds fonts-noto-core and fonts-noto-cjk to the workflow's apt-get install list
#   docs/release_notes.md       - adds the bullet "Recommended: Noto fonts for improved OCR text positioning"
# NOTE(review): the Alpine hunk adds only font-noto, with no counterpart to fonts-noto-cjk - confirm this is intentional.
# NOTE(review): the hunks below appear with their line breaks collapsed; restore the original line structure before applying.
diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 7c79cab6..6c35625d 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -67,6 +67,8 @@ RUN add-apt-repository -y ppa:alex-p/tesseract-ocr5 RUN apt-get update && apt-get install -y --no-install-recommends \ ghostscript \ fonts-droid-fallback \ + fonts-noto-core \ + fonts-noto-cjk \ jbig2dec \ pngquant \ tesseract-ocr \ diff --git a/.docker/Dockerfile.alpine b/.docker/Dockerfile.alpine index fe1bdc69..3d3681eb 100644 --- a/.docker/Dockerfile.alpine +++ b/.docker/Dockerfile.alpine @@ -57,6 +57,7 @@ RUN apk add --no-cache \ tesseract-ocr-data-osd \ tesseract-ocr-data-por \ tesseract-ocr-data-spa \ + font-noto \ ttf-droid \ unpaper \ && rm -rf /var/cache/apk/* diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 058c5373..1fc790d0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -57,6 +57,8 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends \ curl \ + fonts-noto-core \ + fonts-noto-cjk \ ghostscript \ jbig2dec \ img2pdf \ diff --git a/docs/release_notes.md b/docs/release_notes.md index 657847bc..101ce2a1 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -107,6 +107,7 @@ official when it's tagged and posted to PyPI. - Preferred: both - Recommended: `pypdfium2` for PDF rasterization (new dependency) - Recommended: `ghostscript` (used to be Required) +- Recommended: Noto fonts for improved OCR text positioning - Optional: `verapdf` for fast PDF/A validation (new dependency) - Requires: `fpdf2` for text layer rendering (new dependency) - Recommended: replace `typer` with `cyclopts` in misc scripts (new dependency)