# OCRmyPDF # # VERSION 3.0.0 FROM debian:stretch MAINTAINER James R. Barlow # Add unprivileged user RUN useradd docker \ && mkdir /home/docker \ && chown docker:docker /home/docker # Update system and install our dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ locales \ ghostscript \ tesseract-ocr \ tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-eng tesseract-ocr-fra \ qpdf \ poppler-utils \ python3 \ python3-pip \ python3-venv \ python3-reportlab \ python3-pil # Enforce UTF-8 # Borrowed from https://index.docker.io/u/crosbymichael/python/ RUN dpkg-reconfigure locales && \ locale-gen C.UTF-8 && \ /usr/sbin/update-locale LANG=C.UTF-8 ENV LC_ALL C.UTF-8 # Build unpaper 6.1 RUN apt-get install -y \ wget \ gcc \ libavformat-dev \ libavcodec-dev \ libavutil-dev \ autoconf \ automake \ make \ pkg-config \ xsltproc WORKDIR /root RUN wget -q https://github.com/Flameeyes/unpaper/archive/unpaper-6.1.tar.gz RUN tar xf unpaper-6.1.tar.gz WORKDIR /root/unpaper-unpaper-6.1 RUN autoreconf -i RUN ./configure CFLAGS="-O2 -march=native -pipe -flto" RUN make -j install WORKDIR / RUN apt-get remove -y \ gcc \ autoconf \ automake \ pkg-config \ xsltproc \ make RUN apt-get autoremove -y && apt-get clean -y RUN rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /root/* # Set up a Python virtualenv and take all of the system packages, so we can # rely on the platform packages rather than importing GCC and compiling them RUN pyvenv /appenv \ && pyvenv --system-site-packages /appenv COPY . /application/ # Install application and dependencies # In this arrangement Pillow and reportlab will be provided by the system RUN . /appenv/bin/activate; \ pip install --upgrade pip \ && pip install --no-cache-dir /application \ && pip install --no-cache-dir -r /application/test_requirements.txt USER docker WORKDIR /home/docker ENV DEFAULT_RUFFUS_HISTORY_FILE=/tmp/.{basename}.ruffus_history.sqlite ENV OCRMYPDF_TEST_OUTPUT=/tmp/test-output ENV OCRMYPDF_IN_DOCKER=1 # Must use array form of ENTRYPOINT # Non-array form does not append other arguments, because that is "intuitive" ENTRYPOINT ["/application/docker-wrapper.sh"]