# Package metadata for the Tesseract OCR engine.
TERMUX_PKG_HOMEPAGE="https://github.com/tesseract-ocr/tesseract"
TERMUX_PKG_DESCRIPTION="Tesseract is probably the most accurate open source OCR engine available"
TERMUX_PKG_VERSION="3.05.00"
TERMUX_PKG_DEPENDS="libtool, libuuid, leptonica"

# Source tarball for the version above, with its expected SHA-256 digest.
TERMUX_PKG_SRCURL="https://github.com/tesseract-ocr/tesseract/archive/${TERMUX_PKG_VERSION}.tar.gz"
TERMUX_PKG_SHA256="3fe83e06d0f73b39f6e92ed9fc7ccba3ef734877b76aa5ddaaa778fac095d996"

# Folder name derived from the version (presumably the top-level directory
# inside the extracted tarball -- confirm against the build framework).
TERMUX_PKG_FOLDERNAME="tesseract-${TERMUX_PKG_VERSION}"
| 8 | |
| 9 | termux_step_pre_configure() { |
| 10 | # http://blog.matt-swain.com/post/26419042500/installing-tesseract-ocr-on-mac-os-x-lion |
| 11 | export LIBLEPT_HEADERSDIR=${TERMUX_PREFIX}/include/leptonica |
| 12 | |
| 13 | perl -p -i -e 's|ADD_RT], true|ADD_RT], false|g' configure.ac |
| 14 | ./autogen.sh |
| 15 | } |
| 16 | |
# Install the English trained-data files into ${TERMUX_PREFIX}/share/tessdata.
#
# Any existing eng.* files in that directory are removed first, then each
# English data file is fetched with termux_download from the 3.04.00 tree of
# the tesseract-ocr/tessdata repository.
#
# Returns non-zero without deleting anything if the tessdata directory cannot
# be entered. On success the working directory is left at the tessdata
# directory, as before.
termux_step_post_make_install() {
	local tessdata_dir="${TERMUX_PREFIX}/share/tessdata"
	# From the tessdata README: "These language data files only work with
	# Tesseract 4. They are based on the sources in tesseract-ocr/langdata on GitHub.
	# Get language data files for Tesseract 3.04 or 3.05 from the 3.04 tree."
	local tessdata_url="https://raw.githubusercontent.com/tesseract-ocr/tessdata/3.04.00"
	# Declared local so the loop variables do not leak into the build script
	# that sources this file.
	local suffix data_file

	# Guard the cd: the glob delete below must never run in another directory.
	cd "$tessdata_dir" || return 1
	rm -f eng.*

	for suffix in cube.{bigrams,fold,lm,nn,params,size,word-freq} tesseract_cube.nn traineddata; do
		data_file="eng.${suffix}"
		# Deliberately not followed by "|| return": that would suppress
		# errexit handling inside termux_download.
		termux_download \
			"${tessdata_url}/${data_file}" \
			"${data_file}"
	done
}