mdw@git.distorted.org.uk Git - termux-packages/blob - packages/tesseract/build.sh

   1 TERMUX_PKG_HOMEPAGE=https://github.com/tesseract-ocr/tesseract
   2 TERMUX_PKG_DESCRIPTION="Tesseract is probably the most accurate open source OCR engine available"
   3 TERMUX_PKG_VERSION=3.05.00
   4 TERMUX_PKG_DEPENDS="libtool, libuuid, leptonica"
   5 TERMUX_PKG_SRCURL=https://github.com/tesseract-ocr/tesseract/archive/${TERMUX_PKG_VERSION}.tar.gz
   6 TERMUX_PKG_SHA256=3fe83e06d0f73b39f6e92ed9fc7ccba3ef734877b76aa5ddaaa778fac095d996
   7 TERMUX_PKG_FOLDERNAME=tesseract-${TERMUX_PKG_VERSION}
   8
   9 termux_step_pre_configure() {
  10         # http://blog.matt-swain.com/post/26419042500/installing-tesseract-ocr-on-mac-os-x-lion
  11         export LIBLEPT_HEADERSDIR=${TERMUX_PREFIX}/include/leptonica
  12
  13         cd $TERMUX_PKG_SRCDIR
  14         perl -p -i -e 's|ADD_RT], true|ADD_RT], false|g' configure.ac
  15         ./autogen.sh
  16 }
  17
  18 termux_step_post_make_install() {
  19         # download english trained data
  20         cd "${TERMUX_PREFIX}/share/tessdata"
  21         rm -f eng.*
  22         for f in cube.{bigrams,fold,lm,nn,params,size,word-freq} tesseract_cube.nn traineddata; do
  23                 f=eng.$f
  24                 # From the tessdata README: "These language data files only work with
  25                 # Tesseract 4. They are based on the sources in tesseract-ocr/langdata on GitHub.
  26                 # Get language data files for Tesseract 3.04 or 3.05 from the 3.04 tree."
  27                 termux_download \
  28                         https://raw.githubusercontent.com/tesseract-ocr/tessdata/3.04.00/$f \
  29                         $f
  30         done
  31 }