X-Git-Url: https://git.distorted.org.uk/~mdw/termux-packages/blobdiff_plain/a4279e8660fe2dc1c9c910649f32a9dea8092a58..ca7699c16955e3dd8cff7a5e75b20e7b0360d036:/packages/tesseract/build.sh

NOTE(review): line structure restored from a whitespace-collapsed copy of this
patch. Record order and the ' '/'-'/'+' markers follow the collapsed text and
agree with the hunk header (29 old lines, 30 new lines, 3 blank context lines).
Leading whitespace (tabs assumed) and the exact position of the blank line
inside termux_step_pre_configure were not recoverable -- confirm against the
upstream commit before applying.

diff --git a/packages/tesseract/build.sh b/packages/tesseract/build.sh
index 928e3bbb..a0b0584d 100644
--- a/packages/tesseract/build.sh
+++ b/packages/tesseract/build.sh
@@ -1,29 +1,30 @@
 TERMUX_PKG_HOMEPAGE=https://github.com/tesseract-ocr/tesseract
 TERMUX_PKG_DESCRIPTION="Tesseract is probably the most accurate open source OCR engine available"
-TERMUX_PKG_VERSION=3.04.00
-TERMUX_PKG_BUILD_REVISION=1
+TERMUX_PKG_VERSION=3.05.00
 TERMUX_PKG_DEPENDS="libtool, libuuid, leptonica"
 TERMUX_PKG_SRCURL=https://github.com/tesseract-ocr/tesseract/archive/${TERMUX_PKG_VERSION}.tar.gz
+TERMUX_PKG_SHA256=3fe83e06d0f73b39f6e92ed9fc7ccba3ef734877b76aa5ddaaa778fac095d996
 TERMUX_PKG_FOLDERNAME=tesseract-${TERMUX_PKG_VERSION}
 
 termux_step_pre_configure() {
+	# http://blog.matt-swain.com/post/26419042500/installing-tesseract-ocr-on-mac-os-x-lion
 	export LIBLEPT_HEADERSDIR=${TERMUX_PREFIX}/include/leptonica
 
-	cd $TERMUX_PKG_SRCDIR
-	perl -p -i -e 's|ADD_RT, true|ADD_RT, false|g' configure.ac
+	perl -p -i -e 's|ADD_RT], true|ADD_RT], false|g' configure.ac
 	./autogen.sh
 }
 
 termux_step_post_make_install() {
+	# download english trained data
 	cd "${TERMUX_PREFIX}/share/tessdata"
 	rm -f eng.*
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.bigrams
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.fold
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.lm
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.nn
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.params
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.size
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.cube.word-freq
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.tesseract_cube.nn
-	wget https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata
+	for f in cube.{bigrams,fold,lm,nn,params,size,word-freq} tesseract_cube.nn traineddata; do
+		f=eng.$f
+		# From the tessdata README: "These language data files only work with
+		# Tesseract 4. They are based on the sources in tesseract-ocr/langdata on GitHub.
+		# Get language data files for Tesseract 3.04 or 3.05 from the 3.04 tree."
+		termux_download \
+			https://raw.githubusercontent.com/tesseract-ocr/tessdata/3.04.00/$f \
+			$f
+	done
 }