Commit | Line | Data |
---|---|---|
93779b4a DM |
1 | TERMUX_PKG_HOMEPAGE=https://github.com/tesseract-ocr/tesseract |
2 | TERMUX_PKG_DESCRIPTION="Tesseract is probably the most accurate open source OCR engine available" | |
c32267d2 | 3 | TERMUX_PKG_VERSION=3.05.01 |
d946e67c | 4 | TERMUX_PKG_REVISION=1 |
93779b4a DM |
5 | TERMUX_PKG_DEPENDS="libtool, libuuid, leptonica" |
6 | TERMUX_PKG_SRCURL=https://github.com/tesseract-ocr/tesseract/archive/${TERMUX_PKG_VERSION}.tar.gz | |
c32267d2 | 7 | TERMUX_PKG_SHA256=05898f93c5d057fada49b9a116fc86ad9310ff1726a0f499c3e5211b3af47ec1 |
93779b4a DM |
8 | |
9 | termux_step_pre_configure() { | |
f2730837 | 10 | # http://blog.matt-swain.com/post/26419042500/installing-tesseract-ocr-on-mac-os-x-lion |
93779b4a DM |
11 | export LIBLEPT_HEADERSDIR=${TERMUX_PREFIX}/include/leptonica |
12 | ||
052e347e | 13 | perl -p -i -e 's|ADD_RT], true|ADD_RT], false|g' configure.ac |
93779b4a DM |
14 | ./autogen.sh |
15 | } | |
16 | ||
17 | termux_step_post_make_install() { | |
f2730837 | 18 | # Download English trained data |
93779b4a DM |
19 | cd "${TERMUX_PREFIX}/share/tessdata" |
20 | rm -f eng.* | |
eb5ac24b FF |
21 | for f in cube.{bigrams,fold,lm,nn,params,size,word-freq} tesseract_cube.nn traineddata; do |
22 | f=eng.$f | |
bb290b16 FF |
23 | # From the tessdata README: "These language data files only work with |
24 | # Tesseract 4. They are based on the sources in tesseract-ocr/langdata on GitHub. | |
25 | # Get language data files for Tesseract 3.04 or 3.05 from the 3.04 tree." | |
052e347e FF |
26 | termux_download \ |
27 | https://raw.githubusercontent.com/tesseract-ocr/tessdata/3.04.00/$f \ | |
28 | $f | |
eb5ac24b | 29 | done |
93779b4a | 30 | } |